Conclusions
- The captured PCM plays back correctly, but the stream produced by the AAC encoder does not.
- Encoding uses FFmpeg's aac encoder, which only accepts the AV_SAMPLE_FMT_FLTP sample format, while the PCM captured on Windows via dshow is AV_SAMPLE_FMT_S16, so the audio has to be resampled/converted before encoding (a minimal conversion sketch follows this list).
- The encoder outputs raw AAC frames without an ADTS header, so a 7-byte ADTS header is prepended to each encoded frame by hand; the header carries the sample rate, the profile/encoding format and the channel count (see the ADTS sketch after this list).
- Each call to av_read_frame(fmt_ctx, pkt) returns 88200 bytes of PCM, while the encoder consumes 4096 bytes per frame (1024 samples x 2 channels x 2 bytes of S16), so the captured data is split into 4096-byte chunks; because 88200 is not a multiple of 4096, the leftover bytes are carried over to the next packet (see the chunking sketch after this list).
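A minimal sketch of the S16-to-FLTP conversion the program performs, assuming the same FFmpeg 4.x-era API as the full code below; the helper names make_s16_to_fltp_ctx and s16_to_fltp are illustrative, not FFmpeg functions.

// Convert interleaved S16 stereo PCM (as captured by dshow) into planar float
// (AV_SAMPLE_FMT_FLTP) for the aac encoder.
extern "C" {
#include "libswresample/swresample.h"
#include "libavutil/samplefmt.h"
#include "libavutil/channel_layout.h"
#include "libavutil/frame.h"
}

static SwrContext* make_s16_to_fltp_ctx()
{
    SwrContext* swr = swr_alloc_set_opts(NULL,
        AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_FLTP, 44100,   // output: what the aac encoder wants
        AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16,  44100,   // input: what dshow delivers
        0, NULL);
    if (!swr || swr_init(swr) < 0)
        return NULL;
    return swr;
}

// Fills 'frame' (assumed already set up as FLTP, 1024 samples, stereo, 44100 Hz)
// from 1024 interleaved S16 stereo samples in 'pcm'.
static int s16_to_fltp(SwrContext* swr, AVFrame* frame, const uint8_t* pcm)
{
    const uint8_t* in[1] = { pcm };   // interleaved input uses a single plane
    return swr_convert(swr, frame->data, frame->nb_samples, in, frame->nb_samples);
}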
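A minimal sketch of the 7-byte ADTS header, fixed to what this program produces (AAC LC, 44100 Hz, stereo); write_adts_header is an illustrative helper name, not an FFmpeg API.

#include <cstdint>

static void write_adts_header(uint8_t adts[7], int aac_payload_size)
{
    int frame_len = aac_payload_size + 7;      // frame_length counts the 7-byte header itself
    adts[0] = 0xFF;                            // syncword 0xFFF (high 8 bits)
    adts[1] = 0xF1;                            // syncword low bits, MPEG-4, layer 00, no CRC
    adts[2] = (uint8_t)((1 << 6) | (4 << 2));  // profile index 1 (AAC LC), freq index 4 (44100 Hz)
    adts[3] = (uint8_t)((2 << 6) | ((frame_len >> 11) & 0x03)); // channel config 2 (stereo) + frame_len bits 12..11
    adts[4] = (uint8_t)((frame_len >> 3) & 0xFF);               // frame_len bits 10..3
    adts[5] = (uint8_t)(((frame_len & 0x07) << 5) | 0x1F);      // frame_len bits 2..0 + buffer fullness high bits
    adts[6] = 0xFC;                                             // buffer fullness low bits + one raw data block
}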
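A minimal sketch of the packet-splitting idea, assuming S16 stereo input so one encoder frame is 1024 samples x 2 channels x 2 bytes = 4096 bytes; encode_chunk stands in for the resample-and-encode step of the full program.

#include <cstring>
#include <cstdint>

static const int kChunkSize = 4096;     // bytes per encoder frame (1024 S16 stereo samples)
static uint8_t  chunk[kChunkSize];
static int      carried = 0;            // bytes already in 'chunk' left over from the previous packet

static void split_into_chunks(const uint8_t* data, int size,
                              void (*encode_chunk)(const uint8_t*, int))
{
    int used = 0;
    while (used < size) {
        int need = kChunkSize - carried;                     // bytes still missing in the current chunk
        int take = (size - used < need) ? (size - used) : need;
        memcpy(chunk + carried, data + used, take);
        carried += take;
        used    += take;
        if (carried == kChunkSize) {                         // full 4096-byte chunk: hand it to the encoder
            encode_chunk(chunk, kChunkSize);
            carried = 0;
        }                                                    // otherwise keep the partial chunk for the next packet
    }
}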
Code
#include <iostream>
//#include <WinBase.h>
#define __STDC_CONSTANT_MACROS
extern "C"
{
#include "libavutil/avutil.h"
#include "libavutil/samplefmt.h"
#include "libavcodec/avcodec.h"
#include "libavdevice/avdevice.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include <stdio.h>
#include <strsafe.h>
#include <string.h>
}

#ifdef _MSC_VER
#include <tchar.h>
#include <dshow.h>
#include <atlcomcli.h>
#pragma comment(lib, "Strmiids.lib")
#endif

using namespace std;

// Convert a string from one Windows code page to another (here: ANSI -> UTF-8,
// because FFmpeg's dshow demuxer expects the device name in UTF-8).
void Convert(const char* strIn, char* strOut, int sourceCodepage, int targetCodepage)
{
    int unicodeLen = MultiByteToWideChar(sourceCodepage, 0, strIn, -1, NULL, 0);
    wchar_t* pUnicode = new wchar_t[unicodeLen + 1];
    memset(pUnicode, 0, (unicodeLen + 1) * sizeof(wchar_t));
    MultiByteToWideChar(sourceCodepage, 0, strIn, -1, (LPWSTR)pUnicode, unicodeLen);

    int targetLen = WideCharToMultiByte(targetCodepage, 0, (LPWSTR)pUnicode, -1, NULL, 0, NULL, NULL);
    BYTE* pTargetData = new BYTE[targetLen + 1];
    memset(pTargetData, 0, targetLen + 1);
    WideCharToMultiByte(targetCodepage, 0, (LPWSTR)pUnicode, -1, (char*)pTargetData, targetLen, NULL, NULL);

    strcpy(strOut, (const char*)pTargetData);
    delete[] pUnicode;
    delete[] pTargetData;
}

// Query the FriendlyName of the first audio capture device via DirectShow.
void Get_Capture_Audio_Devices_Info(char* name)
{
#ifdef _MSC_VER
    CoInitialize(NULL);
    CComPtr<ICreateDevEnum> pCreateDevEnum;
    HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC_SERVER,
                                  IID_ICreateDevEnum, (void**)&pCreateDevEnum);
    CComPtr<IEnumMoniker> pEm;
    hr = pCreateDevEnum->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &pEm, 0);
    if (hr != NOERROR)
    {
        return;
    }
    pEm->Reset();
    ULONG cFetched;
    IMoniker* pM = NULL;
    while (hr = pEm->Next(1, &pM, &cFetched), hr == S_OK)
    {
        IPropertyBag* pBag = 0;
        hr = pM->BindToStorage(0, 0, IID_IPropertyBag, (void**)&pBag);
        if (SUCCEEDED(hr))
        {
            VARIANT var;
            var.vt = VT_BSTR;
            hr = pBag->Read(L"FriendlyName", &var, NULL); // other properties (description, ...) are also available
            if (hr == NOERROR)
            {
                // Store the device name as an ANSI string
                WideCharToMultiByte(CP_ACP, 0, var.bstrVal, -1, name, 128, "", NULL);
                SysFreeString(var.bstrVal);
            }
            pBag->Release();
        }
        pM->Release();
    }
    pCreateDevEnum = NULL;
    pEm = NULL;
#else
    memcpy(name, "default", strlen("default") + 1);
#endif
}

// Check whether the encoder supports a given sample format
static int check_sample_fmt(const AVCodec* codec, enum AVSampleFormat sample_fmt)
{
    const enum AVSampleFormat* p = codec->sample_fmts;
    while (*p != AV_SAMPLE_FMT_NONE)
    {
        printf("supported sample_fmt: %s\n", av_get_sample_fmt_name(*p));
        if (*p == sample_fmt)
            return 1;
        p++;
    }
    return 0;
}

// Pick the supported sample rate closest to 44100 Hz
static int select_sample_rate(const AVCodec* codec)
{
    const int* p;
    int best_samplerate = 0;
    if (!codec->supported_samplerates)
        return 44100;
    p = codec->supported_samplerates;
    while (*p)
    {
        if (!best_samplerate || abs(44100 - *p) < abs(44100 - best_samplerate))
            best_samplerate = *p;
        p++;
    }
    return best_samplerate;
}

/* Select the layout with the highest channel count (kept for reference; the
   pipeline below is fixed to stereo, so it is no longer called). */
static int select_channel_layout(const AVCodec* codec)
{
    const uint64_t* p;
    uint64_t best_ch_layout = 0;
    int best_nb_channels = 0;
    if (!codec->channel_layouts)
        return AV_CH_LAYOUT_STEREO;
    p = codec->channel_layouts;
    while (*p)
    {
        int nb_channels = av_get_channel_layout_nb_channels(*p);
        if (nb_channels > best_nb_channels)
        {
            best_ch_layout = *p;
            best_nb_channels = nb_channels;
        }
        p++;
    }
    return (int)best_ch_layout;
}

int main()
{
    AVFormatContext* fmt_ctx = NULL;
    AVDictionary* options = NULL;
    int ret = 0;
    char errors[1024] = { 0 };
    char device_name[256] = { 0 };
    char file_name[256] = "collection.aac";
    char file_name_pcm[256] = "pltf_collection.pcm";
    char name[128] = { 0 };
    char name_utf8[128] = { 0 };
    FILE* out_file = NULL;

    /* Resampling parameters */
    // A raw float PCM dump can be played with: ffplay -ar 44100 -f f32le -channels 2 -i collection.pcm
    AVPacket* pkt = NULL;
    int dst_rate = 44100, src_rate = 44100;
    uint8_t** src_data = NULL, ** dst_data = NULL;
    int64_t src_ch_layout = AV_CH_LAYOUT_STEREO, dst_ch_layout = AV_CH_LAYOUT_STEREO;
    enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_S16, dst_sample_fmt = AV_SAMPLE_FMT_FLTP;
    int src_nb_channels = 0, dst_nb_channels = 0;
    int src_linesize, dst_linesize;
    int src_nb_samples = 1024, dst_nb_samples, max_dst_nb_samples;
    int dst_bufsize = 0;

    /* Encoding parameters */
    AVCodec* codec;                       // encoder
    AVCodecContext* codec_context = NULL; // encoder context
    AVFrame* frame;                       // raw (resampled) frame
    AVPacket enpkt;                       // encoded packet

    av_register_all();
    avdevice_register_all();

    AVInputFormat* in_format = av_find_input_format("dshow");
    if (in_format == NULL)
    {
        printf("av_find_input_format error\n");
    }

    // Build the capture device name. Without the UTF-8 conversion, opening the
    // device fails with "Immediate exit requested".
    Get_Capture_Audio_Devices_Info(name);
    Convert(name, name_utf8, CP_ACP, CP_UTF8);
    sprintf(device_name, "audio=%s", name_utf8);
    printf("device_name:%s\n", device_name);

    if ((ret = avformat_open_input(&fmt_ctx, device_name, in_format, NULL)) != 0)
    {
        av_strerror(ret, errors, 1024);
        printf("Failed to open audio device, [%s][%d]\n", errors, ret);
        return -1;
    }

    pkt = av_packet_alloc(); // pkt holds the captured PCM data
    av_init_packet(pkt);
    out_file = fopen(file_name, "wb+");

    /* ---- Resampler setup: start ---- */
    SwrContext* swr_ctx = NULL;
    // swr_alloc_set_opts arguments:
    //   1. an existing resample context, or NULL to allocate a new one
    //   2. output channel layout (speaker placement)
    //   3. output sample format
    //   4. output sample rate
    //   5. input channel layout
    //   6. input sample format
    //   7. input sample rate
    //   8./9. logging offset and context; 0 and NULL are fine
    swr_ctx = swr_alloc_set_opts(NULL,
                                 dst_ch_layout, dst_sample_fmt, dst_rate,
                                 src_ch_layout, src_sample_fmt, src_rate,
                                 0, NULL);
    if (!swr_ctx)
    {
        return -1;
    }
    /* initialize the resampling context */
    if ((ret = swr_init(swr_ctx)) < 0)
    {
        fprintf(stderr, "Failed to initialize the resampling context\n");
        return -1;
    }

    // Input buffer (interleaved S16)
    src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    printf("src_nb_channels:%d\n", src_nb_channels);
    ret = av_samples_alloc_array_and_samples(
        &src_data,       // input buffer array
        &src_linesize,   // buffer size per plane
        src_nb_channels, // number of channels
        src_nb_samples,  // number of samples per channel
        src_sample_fmt,  // sample format
        0);
    if (ret < 0)
    {
        fprintf(stderr, "Could not allocate source samples\n");
        return -1;
    }
    printf("src_linesize:%d\n", src_linesize);

    // Output buffer (planar float)
    max_dst_nb_samples = dst_nb_samples =
        av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
    printf("max_dst_nb_samples:%d dst_nb_samples:%d\n", max_dst_nb_samples, dst_nb_samples);
    ret = av_samples_alloc_array_and_samples(
        &dst_data,       // output buffer array
        &dst_linesize,   // buffer size per plane
        dst_nb_channels, // number of channels
        dst_nb_samples,  // number of samples per channel
        dst_sample_fmt,  // sample format
        0);
    if (ret < 0)
    {
        fprintf(stderr, "Could not allocate destination samples\n");
        return -1;
    }
    printf("src_linesize:%d dst_linesize:%d\n", src_linesize, dst_linesize);
    /* ---- Resampler setup: end ---- */

    /* ---- Encoder setup: start ---- */
    // 1. Find the aac encoder
    codec = avcodec_find_encoder_by_name("aac");
    if (!codec)
    {
        fprintf(stderr, "Codec not found\n");
        return -1;
    }
    // 2. Allocate the encoder context
    codec_context = avcodec_alloc_context3(codec);
    if (!codec_context)
    {
        fprintf(stderr, "Could not allocate audio codec context\n");
        exit(1);
    }
    // 3. Configure the encoder context
    codec_context->profile = FF_PROFILE_AAC_LOW; // AAC LC, matching the ADTS header written below
    codec_context->bit_rate = 128000;
    codec_context->sample_fmt = dst_sample_fmt;
    if (!check_sample_fmt(codec, codec_context->sample_fmt))
    {
        fprintf(stderr, "Encoder does not support sample format %s",
                av_get_sample_fmt_name(codec_context->sample_fmt));
        return -1;
    }
    // Sample rate: could also be picked with select_sample_rate(codec)
    codec_context->sample_rate = dst_rate;
    // The whole pipeline is stereo (capture, resampler, ADTS header), so set the
    // layout explicitly; select_channel_layout(codec) would pick the encoder's
    // widest layout and mismatch the rest of the pipeline.
    codec_context->channel_layout = dst_ch_layout;
    codec_context->channels = av_get_channel_layout_nb_channels(codec_context->channel_layout);
    //codec_context->frame_size = dst_nb_samples + 1024;
    printf("codec_context->channels:%d\n", codec_context->channels);

    // 4. Open the encoder
    if (avcodec_open2(codec_context, codec, NULL) < 0)
    {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }
    int encode_nb_sample = codec_context->frame_size; // 1024 samples per AAC frame
    printf("codec_context->frame_size:%d\n", codec_context->frame_size);

    // 5. Allocate the raw frame
    frame = av_frame_alloc();
    if (!frame)
    {
        fprintf(stderr, "Could not allocate audio frame\n");
        exit(1);
    }
    // 6. Frame parameters must match the encoder context
    frame->nb_samples = encode_nb_sample;
    printf("codec_context->sample_fmt:%s\n", av_get_sample_fmt_name(codec_context->sample_fmt));
    frame->format = codec_context->sample_fmt;
    printf("codec_context->channel_layout:%llu\n", (unsigned long long)codec_context->channel_layout);
    frame->channel_layout = codec_context->channel_layout;
    frame->sample_rate = dst_rate;
    /* allocate the data buffers */
    ret = av_frame_get_buffer(frame, 0);
    if (ret < 0)
    {
        fprintf(stderr, "Could not allocate audio data buffers\n");
        exit(1);
    }
    printf("frame->linesize[0]:%d\n", frame->linesize[0]);

    // Muxer-based variant (unused): copy parameters and write a container header instead of raw ADTS
    //AVStream* st = avformat_new_stream(fmt_ctx, codec);
    //avcodec_parameters_from_context(st->codecpar, codec_context);
    //avformat_write_header(fmt_ctx, NULL);
    /* ---- Encoder setup: end ---- */

    av_dump_format(fmt_ctx, 0, device_name, 0);

    while (!av_read_frame(fmt_ctx, pkt))
    {
        printf("captured packet size:%d\n", pkt->size);

        /* ---- Resampling: start ---- */
        /* compute destination number of samples */
        dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) + src_nb_samples,
                                        dst_rate, src_rate, AV_ROUND_UP);
        printf("dst_nb_samples:%d\n", dst_nb_samples);
        if (dst_nb_samples > max_dst_nb_samples)
        {
            av_freep(&dst_data[0]);
            ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
                                   dst_nb_samples, dst_sample_fmt, 1);
            if (ret < 0)
                break;
            max_dst_nb_samples = dst_nb_samples;
        }

        // Split the captured packet (88200 bytes) into encoder-sized chunks
        // (src_linesize = 4096 bytes = 1024 S16 stereo samples). 'offset' holds the
        // bytes of an incomplete chunk carried over from the previous packet.
        // src_data is an array of planes; the input is interleaved, so only plane 0 is used.
        static int offset = 0;
        int use_size = 0;
        while (use_size < pkt->size)
        {
            if (use_size + src_linesize + offset > pkt->size)
            {
                // Not enough data left for a full chunk: keep the tail for the next packet
                offset = pkt->size - use_size;
                memset(src_data[0], 0, src_linesize);
                memcpy(src_data[0], pkt->data + use_size, offset);
                break;
            }
            if (offset)
            {
                // Complete the chunk started at the end of the previous packet
                printf("use_size:%d src_linesize-offset:%d pkt->size:%d\n",
                       use_size, src_linesize - offset, pkt->size);
                memcpy(src_data[0] + offset, pkt->data + use_size, src_linesize - offset);
                use_size += (src_linesize - offset);
                offset = 0;
            }
            else
            {
                memset(src_data[0], 0, src_linesize);
                printf("use_size:%d pkt->size:%d src_linesize:%d\n", use_size, pkt->size, src_linesize);
                memcpy(src_data[0], pkt->data + use_size, src_linesize);
                use_size += src_linesize;
            }

            // Convert the S16 chunk straight into the encoder frame's planar float buffers.
            // The frame may still be referenced by the encoder, so make it writable first.
            if (av_frame_make_writable(frame) < 0)
                break;
            //ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t**)src_data, src_nb_samples);
            ret = swr_convert(swr_ctx, frame->data, frame->nb_samples,
                              (const uint8_t**)src_data, src_nb_samples);
            if (ret < 0)
            {
                fprintf(stderr, "Error while converting\n");
                return -1;
            }
            // To dump the resampled PCM instead (playable with the ffplay command above):
            //dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels, ret, dst_sample_fmt, 0);
            //fwrite(dst_data[0], 1, dst_bufsize, out_file);
            //fwrite(dst_data[1], 1, dst_bufsize, out_file);
            //fflush(out_file);
            printf("dst_bufsize:%d frame->linesize[0]:%d\n", dst_bufsize, frame->linesize[0]);
            /* ---- Resampling: end ---- */

            /* ---- Encoding: start ---- */
            av_init_packet(&enpkt);
            enpkt.data = NULL;
            enpkt.size = 0;
            //memcpy(frame->data[0], dst_data[0], dst_bufsize / 2);
            //memcpy(frame->data[1], dst_data[0] + dst_bufsize / 2, dst_bufsize / 2);
            ret = avcodec_send_frame(codec_context, frame);
            while (ret >= 0)
            {
                ret = avcodec_receive_packet(codec_context, &enpkt);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                {
                    break; // the encoder needs more input
                }
                else if (ret < 0)
                {
                    printf("avcodec_receive_packet error! [%d]\n", ret);
                    return -1;
                }

                printf("enpkt.size:%d codec_context->profile:%d\n", enpkt.size, codec_context->profile);
                // Prepend a 7-byte ADTS header. The hard-coded bytes encode:
                // profile index 1 (AAC LC), sampling-frequency index 4 (44100 Hz),
                // channel configuration 2 (stereo).
                unsigned char aac_buffer[7] = { 0 };
                int adts_len = enpkt.size + 7; // frame_length counts the header itself
                aac_buffer[0] = 0xFF;          // syncword (high 8 bits)
                aac_buffer[1] = 0xF1;          // syncword low bits, MPEG-4, layer 00, no CRC
                aac_buffer[2] = 0x50;          // profile = LC, frequency index = 4 (44100 Hz)
                aac_buffer[3] = (unsigned char)(0x80 | ((adts_len >> 11) & 0x03)); // stereo + frame_length bits 12..11
                aac_buffer[4] = (unsigned char)((adts_len >> 3) & 0xFF);           // frame_length bits 10..3
                aac_buffer[5] = (unsigned char)(((adts_len & 0x07) << 5) | 0x1F);  // frame_length bits 2..0 + buffer fullness
                aac_buffer[6] = 0xFC;                                              // buffer fullness + one raw data block

                fwrite(aac_buffer, 1, 7, out_file);
                fwrite(enpkt.data, 1, enpkt.size, out_file);
                fflush(out_file);
                av_packet_unref(&enpkt);
            }
            /* ---- Encoding: end ---- */
        }

        printf("dst_nb_samples:%d src_nb_samples:%d frame->nb_samples:%d\n",
               dst_nb_samples, src_nb_samples, frame->nb_samples);

#if 0   // Earlier attempt (disabled): resample into dst_data and write through the format context
        ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t**)src_data, src_nb_samples);
        if (ret < 0)
        {
            fprintf(stderr, "Error while converting\n");
            return -1;
        }
        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels, ret, dst_sample_fmt, 1);
        if (dst_bufsize < 0)
        {
            fprintf(stderr, "Could not get sample buffer size\n");
            return -1;
        }
        av_init_packet(&enpkt);
        enpkt.data = NULL;
        enpkt.size = 0;
        ret = avcodec_send_frame(codec_context, frame);
        while (ret >= 0)
        {
            ret = avcodec_receive_packet(codec_context, &enpkt);
            if (ret < 0)
            {
                printf("avcodec_receive_packet error!\n");
                return -1;
            }
            ret = av_write_frame(fmt_ctx, &enpkt);
            av_packet_unref(&enpkt);
        }
        av_write_trailer(fmt_ctx);
        printf("enpkt.size:%d\n", enpkt.size);
        //fwrite(dst_data[0], 1, dst_bufsize, out_file);
        //fflush(out_file);
#endif

        av_packet_unref(pkt);
    }

    swr_free(&swr_ctx);
    av_packet_free(&pkt);
    avformat_close_input(&fmt_ctx);
    fclose(out_file);
    // Playback check: ffplay collection.aac
    return 0;
}