Conclusions
- The captured PCM plays back correctly, but the stream produced by the AAC encoder does not.
- Encoding uses FFmpeg's aac encoder, which only accepts the AV_SAMPLE_FMT_FLTP sample format, while the PCM captured on Windows via dshow is AV_SAMPLE_FMT_S16, so the audio has to be resampled/converted before encoding (a minimal conversion sketch follows this list).
- The encoder outputs raw AAC frames without an ADTS header, so a 7-byte ADTS header is prepended to each encoded frame by hand; the header carries the sample rate, the profile/encoding format and the channel count (see the ADTS sketch after this list).
- Each call to av_read_frame(fmt_ctx, pkt) returns 88200 bytes of PCM, while the encoder consumes 4096 bytes per frame (1024 samples x 2 channels x 2 bytes of S16), so the captured data is split into 4096-byte chunks; because 88200 is not a multiple of 4096, the leftover bytes are carried over to the next packet (see the chunking sketch after this list).
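A minimal sketch of the S16-to-FLTP conversion the program performs, assuming the same FFmpeg 4.x-era API as the full code below; the helper names make_s16_to_fltp_ctx and s16_to_fltp are illustrative, not FFmpeg functions.

// Convert interleaved S16 stereo PCM (as captured by dshow) into planar float
// (AV_SAMPLE_FMT_FLTP) for the aac encoder.
extern "C" {
#include "libswresample/swresample.h"
#include "libavutil/samplefmt.h"
#include "libavutil/channel_layout.h"
#include "libavutil/frame.h"
}

static SwrContext* make_s16_to_fltp_ctx()
{
    SwrContext* swr = swr_alloc_set_opts(NULL,
        AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_FLTP, 44100,   // output: what the aac encoder wants
        AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16,  44100,   // input: what dshow delivers
        0, NULL);
    if (!swr || swr_init(swr) < 0)
        return NULL;
    return swr;
}

// Fills 'frame' (assumed already set up as FLTP, 1024 samples, stereo, 44100 Hz)
// from 1024 interleaved S16 stereo samples in 'pcm'.
static int s16_to_fltp(SwrContext* swr, AVFrame* frame, const uint8_t* pcm)
{
    const uint8_t* in[1] = { pcm };   // interleaved input uses a single plane
    return swr_convert(swr, frame->data, frame->nb_samples, in, frame->nb_samples);
}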
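A minimal sketch of the 7-byte ADTS header, fixed to what this program produces (AAC LC, 44100 Hz, stereo); write_adts_header is an illustrative helper name, not an FFmpeg API.

#include <cstdint>

static void write_adts_header(uint8_t adts[7], int aac_payload_size)
{
    int frame_len = aac_payload_size + 7;      // frame_length counts the 7-byte header itself
    adts[0] = 0xFF;                            // syncword 0xFFF (high 8 bits)
    adts[1] = 0xF1;                            // syncword low bits, MPEG-4, layer 00, no CRC
    adts[2] = (uint8_t)((1 << 6) | (4 << 2));  // profile index 1 (AAC LC), freq index 4 (44100 Hz)
    adts[3] = (uint8_t)((2 << 6) | ((frame_len >> 11) & 0x03)); // channel config 2 (stereo) + frame_len bits 12..11
    adts[4] = (uint8_t)((frame_len >> 3) & 0xFF);               // frame_len bits 10..3
    adts[5] = (uint8_t)(((frame_len & 0x07) << 5) | 0x1F);      // frame_len bits 2..0 + buffer fullness high bits
    adts[6] = 0xFC;                                             // buffer fullness low bits + one raw data block
}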
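A minimal sketch of the packet-splitting idea, assuming S16 stereo input so one encoder frame is 1024 samples x 2 channels x 2 bytes = 4096 bytes; encode_chunk stands in for the resample-and-encode step of the full program.

#include <cstring>
#include <cstdint>

static const int kChunkSize = 4096;     // bytes per encoder frame (1024 S16 stereo samples)
static uint8_t  chunk[kChunkSize];
static int      carried = 0;            // bytes already in 'chunk' left over from the previous packet

static void split_into_chunks(const uint8_t* data, int size,
                              void (*encode_chunk)(const uint8_t*, int))
{
    int used = 0;
    while (used < size) {
        int need = kChunkSize - carried;                     // bytes still missing in the current chunk
        int take = (size - used < need) ? (size - used) : need;
        memcpy(chunk + carried, data + used, take);
        carried += take;
        used    += take;
        if (carried == kChunkSize) {                         // full 4096-byte chunk: hand it to the encoder
            encode_chunk(chunk, kChunkSize);
            carried = 0;
        }                                                    // otherwise keep the partial chunk for the next packet
    }
}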
Code
#include <iostream>
//#include <WinBase.h>
#define __STDC_CONSTANT_MACROS
extern "C"
{
#include "libavutil/avutil.h"
#include "libavutil/samplefmt.h"
#include "libavcodec/avcodec.h"
#include "libavdevice/avdevice.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include <stdio.h>
#include <strsafe.h>
#include <string.h>
}

#ifdef _MSC_VER
#include <tchar.h>
#include <dshow.h>
#include <atlcomcli.h>
#pragma comment(lib, "Strmiids.lib")
#endif

using namespace std;

// Convert a string from one Windows code page to another (here: ANSI -> UTF-8,
// because FFmpeg's dshow demuxer expects the device name in UTF-8).
void Convert(const char* strIn, char* strOut, int sourceCodepage, int targetCodepage)
{
    int unicodeLen = MultiByteToWideChar(sourceCodepage, 0, strIn, -1, NULL, 0);
    wchar_t* pUnicode = new wchar_t[unicodeLen + 1];
    memset(pUnicode, 0, (unicodeLen + 1) * sizeof(wchar_t));
    MultiByteToWideChar(sourceCodepage, 0, strIn, -1, (LPWSTR)pUnicode, unicodeLen);

    int targetLen = WideCharToMultiByte(targetCodepage, 0, (LPWSTR)pUnicode, -1, NULL, 0, NULL, NULL);
    BYTE* pTargetData = new BYTE[targetLen + 1];
    memset(pTargetData, 0, targetLen + 1);
    WideCharToMultiByte(targetCodepage, 0, (LPWSTR)pUnicode, -1, (char*)pTargetData, targetLen, NULL, NULL);

    strcpy(strOut, (const char*)pTargetData);
    delete[] pUnicode;
    delete[] pTargetData;
}

// Query the FriendlyName of the first audio capture device via DirectShow.
void Get_Capture_Audio_Devices_Info(char* name)
{
#ifdef _MSC_VER
    CoInitialize(NULL);
    CComPtr<ICreateDevEnum> pCreateDevEnum;
    HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC_SERVER,
                                  IID_ICreateDevEnum, (void**)&pCreateDevEnum);
    CComPtr<IEnumMoniker> pEm;
    hr = pCreateDevEnum->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &pEm, 0);
    if (hr != NOERROR)
    {
        return;
    }
    pEm->Reset();
    ULONG cFetched;
    IMoniker* pM = NULL;
    while (hr = pEm->Next(1, &pM, &cFetched), hr == S_OK)
    {
        IPropertyBag* pBag = 0;
        hr = pM->BindToStorage(0, 0, IID_IPropertyBag, (void**)&pBag);
        if (SUCCEEDED(hr))
        {
            VARIANT var;
            var.vt = VT_BSTR;
            hr = pBag->Read(L"FriendlyName", &var, NULL); // other properties (description, ...) are also available
            if (hr == NOERROR)
            {
                // Store the device name as an ANSI string
                WideCharToMultiByte(CP_ACP, 0, var.bstrVal, -1, name, 128, "", NULL);
                SysFreeString(var.bstrVal);
            }
            pBag->Release();
        }
        pM->Release();
    }
    pCreateDevEnum = NULL;
    pEm = NULL;
#else
    memcpy(name, "default", strlen("default") + 1);
#endif
}

// Check whether the encoder supports a given sample format
static int check_sample_fmt(const AVCodec* codec, enum AVSampleFormat sample_fmt)
{
    const enum AVSampleFormat* p = codec->sample_fmts;
    while (*p != AV_SAMPLE_FMT_NONE)
    {
        printf("supported sample_fmt: %s\n", av_get_sample_fmt_name(*p));
        if (*p == sample_fmt)
            return 1;
        p++;
    }
    return 0;
}

// Pick the supported sample rate closest to 44100 Hz
static int select_sample_rate(const AVCodec* codec)
{
    const int* p;
    int best_samplerate = 0;
    if (!codec->supported_samplerates)
        return 44100;
    p = codec->supported_samplerates;
    while (*p)
    {
        if (!best_samplerate || abs(44100 - *p) < abs(44100 - best_samplerate))
            best_samplerate = *p;
        p++;
    }
    return best_samplerate;
}

/* Select the layout with the highest channel count (kept for reference; the
   pipeline below is fixed to stereo, so it is no longer called). */
static int select_channel_layout(const AVCodec* codec)
{
    const uint64_t* p;
    uint64_t best_ch_layout = 0;
    int best_nb_channels = 0;
    if (!codec->channel_layouts)
        return AV_CH_LAYOUT_STEREO;
    p = codec->channel_layouts;
    while (*p)
    {
        int nb_channels = av_get_channel_layout_nb_channels(*p);
        if (nb_channels > best_nb_channels)
        {
            best_ch_layout = *p;
            best_nb_channels = nb_channels;
        }
        p++;
    }
    return (int)best_ch_layout;
}

int main()
{
    AVFormatContext* fmt_ctx = NULL;
    AVDictionary* options = NULL;
    int ret = 0;
    char errors[1024] = { 0 };
    char device_name[256] = { 0 };
    char file_name[256] = "collection.aac";
    char file_name_pcm[256] = "pltf_collection.pcm";
    char name[128] = { 0 };
    char name_utf8[128] = { 0 };
    FILE* out_file = NULL;

    /* Resampling parameters */
    // A raw float PCM dump can be played with: ffplay -ar 44100 -f f32le -channels 2 -i collection.pcm
    AVPacket* pkt = NULL;
    int dst_rate = 44100, src_rate = 44100;
    uint8_t** src_data = NULL, ** dst_data = NULL;
    int64_t src_ch_layout = AV_CH_LAYOUT_STEREO, dst_ch_layout = AV_CH_LAYOUT_STEREO;
    enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_S16, dst_sample_fmt = AV_SAMPLE_FMT_FLTP;
    int src_nb_channels = 0, dst_nb_channels = 0;
    int src_linesize, dst_linesize;
    int src_nb_samples = 1024, dst_nb_samples, max_dst_nb_samples;
    int dst_bufsize = 0;

    /* Encoding parameters */
    AVCodec* codec;                       // encoder
    AVCodecContext* codec_context = NULL; // encoder context
    AVFrame* frame;                       // raw (resampled) frame
    AVPacket enpkt;                       // encoded packet

    av_register_all();
    avdevice_register_all();

    AVInputFormat* in_format = av_find_input_format("dshow");
    if (in_format == NULL)
    {
        printf("av_find_input_format error\n");
    }

    // Build the capture device name. Without the UTF-8 conversion, opening the
    // device fails with "Immediate exit requested".
    Get_Capture_Audio_Devices_Info(name);
    Convert(name, name_utf8, CP_ACP, CP_UTF8);
    sprintf(device_name, "audio=%s", name_utf8);
    printf("device_name:%s\n", device_name);

    if ((ret = avformat_open_input(&fmt_ctx, device_name, in_format, NULL)) != 0)
    {
        av_strerror(ret, errors, 1024);
        printf("Failed to open audio device, [%s][%d]\n", errors, ret);
        return -1;
    }

    pkt = av_packet_alloc(); // pkt holds the captured PCM data
    av_init_packet(pkt);
    out_file = fopen(file_name, "wb+");

    /* ---- Resampler setup: start ---- */
    SwrContext* swr_ctx = NULL;
    // swr_alloc_set_opts arguments:
    //   1. an existing resample context, or NULL to allocate a new one
    //   2. output channel layout (speaker placement)
    //   3. output sample format
    //   4. output sample rate
    //   5. input channel layout
    //   6. input sample format
    //   7. input sample rate
    //   8./9. logging offset and context; 0 and NULL are fine
    swr_ctx = swr_alloc_set_opts(NULL,
                                 dst_ch_layout, dst_sample_fmt, dst_rate,
                                 src_ch_layout, src_sample_fmt, src_rate,
                                 0, NULL);
    if (!swr_ctx)
    {
        return -1;
    }
    /* initialize the resampling context */
    if ((ret = swr_init(swr_ctx)) < 0)
    {
        fprintf(stderr, "Failed to initialize the resampling context\n");
        return -1;
    }

    // Input buffer (interleaved S16)
    src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    printf("src_nb_channels:%d\n", src_nb_channels);
    ret = av_samples_alloc_array_and_samples(
        &src_data,       // input buffer array
        &src_linesize,   // buffer size per plane
        src_nb_channels, // number of channels
        src_nb_samples,  // number of samples per channel
        src_sample_fmt,  // sample format
        0);
    if (ret < 0)
    {
        fprintf(stderr, "Could not allocate source samples\n");
        return -1;
    }
    printf("src_linesize:%d\n", src_linesize);

    // Output buffer (planar float)
    max_dst_nb_samples = dst_nb_samples =
        av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
    printf("max_dst_nb_samples:%d dst_nb_samples:%d\n", max_dst_nb_samples, dst_nb_samples);
    ret = av_samples_alloc_array_and_samples(
        &dst_data,       // output buffer array
        &dst_linesize,   // buffer size per plane
        dst_nb_channels, // number of channels
        dst_nb_samples,  // number of samples per channel
        dst_sample_fmt,  // sample format
        0);
    if (ret < 0)
    {
        fprintf(stderr, "Could not allocate destination samples\n");
        return -1;
    }
    printf("src_linesize:%d dst_linesize:%d\n", src_linesize, dst_linesize);
    /* ---- Resampler setup: end ---- */

    /* ---- Encoder setup: start ---- */
    // 1. Find the aac encoder
    codec = avcodec_find_encoder_by_name("aac");
    if (!codec)
    {
        fprintf(stderr, "Codec not found\n");
        return -1;
    }
    // 2. Allocate the encoder context
    codec_context = avcodec_alloc_context3(codec);
    if (!codec_context)
    {
        fprintf(stderr, "Could not allocate audio codec context\n");
        exit(1);
    }
    // 3. Configure the encoder context
    codec_context->profile = FF_PROFILE_AAC_LOW; // AAC LC, matching the ADTS header written below
    codec_context->bit_rate = 128000;
    codec_context->sample_fmt = dst_sample_fmt;
    if (!check_sample_fmt(codec, codec_context->sample_fmt))
    {
        fprintf(stderr, "Encoder does not support sample format %s",
                av_get_sample_fmt_name(codec_context->sample_fmt));
        return -1;
    }
    // Sample rate: could also be picked with select_sample_rate(codec)
    codec_context->sample_rate = dst_rate;
    // The whole pipeline is stereo (capture, resampler, ADTS header), so set the
    // layout explicitly; select_channel_layout(codec) would pick the encoder's
    // widest layout and mismatch the rest of the pipeline.
    codec_context->channel_layout = dst_ch_layout;
    codec_context->channels = av_get_channel_layout_nb_channels(codec_context->channel_layout);
    //codec_context->frame_size = dst_nb_samples + 1024;
    printf("codec_context->channels:%d\n", codec_context->channels);

    // 4. Open the encoder
    if (avcodec_open2(codec_context, codec, NULL) < 0)
    {
        fprintf(stderr, "Could not open codec\n");
        exit(1);
    }
    int encode_nb_sample = codec_context->frame_size; // 1024 samples per AAC frame
    printf("codec_context->frame_size:%d\n", codec_context->frame_size);

    // 5. Allocate the raw frame
    frame = av_frame_alloc();
    if (!frame)
    {
        fprintf(stderr, "Could not allocate audio frame\n");
        exit(1);
    }
    // 6. Frame parameters must match the encoder context
    frame->nb_samples = encode_nb_sample;
    printf("codec_context->sample_fmt:%s\n", av_get_sample_fmt_name(codec_context->sample_fmt));
    frame->format = codec_context->sample_fmt;
    printf("codec_context->channel_layout:%llu\n", (unsigned long long)codec_context->channel_layout);
    frame->channel_layout = codec_context->channel_layout;
    frame->sample_rate = dst_rate;
    /* allocate the data buffers */
    ret = av_frame_get_buffer(frame, 0);
    if (ret < 0)
    {
        fprintf(stderr, "Could not allocate audio data buffers\n");
        exit(1);
    }
    printf("frame->linesize[0]:%d\n", frame->linesize[0]);

    // Muxer-based variant (unused): copy parameters and write a container header instead of raw ADTS
    //AVStream* st = avformat_new_stream(fmt_ctx, codec);
    //avcodec_parameters_from_context(st->codecpar, codec_context);
    //avformat_write_header(fmt_ctx, NULL);
    /* ---- Encoder setup: end ---- */

    av_dump_format(fmt_ctx, 0, device_name, 0);

    while (!av_read_frame(fmt_ctx, pkt))
    {
        printf("captured packet size:%d\n", pkt->size);

        /* ---- Resampling: start ---- */
        /* compute destination number of samples */
        dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, src_rate) + src_nb_samples,
                                        dst_rate, src_rate, AV_ROUND_UP);
        printf("dst_nb_samples:%d\n", dst_nb_samples);
        if (dst_nb_samples > max_dst_nb_samples)
        {
            av_freep(&dst_data[0]);
            ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
                                   dst_nb_samples, dst_sample_fmt, 1);
            if (ret < 0)
                break;
            max_dst_nb_samples = dst_nb_samples;
        }

        // Split the captured packet (88200 bytes) into encoder-sized chunks
        // (src_linesize = 4096 bytes = 1024 S16 stereo samples). 'offset' holds the
        // bytes of an incomplete chunk carried over from the previous packet.
        // src_data is an array of planes; the input is interleaved, so only plane 0 is used.
        static int offset = 0;
        int use_size = 0;
        while (use_size < pkt->size)
        {
            if (use_size + src_linesize + offset > pkt->size)
            {
                // Not enough data left for a full chunk: keep the tail for the next packet
                offset = pkt->size - use_size;
                memset(src_data[0], 0, src_linesize);
                memcpy(src_data[0], pkt->data + use_size, offset);
                break;
            }
            if (offset)
            {
                // Complete the chunk started at the end of the previous packet
                printf("use_size:%d src_linesize-offset:%d pkt->size:%d\n",
                       use_size, src_linesize - offset, pkt->size);
                memcpy(src_data[0] + offset, pkt->data + use_size, src_linesize - offset);
                use_size += (src_linesize - offset);
                offset = 0;
            }
            else
            {
                memset(src_data[0], 0, src_linesize);
                printf("use_size:%d pkt->size:%d src_linesize:%d\n", use_size, pkt->size, src_linesize);
                memcpy(src_data[0], pkt->data + use_size, src_linesize);
                use_size += src_linesize;
            }

            // Convert the S16 chunk straight into the encoder frame's planar float buffers.
            // The frame may still be referenced by the encoder, so make it writable first.
            if (av_frame_make_writable(frame) < 0)
                break;
            //ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t**)src_data, src_nb_samples);
            ret = swr_convert(swr_ctx, frame->data, frame->nb_samples,
                              (const uint8_t**)src_data, src_nb_samples);
            if (ret < 0)
            {
                fprintf(stderr, "Error while converting\n");
                return -1;
            }
            // To dump the resampled PCM instead (playable with the ffplay command above):
            //dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels, ret, dst_sample_fmt, 0);
            //fwrite(dst_data[0], 1, dst_bufsize, out_file);
            //fwrite(dst_data[1], 1, dst_bufsize, out_file);
            //fflush(out_file);
            printf("dst_bufsize:%d frame->linesize[0]:%d\n", dst_bufsize, frame->linesize[0]);
            /* ---- Resampling: end ---- */

            /* ---- Encoding: start ---- */
            av_init_packet(&enpkt);
            enpkt.data = NULL;
            enpkt.size = 0;
            //memcpy(frame->data[0], dst_data[0], dst_bufsize / 2);
            //memcpy(frame->data[1], dst_data[0] + dst_bufsize / 2, dst_bufsize / 2);
            ret = avcodec_send_frame(codec_context, frame);
            while (ret >= 0)
            {
                ret = avcodec_receive_packet(codec_context, &enpkt);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                {
                    break; // the encoder needs more input
                }
                else if (ret < 0)
                {
                    printf("avcodec_receive_packet error! [%d]\n", ret);
                    return -1;
                }

                printf("enpkt.size:%d codec_context->profile:%d\n", enpkt.size, codec_context->profile);
                // Prepend a 7-byte ADTS header. The hard-coded bytes encode:
                // profile index 1 (AAC LC), sampling-frequency index 4 (44100 Hz),
                // channel configuration 2 (stereo).
                unsigned char aac_buffer[7] = { 0 };
                int adts_len = enpkt.size + 7; // frame_length counts the header itself
                aac_buffer[0] = 0xFF;          // syncword (high 8 bits)
                aac_buffer[1] = 0xF1;          // syncword low bits, MPEG-4, layer 00, no CRC
                aac_buffer[2] = 0x50;          // profile = LC, frequency index = 4 (44100 Hz)
                aac_buffer[3] = (unsigned char)(0x80 | ((adts_len >> 11) & 0x03)); // stereo + frame_length bits 12..11
                aac_buffer[4] = (unsigned char)((adts_len >> 3) & 0xFF);           // frame_length bits 10..3
                aac_buffer[5] = (unsigned char)(((adts_len & 0x07) << 5) | 0x1F);  // frame_length bits 2..0 + buffer fullness
                aac_buffer[6] = 0xFC;                                              // buffer fullness + one raw data block

                fwrite(aac_buffer, 1, 7, out_file);
                fwrite(enpkt.data, 1, enpkt.size, out_file);
                fflush(out_file);
                av_packet_unref(&enpkt);
            }
            /* ---- Encoding: end ---- */
        }

        printf("dst_nb_samples:%d src_nb_samples:%d frame->nb_samples:%d\n",
               dst_nb_samples, src_nb_samples, frame->nb_samples);

#if 0   // Earlier attempt (disabled): resample into dst_data and write through the format context
        ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t**)src_data, src_nb_samples);
        if (ret < 0)
        {
            fprintf(stderr, "Error while converting\n");
            return -1;
        }
        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels, ret, dst_sample_fmt, 1);
        if (dst_bufsize < 0)
        {
            fprintf(stderr, "Could not get sample buffer size\n");
            return -1;
        }
        av_init_packet(&enpkt);
        enpkt.data = NULL;
        enpkt.size = 0;
        ret = avcodec_send_frame(codec_context, frame);
        while (ret >= 0)
        {
            ret = avcodec_receive_packet(codec_context, &enpkt);
            if (ret < 0)
            {
                printf("avcodec_receive_packet error!\n");
                return -1;
            }
            ret = av_write_frame(fmt_ctx, &enpkt);
            av_packet_unref(&enpkt);
        }
        av_write_trailer(fmt_ctx);
        printf("enpkt.size:%d\n", enpkt.size);
        //fwrite(dst_data[0], 1, dst_bufsize, out_file);
        //fflush(out_file);
#endif

        av_packet_unref(pkt);
    }

    swr_free(&swr_ctx);
    av_packet_free(&pkt);
    avformat_close_input(&fmt_ctx);
    fclose(out_file);
    // Playback check: ffplay collection.aac
    return 0;
}