Continuing from the previous post: for a single call, the main reason video recording drives FreeSWITCH's CPU so high is that the media_bug path involves one H264 decode plus two H264 encodes.
The fix is to record by forwarding the stream, rtsp/rtmp style: relay the H264 stream coming from the source directly to the rtsp/rtmp server, which removes the decode and encode steps on the FreeSWITCH side.
The previous post already removed the extra encode when forwarding to the b_leg, and the CPU load did drop accordingly; this time the goal is to remove the H264 decode entirely, along with the encode done during media_bug recording.
Prelude: analyze how the media_bug decodes and then records.
Call stack from recording down to mod_av:
Thread 46 "freeswitch" hit Breakpoint 3, av_file_write_video (handle=0x7ff32030d718, frame=0x7ff31934fc70) at avformat.c:2880
2880 av_file_context_t *context = (av_file_context_t *)handle->private_info;
(gdb) bt
#0 av_file_write_video (handle=0x7ff32030d718, frame=0x7ff31934fc70) at avformat.c:2880
#1 0x00007ff34bd16d7f in record_callback (bug=0x23becd8, user_data=0x7ff32030d8f8, type=SWITCH_ABC_TYPE_STREAM_VIDEO_PING)
at src/switch_ivr_async.c:1651
#2 0x00007ff34bc5fc74 in video_bug_thread (thread=<optimized out>, obj=0x23becd8) at src/switch_core_media_bug.c:766
#3 0x00007ff34bf87b5c in dummy_worker (opaque=0x7ff32c05b7d0) at threadproc/unix/thread.c:151
#4 0x00007ff34969a17a in start_thread () from /lib64/libpthread.so.0
#5 0x00007ff348c4cdc3 in clone () from /lib64/libc.so.6
(gdb) d 1
1. Remove the media_bug's video decode handling. In switch_core_media_bug_add it is enough to comment out the CF_VIDEO_DECODED_READ handling and set a new CF_VIDEO_NO_DECODED_READ flag instead; the original decoded-read behavior can still be restored, so this extends the code rather than rewriting the existing logic:
#if 1 // no decoded read
if (switch_test_flag(bug, SMBF_READ_VIDEO_STREAM) || switch_test_flag(bug, SMBF_WRITE_VIDEO_STREAM) || switch_test_flag(bug, SMBF_READ_VIDEO_PING) || switch_test_flag(bug, SMBF_WRITE_VIDEO_PING)) {
//switch_channel_set_flag_recursive(session->channel, CF_VIDEO_DECODED_READ);
switch_channel_set_flag_recursive(session->channel, CF_VIDEO_NO_DECODED_READ);
//create a frame queue
switch_queue_create(&bug->write_video_frame_queue, SWITCH_CORE_QUEUE_LEN, switch_core_session_get_pool(session));
}
#endif
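CF_VIDEO_NO_DECODED_READ is not a stock FreeSWITCH channel flag, so it presumably also has to be declared next to the existing flags; a minimal sketch, assuming it is added to the switch_channel_flag_t enum in switch_types.h (the exact placement is an assumption, the original patch does not show it):
/* sketch: declare the new flag alongside the existing video flags in
   switch_channel_flag_t (src/include/switch_types.h); placement is assumed */
typedef enum {
	/* ... existing flags ... */
	CF_VIDEO_DECODED_READ,
	CF_VIDEO_NO_DECODED_READ, /* new: deliver raw encoded video frames to bugs */
	/* ... remaining flags ... */
	CF_FLAG_MAX
} switch_channel_flag_t;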
A video queue is added to the switch_media_bug struct to receive these frames:
struct switch_media_bug {
switch_queue_t *read_video_queue;
switch_queue_t *write_video_queue;
switch_queue_t *write_video_frame_queue;//frame queue
2. Determine where the frame comes from; the call stack where the session reads a video frame:
(gdb) bt
#0 switch_core_session_read_video_frame (session=0x7fce90063bf8, frame=frame@entry=0x7fce81067be8, flags=flags@entry=0,
stream_id=stream_id@entry=0) at src/switch_core_media.c:14973
#1 0x00007fceb2d6161a in video_bridge_thread (session=0x7fce90063bf8, obj=0x7fce81028790) at src/switch_ivr_bridge.c:236
#2 0x00007fceb2d1dbee in video_helper_thread (thread=<optimized out>, obj=0x7fce9007f2b0) at src/switch_core_media.c:7548
The frame is fetched inside switch_core_session_read_video_frame, which in turn calls into switch_core_media_read_frame:
while (smh->media_flags[SCMF_RUNNING] && engine->read_frame.datalen == 0) {
engine->read_frame.flags = SFF_NONE;
status = switch_rtp_zerocopy_read_frame(engine->rtp_session, &engine->read_frame, flags);
if (status != SWITCH_STATUS_SUCCESS && status != SWITCH_STATUS_BREAK) {
if (status == SWITCH_STATUS_TIMEOUT) {
if (switch_channel_get_variable(session->channel, "execute_on_media_timeout")) {
*frame = &engine->read_frame;
switch_set_flag((*frame), SFF_CNG);
(*frame)->datalen = engine->read_impl.encoded_bytes_per_packet;
memset((*frame)->data, 0, (*frame)->datalen);
switch_channel_execute_on(session->channel, "execute_on_media_timeout");
switch_goto_status(SWITCH_STATUS_SUCCESS, end);
}
switch_channel_hangup(session->channel, SWITCH_CAUSE_MEDIA_TIMEOUT);
}
goto end;
}
....
/* Fast PASS! */
if (switch_test_flag((&engine->read_frame), SFF_PROXY_PACKET)) {
*frame = &engine->read_frame;
switch_goto_status(SWITCH_STATUS_SUCCESS, end);
}
}
if (engine->read_frame.datalen == 0) {
*frame = NULL;
}
if (type == SWITCH_MEDIA_TYPE_TEXT && !switch_test_flag((&engine->read_frame), SFF_CNG)) {
} else {
*frame = &engine->read_frame;
}
status = SWITCH_STATUS_SUCCESS;
end:
if (smh->read_mutex[type]) {
switch_mutex_unlock(smh->read_mutex[type]);
}
In other words, the frame returned here is just a pointer to the engine->read_frame object, which lives for the whole session: it must not be freed, and only the returned object may be used within the session's lifetime. To keep a frame around, its contents have to be copied into separately allocated memory.
So switch_core_session_read_video_frame and switch_core_session_write_video_frame are modified to copy the received frame and push it onto the write_video_frame_queue:
// write path: make a copy of the frame and hand it to the video bug callback
if ((*frame)){
switch_frame_t *temp_frame = (*frame);
#if 1//add for no need encode write
switch_frame_t *video_ping_frame = (switch_frame_t *)malloc(sizeof(switch_frame_t));
if (video_ping_frame == NULL){
return status;
}
memset(video_ping_frame, 0x00, sizeof(switch_frame_t));
video_ping_frame->data = (void *)malloc(temp_frame->datalen);
if ( video_ping_frame->data == NULL){
free(video_ping_frame);
video_ping_frame = NULL;
return status;
}
memcpy(video_ping_frame->data , temp_frame->data, temp_frame->datalen);
video_ping_frame->datalen = temp_frame->datalen;
video_ping_frame->buflen = temp_frame->datalen;
video_ping_frame->flags = temp_frame->flags;
video_ping_frame->timestamp = temp_frame->timestamp;
video_ping_frame->m = temp_frame->m;
video_ping_frame->seq = temp_frame->seq;
if (!debug_read_flag){
debug_read_flag = 1;
switch_log_printf(SWITCH_CHANNEL_CHANNEL_LOG(session->channel), SWITCH_LOG_INFO, "################[2] %s read thread and post to read_video_queue, address:%lx, len:%d\n", switch_core_session_get_name(session),
(intptr_t)video_ping_frame, temp_frame->datalen);
}
switch_queue_push(bp->write_video_frame_queue, video_ping_frame);
#endif//
}
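One detail the snippet above does not cover: the queue created in switch_core_media_bug_add is bounded by SWITCH_CORE_QUEUE_LEN, and switch_queue_push blocks when the queue is full, which would stall the media read path if the bug thread falls behind. A small sketch of a non-blocking alternative using switch_queue_trypush, dropping and freeing the copy when the queue is full (a defensive variant, not what the original patch does):
/* sketch: non-blocking push; if the bug thread is not keeping up, drop the copied
   frame instead of stalling the read path (variant, not the original patch) */
if (switch_queue_trypush(bp->write_video_frame_queue, video_ping_frame) != SWITCH_STATUS_SUCCESS) {
	free(video_ping_frame->data);
	free(video_ping_frame);
	video_ping_frame = NULL;
}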
Changes to video_bug_thread:
switch_queue_t *no_decoded_q = bug->write_video_frame_queue;
#if 1 // add: hand the non-decoded frame directly to the bug callback
if (no_decoded_q ){
//flush_videof_frame_queue(no_decoded_q, 1);
if ((status = switch_queue_trypop(no_decoded_q, &other_no_deceded_pop)) == SWITCH_STATUS_SUCCESS) {
switch_frame_t *new_frame = (switch_frame_t *) other_no_deceded_pop;
if (other_no_deceded_pop == NULL){
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(bug->session), SWITCH_LOG_DEBUG, "video_bug_thread %s is null\n", switch_channel_get_name(bug->session->channel));
continue;
}
switch_thread_rwlock_rdlock(bug->session->bug_rwlock);
bug->video_ping_frame = new_frame;
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(bug->session), SWITCH_LOG_INFO, "video_bug_thread %s, frame:%lx, seq:%d len:%d\n", switch_channel_get_name(bug->session->channel), (intptr_t)new_frame, new_frame->seq, new_frame->datalen);
if (bug->callback) {
if (bug->callback(bug, bug->user_data, SWITCH_ABC_TYPE_STREAM_VIDEO_PING) == SWITCH_FALSE
|| (bug->stop_time && bug->stop_time <= switch_epoch_time_now(NULL))) {
ok = SWITCH_FALSE;
}
}
if (new_frame->data != NULL){
free(new_frame->data);
new_frame->data = NULL;
}
//need free
free(new_frame);
bug->video_ping_frame = NULL;
other_no_deceded_pop = NULL;
switch_thread_rwlock_unlock(bug->session->bug_rwlock);
}
}
#endif//
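The commented-out flush_videof_frame_queue call above hints at the cleanup that is still needed: every queued element is a heap-allocated switch_frame_t with a heap-allocated data buffer, so whatever is left in the queue when the bug is removed has to be drained and freed. A sketch of what such a flush helper could look like (the helper name and placement are assumptions; the original patch only shows the call commented out):
static void flush_video_frame_queue(switch_queue_t *q)
{
	void *pop = NULL;

	if (!q) return;

	/* drain the queue and release both the frame struct and its data buffer */
	while (switch_queue_trypop(q, &pop) == SWITCH_STATUS_SUCCESS) {
		switch_frame_t *f = (switch_frame_t *) pop;

		if (f) {
			if (f->data) free(f->data);
			free(f);
		}
		pop = NULL;
	}
}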
The change in avformat.c, in av_file_write_video:
if (!frame->img) {
#if 0
switch_goto_status(SWITCH_STATUS_FALSE, end);
#else // for no video decode, modified by lyz
AVPacket pkt = { 0 };
context->last_received_timestamp = frame->timestamp;
status = avformat_buffer_h264_nalu(context, frame);
if (status == SWITCH_STATUS_RESTART) {
switch_set_flag(frame, SFF_WAIT_KEY_FRAME);
switch_buffer_zero(context->nalu_buffer);
return SWITCH_STATUS_MORE_DATA;
}
if (frame->m) {
uint32_t size = switch_buffer_inuse(context->nalu_buffer);
av_init_packet(&pkt);
switch_buffer_write(context->nalu_buffer, ff_input_buffer_padding, sizeof(ff_input_buffer_padding));
switch_buffer_peek_zerocopy(context->nalu_buffer, (const void **)&pkt.data);
pkt.size = size;
// readjust the timestamp
{
uint64_t delta_tmp;
uint64_t delta_sum = 0, delta_i = 0, delta_avg = 0;
switch_core_timer_next(context->eh.video_timer);
delta_tmp = (context->video_timer.samplecount * 90) - context->eh.last_ts;
if (delta_tmp != 0) {
delta_sum += delta_tmp;
delta_i++;
if (delta_i == UINT64_MAX) {
delta_i = 1;
delta_sum = delta_avg;
}
if ((delta_i % 10) == 0) {
delta_avg = (int)(double)(delta_sum / delta_i);
}
pkt.pts = context->video_timer.samplecount * 90;
} else {
pkt.pts = ((context->video_timer.samplecount) * 90) + 1;
}
context->eh.last_ts = pkt.pts;
pkt.dts = pkt.pts;
}
//frame->timestamp;
//switch_mutex_lock(context->eh.mutex);
GCC_DIAG_OFF(deprecated-declarations)
if (context->video_st.st == NULL || context->video_st.st->codec == NULL){
//switch_mutex_unlock(context->eh.mutex);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "codec is null failedn");
switch_goto_status(SWITCH_STATUS_SUCCESS, end);
}
write_frame(context->fc, &context->video_st.st->codec->time_base, context->video_st.st, &pkt);
GCC_DIAG_ON(deprecated-declarations)
//switch_mutex_unlock(context->eh.mutex);
av_packet_unref(&pkt);
switch_buffer_zero(context->nalu_buffer);
context->nalu_28_start = 0;
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "input write frame len:%d\n", size);
switch_goto_status(SWITCH_STATUS_SUCCESS, end);
}
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "input write frame datalen:%d\n", frame->datalen);
switch_goto_status(SWITCH_STATUS_SUCCESS, end);
#endif//
}
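avformat_buffer_h264_nalu itself is not shown in this post; it presumably reassembles the incoming RTP H264 payload (RFC 6184) into Annex-B NAL units inside context->nalu_buffer, much like the buffer_h264_nalu logic in mod_av's codec code. A rough sketch of that idea, with names assumed and only single NAL units and FU-A fragments handled:
/* rough sketch (assumed): append one RTP H264 payload to an Annex-B buffer,
   handling single NAL units and FU-A fragments (RFC 6184) */
static const uint8_t annexb_start_code[] = { 0, 0, 0, 1 };

static switch_status_t sketch_buffer_h264_nalu(switch_buffer_t *nalu_buffer, switch_frame_t *frame)
{
	uint8_t *payload = (uint8_t *) frame->data;
	uint8_t nal_type = payload[0] & 0x1f;

	if (nal_type == 28) {               /* FU-A fragment */
		uint8_t fu_header = payload[1];

		if (fu_header & 0x80) {         /* first fragment: start code + rebuilt NAL header */
			uint8_t nal_header = (payload[0] & 0xe0) | (fu_header & 0x1f);
			switch_buffer_write(nalu_buffer, annexb_start_code, sizeof(annexb_start_code));
			switch_buffer_write(nalu_buffer, &nal_header, 1);
		}
		switch_buffer_write(nalu_buffer, payload + 2, frame->datalen - 2);
	} else if (nal_type <= 23) {        /* single NAL unit packet */
		switch_buffer_write(nalu_buffer, annexb_start_code, sizeof(annexb_start_code));
		switch_buffer_write(nalu_buffer, payload, frame->datalen);
	} else {
		return SWITCH_STATUS_RESTART;   /* STAP-A etc. not handled in this sketch */
	}

	return SWITCH_STATUS_SUCCESS;
}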
CPU usage during a one-to-one video call while recording with stream forwarding:
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
491166 root -2 -10 849532 63960 23704 S 2.0 1.6 0:00.61 freeswitch
Audio recording call stack:
Thread 67 "freeswitch" hit Breakpoint 1, av_file_write (handle=0x7fffe402c108, data=0x7fffec009630, len=0x7fffc5685c78) at avformat.c:2046
2046 if (!switch_test_flag(handle, SWITCH_FILE_FLAG_WRITE)) {
(gdb) bt
#0 av_file_write (handle=0x7fffe402c108, data=0x7fffec009630, len=0x7fffc5685c78) at avformat.c:2046
#1 0x00007ffff77bdec7 in switch_core_file_write (fh=0x7fffe402c108, data=0x7fffec009630, data@entry=0x7fffec0d7e98,
len=len@entry=0x7fffc5685c78) at src/switch_core_file.c:672
#2 0x00007ffff786770e in recording_thread (thread=<optimized out>, obj=<optimized out>) at src/switch_ivr_async.c:1281
#3 0x00007ffff7adae3c in dummy_worker (opaque=0x7fffe84fed30) at threadproc/unix/thread.c:151
#4 0x00007ffff51ed17a in start_thread () from /lib64/libpthread.so.0
#5 0x00007ffff479fdc3 in clone () from /lib64/libc.so.6
(gdb)
Remaining detail: the audio and video timestamps in the recording still need to be synchronized. The issue exists because the video path now writes the H264 RTP payload coming from the source directly, while the audio is still re-encoded before being written. Could audio be handled the same way, without transcoding? In theory yes, but audio transcoding costs far less CPU, so it is left untouched here.
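One possible direction for that sync work, sketched here rather than implemented: capture a single wall-clock epoch when recording starts and derive both the audio and the video pts from it, so the two streams share one clock regardless of how each one is produced. shared_pts() below is a hypothetical helper, not code from this patch:
#include <switch.h>
#include <libavformat/avformat.h>
#include <libavutil/rational.h>
#include <libavutil/mathematics.h>

/* hypothetical helper: rescale elapsed wall-clock time (microseconds since the
   recording epoch) into the target stream's own time base */
static int64_t shared_pts(switch_time_t epoch_us, const AVStream *st)
{
	AVRational us_tb = { 1, 1000000 };              /* microsecond time base */
	switch_time_t now_us = switch_micro_time_now(); /* current wall-clock time */

	return av_rescale_q((int64_t)(now_us - epoch_us), us_tb, st->time_base);
}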