Continuing from the previous post: for a single call, the main reason video recording drives FreeSWITCH's CPU so high is that the media_bug path involves one H264 decode plus two H264 encodes.
The fix is to record by forwarding the stream, rtsp/rtmp style: relay the H264 stream coming from the source directly to the rtsp/rtmp server, which removes the decode and encode steps on the FreeSWITCH side.
The previous post already removed the extra encode when forwarding to the b_leg, and the CPU load did drop accordingly; this time the goal is to remove the H264 decode entirely, along with the encode done during media_bug recording.
Prelude: analyze how the media_bug decodes and then records.
Call stack from recording down to mod_av:
Thread 46 "freeswitch" hit Breakpoint 3, av_file_write_video (handle=0x7ff32030d718, frame=0x7ff31934fc70) at avformat.c:2880
2880 av_file_context_t *context = (av_file_context_t *)handle->private_info;
(gdb) bt
#0 av_file_write_video (handle=0x7ff32030d718, frame=0x7ff31934fc70) at avformat.c:2880
#1 0x00007ff34bd16d7f in record_callback (bug=0x23becd8, user_data=0x7ff32030d8f8, type=SWITCH_ABC_TYPE_STREAM_VIDEO_PING)
at src/switch_ivr_async.c:1651
#2 0x00007ff34bc5fc74 in video_bug_thread (thread=<optimized out>, obj=0x23becd8) at src/switch_core_media_bug.c:766
#3 0x00007ff34bf87b5c in dummy_worker (opaque=0x7ff32c05b7d0) at threadproc/unix/thread.c:151
#4 0x00007ff34969a17a in start_thread () from /lib64/libpthread.so.0
#5 0x00007ff348c4cdc3 in clone () from /lib64/libc.so.6
(gdb) d 1
1. Remove the media_bug's video decode handling. In switch_core_media_bug_add it is enough to comment out the CF_VIDEO_DECODED_READ handling and set a new CF_VIDEO_NO_DECODED_READ flag instead; the original decoded-read behavior can still be restored, so this extends the code rather than rewriting the existing logic:
#if 1 // no decoded read
if (switch_test_flag(bug, SMBF_READ_VIDEO_STREAM) || switch_test_flag(bug, SMBF_WRITE_VIDEO_STREAM) || switch_test_flag(bug, SMBF_READ_VIDEO_PING) || switch_test_flag(bug, SMBF_WRITE_VIDEO_PING)) {
//switch_channel_set_flag_recursive(session->channel, CF_VIDEO_DECODED_READ);
switch_channel_set_flag_recursive(session->channel, CF_VIDEO_NO_DECODED_READ);
//create a frame queue
switch_queue_create(&bug->write_video_frame_queue, SWITCH_CORE_QUEUE_LEN, switch_core_session_get_pool(session));
}
#endif
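CF_VIDEO_NO_DECODED_READ is not a stock FreeSWITCH channel flag, so it presumably also has to be declared next to the existing flags; a minimal sketch, assuming it is added to the switch_channel_flag_t enum in switch_types.h (the exact placement is an assumption, the original patch does not show it):
/* sketch: declare the new flag alongside the existing video flags in
   switch_channel_flag_t (src/include/switch_types.h); placement is assumed */
typedef enum {
	/* ... existing flags ... */
	CF_VIDEO_DECODED_READ,
	CF_VIDEO_NO_DECODED_READ, /* new: deliver raw encoded video frames to bugs */
	/* ... remaining flags ... */
	CF_FLAG_MAX
} switch_channel_flag_t;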
A video queue is added to the switch_media_bug struct to receive these frames:
struct switch_media_bug {
switch_queue_t *read_video_queue;
switch_queue_t *write_video_queue;
switch_queue_t *write_video_frame_queue;//frame queue
2. Determine where the frame comes from; the call stack where the session reads a video frame:
(gdb) bt
#0 switch_core_session_read_video_frame (session=0x7fce90063bf8, frame=frame@entry=0x7fce81067be8, flags=flags@entry=0,
stream_id=stream_id@entry=0) at src/switch_core_media.c:14973
#1 0x00007fceb2d6161a in video_bridge_thread (session=0x7fce90063bf8, obj=0x7fce81028790) at src/switch_ivr_bridge.c:236
#2 0x00007fceb2d1dbee in video_helper_thread (thread=<optimized out>, obj=0x7fce9007f2b0) at src/switch_core_media.c:7548
The frame is fetched inside switch_core_session_read_video_frame, which in turn calls into switch_core_media_read_frame:
while (smh->media_flags[SCMF_RUNNING] && engine->read_frame.datalen == 0) {
engine->read_frame.flags = SFF_NONE;
status = switch_rtp_zerocopy_read_frame(engine->rtp_session, &engine->read_frame, flags);
if (status != SWITCH_STATUS_SUCCESS && status != SWITCH_STATUS_BREAK) {
if (status == SWITCH_STATUS_TIMEOUT) {
if (switch_channel_get_variable(session->channel, "execute_on_media_timeout")) {
*frame = &engine->read_frame;
switch_set_flag((*frame), SFF_CNG);
(*frame)->datalen = engine->read_impl.encoded_bytes_per_packet;
memset((*frame)->data, 0, (*frame)->datalen);
switch_channel_execute_on(session->channel, "execute_on_media_timeout");
switch_goto_status(SWITCH_STATUS_SUCCESS, end);
}
switch_channel_hangup(session->channel, SWITCH_CAUSE_MEDIA_TIMEOUT);
}
goto end;
}
....
/* Fast PASS! */
if (switch_test_flag((&engine->read_frame), SFF_PROXY_PACKET)) {
*frame = &engine->read_frame;
switch_goto_status(SWITCH_STATUS_SUCCESS, end);
}
}
if (engine->read_frame.datalen == 0) {
*frame = NULL;
}
if (type == SWITCH_MEDIA_TYPE_TEXT && !switch_test_flag((&engine->read_frame), SFF_CNG)) {
} else {
*frame = &engine->read_frame;
}
status = SWITCH_STATUS_SUCCESS;
end:
if (smh->read_mutex[type]) {
switch_mutex_unlock(smh->read_mutex[type]);
}
In other words, the frame returned here is just a pointer to the engine->read_frame object, which lives for the whole session: it must not be freed, and only the returned object may be used within the session's lifetime. To keep a frame around, its contents have to be copied into separately allocated memory.
So switch_core_session_read_video_frame and switch_core_session_write_video_frame are modified to copy the received frame and push it onto the write_video_frame_queue:
// write path: make a copy of the frame and hand it to the video bug callback
if ((*frame)){
switch_frame_t *temp_frame = (*frame);
#if 1//add for no need encode write
switch_frame_t *video_ping_frame = (switch_frame_t *)malloc(sizeof(switch_frame_t));
if (video_ping_frame == NULL){
return status;
}
memset(video_ping_frame, 0x00, sizeof(switch_frame_t));
video_ping_frame->data = (void *)malloc(temp_frame->datalen);
if ( video_ping_frame->data == NULL){
free(video_ping_frame);
video_ping_frame = NULL;
return status;
}
memcpy(video_ping_frame->data , temp_frame->data, temp_frame->datalen);
video_ping_frame->datalen = temp_frame->datalen;
video_ping_frame->buflen = temp_frame->datalen;
video_ping_frame->flags = temp_frame->flags;
video_ping_frame->timestamp = temp_frame->timestamp;
video_ping_frame->m = temp_frame->m;
video_ping_frame->seq = temp_frame->seq;
if (!debug_read_flag){
debug_read_flag = 1;
switch_log_printf(SWITCH_CHANNEL_CHANNEL_LOG(session->channel), SWITCH_LOG_INFO, "################[2] %s read thread and post to read_video_queue, address:%lx, len:%d\n", switch_core_session_get_name(session),
(intptr_t)video_ping_frame, temp_frame->datalen);
}
switch_queue_push(bp->write_video_frame_queue, video_ping_frame);
#endif//
}
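One detail the snippet above does not cover: the queue created in switch_core_media_bug_add is bounded by SWITCH_CORE_QUEUE_LEN, and switch_queue_push blocks when the queue is full, which would stall the media read path if the bug thread falls behind. A small sketch of a non-blocking alternative using switch_queue_trypush, dropping and freeing the copy when the queue is full (a defensive variant, not what the original patch does):
/* sketch: non-blocking push; if the bug thread is not keeping up, drop the copied
   frame instead of stalling the read path (variant, not the original patch) */
if (switch_queue_trypush(bp->write_video_frame_queue, video_ping_frame) != SWITCH_STATUS_SUCCESS) {
	free(video_ping_frame->data);
	free(video_ping_frame);
	video_ping_frame = NULL;
}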
Changes to video_bug_thread:
switch_queue_t *no_decoded_q = bug->write_video_frame_queue;
#if 1 // add: hand the non-decoded frame directly to the bug callback
if (no_decoded_q ){
//flush_videof_frame_queue(no_decoded_q, 1);
if ((status = switch_queue_trypop(no_decoded_q, &other_no_deceded_pop)) == SWITCH_STATUS_SUCCESS) {
switch_frame_t *new_frame = (switch_frame_t *) other_no_deceded_pop;
if (other_no_deceded_pop == NULL){
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(bug->session), SWITCH_LOG_DEBUG, "video_bug_thread %s is null\n", switch_channel_get_name(bug->session->channel));
continue;
}
switch_thread_rwlock_rdlock(bug->session->bug_rwlock);
bug->video_ping_frame = new_frame;
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(bug->session), SWITCH_LOG_INFO, "video_bug_thread %s, frame:%lx, seq:%d len:%d\n", switch_channel_get_name(bug->session->channel), (intptr_t)new_frame, new_frame->seq, new_frame->datalen);
if (bug->callback) {
if (bug->callback(bug, bug->user_data, SWITCH_ABC_TYPE_STREAM_VIDEO_PING) == SWITCH_FALSE
|| (bug->stop_time && bug->stop_time <= switch_epoch_time_now(NULL))) {
ok = SWITCH_FALSE;
}
}
if (new_frame->data != NULL){
free(new_frame->data);
new_frame->data = NULL;
}
//need free
free(new_frame);
bug->video_ping_frame = NULL;
other_no_deceded_pop = NULL;
switch_thread_rwlock_unlock(bug->session->bug_rwlock);
}
}
#endif//
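The commented-out flush_videof_frame_queue call above hints at the cleanup that is still needed: every queued element is a heap-allocated switch_frame_t with a heap-allocated data buffer, so whatever is left in the queue when the bug is removed has to be drained and freed. A sketch of what such a flush helper could look like (the helper name and placement are assumptions; the original patch only shows the call commented out):
static void flush_video_frame_queue(switch_queue_t *q)
{
	void *pop = NULL;

	if (!q) return;

	/* drain the queue and release both the frame struct and its data buffer */
	while (switch_queue_trypop(q, &pop) == SWITCH_STATUS_SUCCESS) {
		switch_frame_t *f = (switch_frame_t *) pop;

		if (f) {
			if (f->data) free(f->data);
			free(f);
		}
		pop = NULL;
	}
}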
The change in avformat.c, in av_file_write_video:
if (!frame->img) {
#if 0
switch_goto_status(SWITCH_STATUS_FALSE, end);
#else // for no video decode, modified by lyz
AVPacket pkt = { 0 };
context->last_received_timestamp = frame->timestamp;
status = avformat_buffer_h264_nalu(context, frame);
if (status == SWITCH_STATUS_RESTART) {
switch_set_flag(frame, SFF_WAIT_KEY_FRAME);
switch_buffer_zero(context->nalu_buffer);
return SWITCH_STATUS_MORE_DATA;
}
if (frame->m) {
uint32_t size = switch_buffer_inuse(context->nalu_buffer);
av_init_packet(&pkt);
switch_buffer_write(context->nalu_buffer, ff_input_buffer_padding, sizeof(ff_input_buffer_padding));
switch_buffer_peek_zerocopy(context->nalu_buffer, (const void **)&pkt.data);
pkt.size = size;
// readjust the timestamp
{
uint64_t delta_tmp;
uint64_t delta_sum = 0, delta_i = 0, delta_avg = 0;
switch_core_timer_next(context->eh.video_timer);
delta_tmp = (context->video_timer.samplecount * 90) - context->eh.last_ts;
if (delta_tmp != 0) {
delta_sum += delta_tmp;
delta_i++;
if (delta_i == UINT64_MAX) {
delta_i = 1;
delta_sum = delta_avg;
}
if ((delta_i % 10) == 0) {
delta_avg = (int)(double)(delta_sum / delta_i);
}
pkt.pts = context->video_timer.samplecount * 90;
} else {
pkt.pts = ((context->video_timer.samplecount) * 90) + 1;
}
context->eh.last_ts = pkt.pts;
pkt.dts = pkt.pts;
}
//frame->timestamp;
//switch_mutex_lock(context->eh.mutex);
GCC_DIAG_OFF(deprecated-declarations)
if (context->video_st.st == NULL || context->video_st.st->codec == NULL){
//switch_mutex_unlock(context->eh.mutex);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "codec is null failedn");
switch_goto_status(SWITCH_STATUS_SUCCESS, end);
}
write_frame(context->fc, &context->video_st.st->codec->time_base, context->video_st.st, &pkt);
GCC_DIAG_ON(deprecated-declarations)
//switch_mutex_unlock(context->eh.mutex);
av_packet_unref(&pkt);
switch_buffer_zero(context->nalu_buffer);
context->nalu_28_start = 0;
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "input write frame len:%d\n", size);
switch_goto_status(SWITCH_STATUS_SUCCESS, end);
}
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "input write frame datalen:%d\n", frame->datalen);
switch_goto_status(SWITCH_STATUS_SUCCESS, end);
#endif//
}
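avformat_buffer_h264_nalu itself is not shown in this post; it presumably reassembles the incoming RTP H264 payload (RFC 6184) into Annex-B NAL units inside context->nalu_buffer, much like the buffer_h264_nalu logic in mod_av's codec code. A rough sketch of that idea, with names assumed and only single NAL units and FU-A fragments handled:
/* rough sketch (assumed): append one RTP H264 payload to an Annex-B buffer,
   handling single NAL units and FU-A fragments (RFC 6184) */
static const uint8_t annexb_start_code[] = { 0, 0, 0, 1 };

static switch_status_t sketch_buffer_h264_nalu(switch_buffer_t *nalu_buffer, switch_frame_t *frame)
{
	uint8_t *payload = (uint8_t *) frame->data;
	uint8_t nal_type = payload[0] & 0x1f;

	if (nal_type == 28) {               /* FU-A fragment */
		uint8_t fu_header = payload[1];

		if (fu_header & 0x80) {         /* first fragment: start code + rebuilt NAL header */
			uint8_t nal_header = (payload[0] & 0xe0) | (fu_header & 0x1f);
			switch_buffer_write(nalu_buffer, annexb_start_code, sizeof(annexb_start_code));
			switch_buffer_write(nalu_buffer, &nal_header, 1);
		}
		switch_buffer_write(nalu_buffer, payload + 2, frame->datalen - 2);
	} else if (nal_type <= 23) {        /* single NAL unit packet */
		switch_buffer_write(nalu_buffer, annexb_start_code, sizeof(annexb_start_code));
		switch_buffer_write(nalu_buffer, payload, frame->datalen);
	} else {
		return SWITCH_STATUS_RESTART;   /* STAP-A etc. not handled in this sketch */
	}

	return SWITCH_STATUS_SUCCESS;
}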
CPU usage during a one-to-one video call while recording with stream forwarding:
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
491166 root -2 -10 849532 63960 23704 S 2.0 1.6 0:00.61 freeswitch
Audio recording call stack:
Thread 67 "freeswitch" hit Breakpoint 1, av_file_write (handle=0x7fffe402c108, data=0x7fffec009630, len=0x7fffc5685c78) at avformat.c:2046
2046 if (!switch_test_flag(handle, SWITCH_FILE_FLAG_WRITE)) {
(gdb) bt
#0 av_file_write (handle=0x7fffe402c108, data=0x7fffec009630, len=0x7fffc5685c78) at avformat.c:2046
#1 0x00007ffff77bdec7 in switch_core_file_write (fh=0x7fffe402c108, data=0x7fffec009630, data@entry=0x7fffec0d7e98,
len=len@entry=0x7fffc5685c78) at src/switch_core_file.c:672
#2 0x00007ffff786770e in recording_thread (thread=<optimized out>, obj=<optimized out>) at src/switch_ivr_async.c:1281
#3 0x00007ffff7adae3c in dummy_worker (opaque=0x7fffe84fed30) at threadproc/unix/thread.c:151
#4 0x00007ffff51ed17a in start_thread () from /lib64/libpthread.so.0
#5 0x00007ffff479fdc3 in clone () from /lib64/libc.so.6
(gdb)
Remaining detail: the audio and video timestamps in the recording still need to be synchronized. The issue exists because the video path now writes the H264 RTP payload coming from the source directly, while the audio is still re-encoded before being written. Could audio be handled the same way, without transcoding? In theory yes, but audio transcoding costs far less CPU, so it is left untouched here.
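One possible direction for that sync work, sketched here rather than implemented: capture a single wall-clock epoch when recording starts and derive both the audio and the video pts from it, so the two streams share one clock regardless of how each one is produced. shared_pts() below is a hypothetical helper, not code from this patch:
#include <switch.h>
#include <libavformat/avformat.h>
#include <libavutil/rational.h>
#include <libavutil/mathematics.h>

/* hypothetical helper: rescale elapsed wall-clock time (microseconds since the
   recording epoch) into the target stream's own time base */
static int64_t shared_pts(switch_time_t epoch_us, const AVStream *st)
{
	AVRational us_tb = { 1, 1000000 };              /* microsecond time base */
	switch_time_t now_us = switch_micro_time_now(); /* current wall-clock time */

	return av_rescale_q((int64_t)(now_us - epoch_us), us_tb, st->time_base);
}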