WebRTC源码阅读——视频组帧

本文分析了Google WebRTC 视频组帧的相关源码，给出了视频组帧的处理流程分析，为避免文章内容过多，文中对于关键函数的分析仅给出关键内容的说明，没有贴完整的源代码。文中所分析内容均基于WebRTC M86版本。

视频组帧

1.概括

组帧：视频一帧数据往往被拆分为多个packet进行发送，组帧是将接收到的packets重组为视频帧。组帧的关键在于找到视频帧的起始与终止packet。对于h264编码的视频帧，rtp传输时没有明确的起始标志，webrtc在处理时以判断连续序列号的时间戳是否相同为依据，若不相同则认为找到了视频帧的起始packet。视频帧的结束标识为rtp包的header中的Mark标志位。对于vp8、vp9则可以从rtp包中解析到明确的帧开始与结束标识符。组帧结束后，拿到完整的视频帧数据，之后对该视频帧数据进行参考帧信息设置，随后送入frameBuffer，以便从中取帧进行解码。

2.关键函数说明

本文内容着重分析webrtc源码中的rtp_video_stream_receiver2.cc、packet_buffer.cc文件的组帧部分。

RtpVideoStreamReceiver2接收到packet后，调用PacketBuffer::InsertPacket将packet进行存储并查找packet所在的帧以及之后帧的完整包数据，若找到该函数会返回完整视频帧的所有packets。若返回结果存在完整的视频帧，则继续由RtpVideoStreamReceiver2::OnInsertedPacket完成组帧。

packet_buffer.cc

packet_buffer使用buffer_记录了当前插入的所有packet，使用missing_packets_记录当前所丢失的包序号。

PacketBuffer::InsertResult PacketBuffer::InsertPacket( std::unique_ptr<PacketBuffer::Packet> packet)

代码语言：txt复制

//利用packet的序列号计算出该packet存放于buffer_的位置
uint16_t seq_num = packet->seq_num;
size_t index = seq_num % buffer_.size();

//若buffer_[index]的值不为空，则按照序列号判断是否为同一packet,若是则返回，不是则不断扩充buffer_的容量，直到buffer_容量达到上限或packet待存放的位置未存储内容，若扩充达到上限依旧无法存放packet，则清除buffer_的内容后，直接返回。

 if (buffer_[index] != nullptr) {
    // Duplicate packet, just delete the payload.
    if (buffer_[index]->seq_num == packet->seq_num) {
      return result;
    }

    // The packet buffer is full, try to expand the buffer.
    while (ExpandBufferSize() && buffer_[seq_num % buffer_.size()] != nullptr) {
    }
    index = seq_num % buffer_.size();

    // Packet buffer is still full since we were unable to expand the buffer.
    if (buffer_[index] != nullptr) {
      // Clear the buffer, delete payload, and return false to signal that a
      // new keyframe is needed.
      RTC_LOG(LS_WARNING) << "Clear PacketBuffer and request key frame.";
      ClearInternal();
      result.buffer_cleared = true;
      return result;
    }
  }
  
//若buffer_[index]的值为空,则将packet存入buffer_，并且更新missing_packets_丢包记录，遍历buffer_找出当前packet所在的视频帧及其之后帧的所有packets。
  
packet->continuous = false;
buffer_[index] = std::move(packet);

UpdateMissingPackets(seq_num);

result.packets = FindFrames(seq_num);

void PacketBuffer::UpdateMissingPackets(uint16_t seq_num)

代码语言：txt复制

//newest_inserted_seq_num_用于记录当前missing_packets_所处理到的最新序号，若seq_num比newest_inserted_seq_num_还要新，则说明seq_num与newest_inserted_seq_num_之间可能存在丢包。此时先删除missing_packets_中早于seq_num - kMaxPaddingAge（即seq_num往前1000个序号）的过期记录，然后不断递增newest_inserted_seq_num_，并把途经的序列号作为丢包序号插入missing_packets_，直到newest_inserted_seq_num_等于seq_num。

 const int kMaxPaddingAge = 1000;
 if (AheadOf(seq_num, *newest_inserted_seq_num_)) {
	uint16_t old_seq_num = seq_num - kMaxPaddingAge;
	auto erase_to = missing_packets_.lower_bound(old_seq_num);
	missing_packets_.erase(missing_packets_.begin(), erase_to);
	...
	while (AheadOf(seq_num, *newest_inserted_seq_num_)) {
	  missing_packets_.insert(*newest_inserted_seq_num_);
	  ++*newest_inserted_seq_num_;
	}
}

bool PacketBuffer::PotentialNewFrame(uint16_t seq_num) const

代码语言：txt复制

// Test if all previous packets has arrived for the given sequence number.按照官方注释译为判断是否给定seq_num之前的包都已经接收到。其具体实现其实是判断seq_num在buffer_存储index的packet与prev_index（index > 0 ? index - 1 : buffer_.size() - 1）对应packet的连续性 。当buffer[index]为一帧中的第一个packet或buffer[prev_index]->continuous = true时，该函数返回true，其他情况下比如两者序列号不符合连续条件，两者时间戳不相等都返回false。

// Decides whether the packet stored for `seq_num` may be part of a frame that
// is worth searching for. Returns true when that packet is flagged as the
// first packet of its frame, or when the packet in the preceding slot has the
// directly preceding sequence number, the same timestamp, and is already
// marked continuous. Returns false in every other case.
bool PacketBuffer::PotentialNewFrame(uint16_t seq_num) const {
  // Slots are addressed by sequence number modulo the buffer size; the slot
  // before index 0 wraps around to the last element.
  size_t index = seq_num % buffer_.size();
  int prev_index = index > 0 ? index - 1 : buffer_.size() - 1;
  const auto& entry = buffer_[index];
  const auto& prev_entry = buffer_[prev_index];

  // Nothing is stored in the slot for this sequence number.
  if (entry == nullptr)
    return false;
  // The slot is occupied by a packet with a different sequence number that
  // maps to the same index.
  if (entry->seq_num != seq_num)
    return false;
  // A packet flagged as first-in-frame is accepted before the previous slot
  // is inspected at all, so this check must stay ahead of the ones below.
  if (entry->is_first_packet_in_frame())
    return true;
  // From here on the answer depends on the packet in the previous slot.
  if (prev_entry == nullptr)
    return false;
  // The cast keeps the comparison correct when entry->seq_num is 0 and the
  // predecessor is 65535.
  if (prev_entry->seq_num != static_cast<uint16_t>(entry->seq_num - 1))
    return false;
  // The previous packet carries a different timestamp than this one.
  if (prev_entry->timestamp != entry->timestamp)
    return false;
  // Continuity is inherited from the previous packet.
  if (prev_entry->continuous)
    return true;

  return false;
}

std::vector<std::unique_ptr<PacketBuffer::Packet>> PacketBuffer::FindFrames( uint16_t seq_num)

代码语言：txt复制

//遍历buffer_查找完整帧的包
 for (size_t i = 0; i < buffer_.size() && PotentialNewFrame(seq_num); ++i) {
 ...
 	size_t index = seq_num % buffer_.size();
    buffer_[index]->continuous = true;
    //当找到一帧的最后一个包时，利用while(true)向前查找一帧的第一个包的序列号start_seq_num
    if (buffer_[index]->is_last_packet_in_frame()) {
     	 uint16_t start_seq_num = seq_num;
     	 int start_index = index;
     	 size_t tested_packets = 0;
		 ...
      	 int64_t frame_timestamp = buffer_[start_index]->timestamp;
    	 ...
    	 while (true) {
    	 	 ++tested_packets;
    	 	 //非h264编码依据packet->is_first_packet_in_frame()判断是否找到帧的第一个包
	    	 if (!is_h264 && buffer_[start_index]->is_first_packet_in_frame())
	          break;
	    	 ...
	    	 
	    	 if (tested_packets == buffer_.size())
             break;
          
	    	 start_index = start_index > 0 ? start_index - 1 : buffer_.size() - 1;
	    	 //对于h264没有确切的一帧起始标识，所以利用时间戳是否相等，判断是否找到一帧的起始包
	    	 if (is_h264 && (buffer_[start_index] == nullptr ||
	                        buffer_[start_index]->timestamp != frame_timestamp)) {
	          break;
	        }
	    	  --start_seq_num;
    	 }
    	 if (is_h264) {
    	 	...
    	 	//如果不属于h264的关键帧，并且在start_seq_num位置之前存在丢包，则直接返回
    	 	if (!is_h264_keyframe && missing_packets_.upper_bound(start_seq_num) !=
                                     missing_packets_.begin()) {
          	return found_frames;
        	}
    	 }
    	 //将查找到的一帧所有包存储到found_frames中
    	 const uint16_t end_seq_num = seq_num + 1;
    	 for (uint16_t i = start_seq_num; i != end_seq_num; ++i) {
	        std::unique_ptr<Packet>& packet = buffer_[i % buffer_.size()];
	        RTC_DCHECK(packet);
	        RTC_DCHECK_EQ(i, packet->seq_num);
	        // Ensure frame boundary flags are properly set.
	        packet->video_header.is_first_packet_in_frame = (i == start_seq_num);
	        packet->video_header.is_last_packet_in_frame = (i == seq_num);
	        found_frames.push_back(std::move(packet));
      	 }
		 //删除seq_num之前的丢包记录 
        missing_packets_.erase(missing_packets_.begin(),
                             missing_packets_.upper_bound(seq_num));
    	 
    }
  	++seq_num;
 }
 return found_frames;

上述过程即为组帧的主要逻辑，剩余组帧部分就是将packets转换为RtpFrameObject类型的对象。关于上述packet_buffer的处理，这里讨论几点问题，以下属于个人思考，不一定准确，大家可以一起讨论看看。

1.上述处理逻辑找到的packets真的是一帧数据所有的packets么？

个人认为对于h264上述FindFrames的处理逻辑存在缺陷，h264编码的packet没有明确的起始标识符，在PacketBuffer::PotentialNewFrame函数中判断条件保障了一定可以找到帧的起始packet。但h264的packet->is_first_packet_in_frame()不准。

代码语言：txt复制

（bool is_first_packet_in_frame() const {
   return video_header.is_first_packet_in_frame;
 }）

可以在video_rtp_depacketizer_h264.cc文件看到，is_first_packet_in_frame赋值并不一定准确。

代码语言：txt复制

absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> ProcessStapAOrSingleNalu(
    rtc::CopyOnWriteBuffer rtp_payload) {
    ...
     parsed_payload->video_header.is_first_packet_in_frame = true;
    ...
}

absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> ParseFuaNalu(
    rtc::CopyOnWriteBuffer rtp_payload) {
  	...
  	bool first_fragment = (rtp_payload.cdata()[1] & kSBit) > 0;
  	...
   parsed_payload->video_header.is_first_packet_in_frame = first_fragment;
   ...
 }

所以个人认为对于h264,并不能保证一定可以找到起始包，假如目前真的没有收到起始包，FindFrames函数中的while(true)循环并非因为时间戳不一致而终止（例如因为前一个位置为空，即buffer_[start_index] == nullptr），那么此时start_seq_num不一定代表起始包序列号，while(true)循环里找到的若不是真正的起始包序列号，那么说明start_seq_num前存在丢包，这时对于非关键帧，有如下机制可以保证对找到的packets不进行处理：

代码语言：txt复制

if (!is_h264_keyframe && missing_packets_.upper_bound(start_seq_num) !=
                                     missing_packets_.begin()) {
	return found_frames;
}

但对于关键帧呢？怎么保障？这里还没有阅读过视频RTP包的发送逻辑，所以不是很肯定。若是对于关键帧都是以H264::NaluType::kFuA类型发送RTP包，那么这里应该不会存在太大问题（默认解析kFuA类型的packet时拿到的is_first_packet_in_frame准确）。

上述逻辑在master分支最新内容上依旧未有变动。

为避免上述问题存在，个人认为FindFrames这里应该添加一个标识符，用于表示是否真的找到起始包，在while(true)中，对于h264若满足时间戳不一致导致的break,那么记标识符为true,后面当检测到当前标识符为true，则再添加packets到found_frames。

2.PacketBuffer::PotentialNewFrame判断顺序可否更改？

不可以，条件entry->is_first_packet_in_frame()表明只要是属于一帧的起始包，就可以进行完整帧包的查找，若把时间戳等判断条件提前，那么FindFrames函数可能永远不会继续向下执行。这里的顺序也保障了一次FindFrames函数调用可以返回多个帧的packets。

3.PacketBuffer::FindFrames中关于missing_packets_.erase(missing_packets_.begin(), missing_packets_.upper_bound(seq_num))的处理合适么？

个人感觉不是很合理，函数执行到此处，对于除了h264非关键帧的情况，只能表示start_seq_num与seq_num之间不存在丢包。所以这里从begin开始清除，感觉逻辑有点问题。不过对处理并不影响，只是提前清除了missing_packets_中相关丢包的记录。

rtp_video_stream_receiver2.cc

packet_buffer返回待处理的packets(result.packets)后，传递到RtpVideoStreamReceiver2::OnInsertedPacket进行组帧的最后处理。

void RtpVideoStreamReceiver2::OnInsertedPacket( video_coding::PacketBuffer::InsertResult result)

代码语言：txt复制

//遍历result.packets
for (auto& packet : result.packets) {
 	if (packet->is_first_packet_in_frame()) {
 		...
 		payloads.clear();
      	packet_infos.clear();
 	}
	...
    payloads.emplace_back(packet->video_payload);
    packet_infos.push_back(packet->packet_info);
	...
	//若此packet为帧的结束packet,则进行转换
	 if (packet->is_last_packet_in_frame()) {
	 	...
	 	//将全部的video_payload拼接合成EncodedImageBuffer
	 	rtc::scoped_refptr<EncodedImageBuffer> bitstream =
          depacketizer_it->second->AssembleFrame(payloads);
      	...
      	//利用上述过程结果，将一帧数据的packets转换为RtpFrameObject类型对象（至此组帧完成），并交由OnAssembledFrame进行下一步处理。
      	 OnAssembledFrame(std::make_unique<video_coding::RtpFrameObject>(
          first_packet->seq_num,                    //
          last_packet.seq_num,                      //
          last_packet.marker_bit,                   //
          max_nack_count,                           //
          min_recv_time,                            //
          max_recv_time,                            //
          first_packet->timestamp,                  //
          first_packet->ntp_time_ms,                //
          last_packet.video_header.video_timing,    //
          first_packet->payload_type,               //
          first_packet->codec(),                    //
          last_packet.video_header.rotation,        //
          last_packet.video_header.content_type,    //
          first_packet->video_header,               //
          last_packet.video_header.color_space,     //
          RtpPacketInfos(std::move(packet_infos)),  //
          std::move(bitstream)));
	 }

}
//当packet_buffer插入packet发现buffer_已经再无法添加元素时，会清空buffer_，设置result.buffer_cleared标识为true，故此时需要重新请求关键帧。
if (result.buffer_cleared) {
	RequestKeyFrame();
}

实时音视频

0 人点赞