SkeyeRTSPLive高效转码之SkeyeVideoEncoder高效硬件编码解决方案(附源码)(2)

2023-04-23 14:21:37 浏览数 (2)

在之前的《SkeyeRTSPLive高效转码之SkeyeVideoDecoder高效解码》系列文章中我们已经将视频解码成了原始图像数据(YUV/RGB),然后根据不同的转码需求进行编码。如视频分辨率缩放,调整码率,多码率输出等;为了解决转码过程中编码高分辨率高质量或者高压缩率(如H265)耗时的问题,我们采用Nvidia硬件驱动编码器进行编码,以追求最高效率的转码和最低的推送延迟。

SkeyeVideoEncoder基于Nvidia独立显卡的硬件编码库SkeyeNvEncoder
1. 接口声明如下:
代码语言:txt复制
// Public interface of the NVIDIA hardware encoding wrapper.
// Usage order shown by the accompanying code: InitNvEncoder, then
// GetSPSAndPPS / GetH265VPSSPSAndPPS, then NvEncodeSync per frame, then
// CloseNvEncoder.
class SkeyeNvEncoder
{
public:
	// Initializes the encoder and its parameters.
	// bitrate: given in kbps. gop: I-frame interval. rcMode: one of the
	// NV_ENC_PARAMS_RC_* rate-control modes. encoderPreset: preset name.
	// codec: encoding format, 0 = H.264, 1 = H.265/HEVC
	int InitNvEncoder(int width,int height,int fps=25, int bitrate=4096, int gop=50, int qp=28, int rcMode=/*NV_ENC_PARAMS_RC_2_PASS_QUALITY*/NV_ENC_PARAMS_RC_CONSTQP, 
		char* encoderPreset = "Default", int codec = 0,int nDeviceType=0, int nDeviceID=0 );
		
	// H.264: fetch the SPS and PPS headers and their lengths.
	int GetSPSAndPPS(unsigned char*sps,long&spslen,unsigned char*pps,long&ppslen);
	
	// H.265: fetch the VPS, SPS and PPS headers and their lengths.
	int GetH265VPSSPSAndPPS(unsigned char*vps, long&vpslen, unsigned char*sps, long&spslen, unsigned char*pps, long&ppslen);

	// Encodes one frame. The input format is fixed to planar YUV420 (I420); it
	// could be changed to NV12, YUY2, etc. by doing the format conversion set up
	// in Init(). [12/18/2016 dingshuai]
	// outLenth receives the encoded size; the return value is the encoded data.
	unsigned char* NvEncodeSync(unsigned char* pYUV420, int inLenth, int& outLenth, bool& bKeyFrame);

	// Shuts the encoder down and stops encoding.
	int CloseNvEncoder();
};
2. SkeyeNvEncoder编码库调用流程
  • 第一步,初始化编码器及其参数
代码语言:txt复制
//初始化编码器参数
/**
 * Fills in the encode configuration, initializes the selected device and
 * creates the hardware encoder.
 *
 * @param width         Frame width in pixels.
 * @param height        Frame height in pixels.
 * @param fps           Frame rate.
 * @param bitrate       Bitrate in kbps (multiplied by 1024 for the encoder).
 * @param gop           I-frame interval.
 * @param qp            Quantization parameter stored in the configuration.
 * @param rcMode        Rate-control mode (NV_ENC_PARAMS_RC_* value).
 * @param encoderPreset Preset name, resolved through GetPresetGUID().
 * @param codec         0 = H.264, 1 = H.265/HEVC.
 * @param nDeviceType   Device type: NV_ENC_CUDA, or on Windows builds
 *                      NV_ENC_DX9 / NV_ENC_DX10 / NV_ENC_DX11.
 * @param nDeviceID     Index of the device to use.
 * @return 1 on success; -1 if the device type is unsupported or device
 *         initialization fails; -2 if the encoder session cannot be
 *         initialized; -3 if CreateEncoder fails; -4 if the I/O buffers cannot
 *         be allocated.
 */
int InitNvEncoder(int width,int height,int fps, int bitrate, int gop,  
	int qp, int rcMode,  char* encoderPreset , int codec, int nDeviceType, int nDeviceID)
{
	// ---- Encode configuration -- start ----
	memset(&m_encodeConfig, 0, sizeof(EncodeConfig));
	m_encodeConfig.width = width;
	m_encodeConfig.height = height;
	m_nVArea = width*height;
	// Size in bytes of one YUV420 frame; NvEncodeSync rejects any other length.
	m_nCheckyuvsize = m_nVArea*3/2;

	// The encoder expects bps but callers pass kbps, hence *1024.
	m_encodeConfig.bitrate = bitrate*1024;

	// Note: the multi-pass quality mode (NV_ENC_PARAMS_RC_2_PASS_QUALITY) only
	// works together with a low-latency preset.
	m_encodeConfig.rcMode = rcMode;

	m_encodeConfig.encoderPreset = encoderPreset;
	// Default to the low-latency HQ preset; this is replaced further down by
	// the GUID resolved from encoderPreset.
	m_encodeConfig.presetGUID = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;

	// I-frame interval [12/16/2016 dingshuai]
	m_encodeConfig.gopLength = gop;//NVENC_INFINITE_GOPLENGTH;

	// Device selection
	m_encodeConfig.deviceType = nDeviceType;
	m_encodeConfig.deviceID = nDeviceID;

	m_encodeConfig.codec =  codec;//NV_ENC_H264;
	m_encodeConfig.fps = fps;
	m_encodeConfig.qp = qp;
	m_encodeConfig.i_quant_factor = DEFAULT_I_QFACTOR;
	m_encodeConfig.b_quant_factor = DEFAULT_B_QFACTOR;
	m_encodeConfig.i_quant_offset = DEFAULT_I_QOFFSET;
	m_encodeConfig.b_quant_offset = DEFAULT_B_QOFFSET;

	m_encodeConfig.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;

	// Output mode: 1 = asynchronous, 0 = synchronous.
	m_encodeConfig.enableAsyncMode = 0;
	// The encoder is fed NV12, so YUV420 input is converted to NV12 on the way in.
	m_encodeConfig.inputFormat = NV_ENC_BUFFER_FORMAT_NV12;

	// The original author notes the purpose of these two settings is unknown.
	m_encodeConfig.invalidateRefFramesEnableFlag = 0;
	m_encodeConfig.endFrameIdx = INT_MAX;

	// No B-frames; per the original author the encoder does not support them.
	// (The former "numB > 0" check was unreachable and has been removed.)
	m_encodeConfig.numB = 0;
	// ---- Encode configuration -- end ----

	// ---- Device / encoder initialization -- start ----
	NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
	switch (m_encodeConfig.deviceType)
	{
#if defined(NV_WINDOWS)
	case NV_ENC_DX9:
		nvStatus = InitD3D9(m_encodeConfig.deviceID);
		break;

	case NV_ENC_DX10:
		nvStatus = InitD3D10(m_encodeConfig.deviceID);
		break;

	case NV_ENC_DX11:
		nvStatus = InitD3D11(m_encodeConfig.deviceID);
		break;
#endif
	case NV_ENC_CUDA:
		// Keep the result: it used to be discarded, so a CUDA initialization
		// failure went unnoticed.
		nvStatus = InitCuda(m_encodeConfig.deviceID,0);
		break;

	default:
		// Unknown device type (or a DirectX type on a non-Windows build).
		return -1;
	}
	if (nvStatus != NV_ENC_SUCCESS)
		return -1;

	if (m_encodeConfig.deviceType != NV_ENC_CUDA)
		nvStatus = m_pNvHWEncoder->Initialize(m_pDevice, NV_ENC_DEVICE_TYPE_DIRECTX);
	else
		nvStatus = m_pNvHWEncoder->Initialize(m_pDevice, NV_ENC_DEVICE_TYPE_CUDA);

	// This path used to return 1, which is also the success value.
	if (nvStatus != NV_ENC_SUCCESS)
		return -2;

	m_encodeConfig.presetGUID = m_pNvHWEncoder->GetPresetGUID(m_encodeConfig.encoderPreset, m_encodeConfig.codec);

	nvStatus = m_pNvHWEncoder->CreateEncoder(&m_encodeConfig);
	if (nvStatus != NV_ENC_SUCCESS)
	{
		Deinitialize();
		return -3;
	}

	// Number of buffered frames [12/16/2016 dingshuai]
	uint32_t uEncodeBufferCount = 1;
	// Allocate the encoder input/output buffers.
	// NOTE(review): unlike the CreateEncoder failure above, this failure path
	// does not call Deinitialize() — confirm whether cleanup is needed here.
	nvStatus = AllocateIOBuffers(m_pNvHWEncoder->m_uMaxWidth, m_pNvHWEncoder->m_uMaxHeight, uEncodeBufferCount);
	if (nvStatus != NV_ENC_SUCCESS)
		return -4;

	// Forget cached parameter sets; they are extracted again on demand.
	m_spslen = 0;
	m_ppslen = 0;
	memset(m_sps, 0x00, 100);
	memset(m_pps, 0x00, 100);
	m_bWorking = true;
	return 1;
}

其中,我们需要设置编码格式(0=H264,1=H265目前只支持这两种格式),视频分辨率,帧率,码率和I帧间隔(Gop),编码质量以及硬件编码器相关参数,参数详解如下:

代码语言:txt复制
//rcMode: Rate Control Modes(编码码率/质量控制模式),详见如下枚举:
					// 	typedef enum _NV_ENC_PARAMS_RC_MODE
					// 	{
					// 		NV_ENC_PARAMS_RC_CONSTQP                = 0x0,       /**< Constant QP mode */
					// 		NV_ENC_PARAMS_RC_VBR                    = 0x1,       /**< Variable bitrate mode */
					// 		NV_ENC_PARAMS_RC_CBR                    = 0x2,       /**< Constant bitrate mode */
					// 		NV_ENC_PARAMS_RC_VBR_MINQP              = 0x4,       /**< Variable bitrate mode with MinQP */
					// 		NV_ENC_PARAMS_RC_2_PASS_QUALITY         = 0x8,       /**< Multi pass encoding optimized for image quality and works only with low latency mode */
					// 		NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP   = 0x10,      /**< Multi pass encoding optimized for maintaining frame size and works only with low latency mode */
					// 	}
//encoderPreset: 编码预设
					// 预设编码器编码图像的延时和清晰度
					// if (encoderPreset && (stricmp(encoderPreset, "HQ") == 0))
					// else if (encoderPreset && (stricmp(encoderPreset, "LowLatencyHP") == 0))
					// else if (encoderPreset && (stricmp(encoderPreset, "HP") == 0))
					// else if (encoderPreset && (stricmp(encoderPreset, "LowLatencyHQ") == 0))
					// else if (encoderPreset && (stricmp(encoderPreset, "BD") == 0))
					// else if (encoderPreset && (stricmp(encoderPreset, "LOSSLESS") == 0))
					// else if (encoderPreset && (stricmp(encoderPreset, "LowLatencyDefault") == 0))
					// else if (encoderPreset && (stricmp(encoderPreset, "LosslessDefault") == 0))
					//	详见nvEncoderAPI.h  /*   Preset GUIDS supported by the NvEncodeAPI interface.  */		
  • 第二步,获取编码信息参数。如果编码格式为H264,我们通过GetSPSAndPPS获取编码信息头SPS和PPS,如下代码段所示:
代码语言:txt复制
//获取SPS和PPS	
/**
 * Returns the H.264 SPS and PPS of the current encoder session.
 *
 * If the parameter sets are not cached yet, one all-zero frame is encoded and
 * the SPS/PPS are extracted from its output; an IDR frame is then requested so
 * that the next real frame starts a fresh sequence.
 *
 * @param sps     Destination buffer for the SPS bytes.
 * @param spslen  Receives the SPS length, or 0 if the headers are unavailable.
 * @param pps     Destination buffer for the PPS bytes.
 * @param ppslen  Receives the PPS length, or 0 if the headers are unavailable.
 * @return -1 if the encoder is not initialized, otherwise 1. A return of 1
 *         does not guarantee the headers were found: check the lengths.
 */
int GetSPSAndPPS(unsigned char*sps,long&spslen,unsigned char*pps,long&ppslen)
{
	if (!m_bWorking)
	{
		return -1;
	}

	if (m_spslen == 0 || m_ppslen == 0)
	{
		int nDataSize = 0;
		bool bKeyFrame = false;

		// Encode one blank frame purely to obtain the stream headers.
		unsigned char* pTempBuffer = new unsigned char[m_nCheckyuvsize];
		memset(pTempBuffer, 0x00, m_nCheckyuvsize);
		unsigned char* pEncData = NvEncodeSync(pTempBuffer, m_nCheckyuvsize, nDataSize, bKeyFrame);
		if (pEncData && nDataSize>0)
		{
			GetH264SPSandPPS((char*)pEncData, nDataSize, (char*)m_sps, (int*)&m_spslen, (char*)m_pps, (int*)&m_ppslen);
		}
		delete[] pTempBuffer;

		// Force the next encoded frame to be an IDR frame.
		m_encPicCommand.bForceIDR = 1;
	}

	if (m_spslen>0&&m_ppslen>0)
	{
		memcpy(sps, m_sps, m_spslen);
		memcpy(pps, m_pps, m_ppslen);
		spslen = m_spslen;
		ppslen = m_ppslen;
	}
	else
	{
		// Previously the output lengths were left untouched here, so a caller
		// passing uninitialized variables would read garbage. Always define
		// them, as GetH265VPSSPSAndPPS does.
		spslen = 0;
		ppslen = 0;
	}
	return 1;
}

如果编码格式为H265,我们通过GetH265VPSSPSAndPPS获取编码信息头VPS,SPS和PPS,如下代码段所示:

代码语言:txt复制
/**
 * Returns the H.265 VPS, SPS and PPS of the current encoder session.
 *
 * When the SPS or PPS is not cached yet, one all-zero frame is encoded, the
 * three parameter sets are extracted from its output, and an IDR frame is
 * requested for the next encode. The three length outputs are always written;
 * each buffer is only filled when its length is non-zero.
 *
 * @return -1 if the encoder is not initialized, otherwise 1.
 */
int GetH265VPSSPSAndPPS(unsigned char*vps, long&vpslen, unsigned char*sps, 
	long&spslen, unsigned char*pps, long&ppslen)
{
	if (!m_bWorking)
		return -1;

	const bool bHeadersCached = (m_spslen != 0) && (m_ppslen != 0);
	if (!bHeadersCached)
	{
		// Encode a blank frame solely to get the stream headers out of it.
		unsigned char* pBlankFrame = new unsigned char[m_nCheckyuvsize];
		memset(pBlankFrame, 0x00, m_nCheckyuvsize);

		int nEncodedSize = 0;
		bool bIsKey = false;
		unsigned char* pEncoded = NvEncodeSync(pBlankFrame, m_nCheckyuvsize, nEncodedSize, bIsKey);
		if (pEncoded != NULL && nEncodedSize > 0)
		{
			GetH265VPSandSPSandPPS((char*)pEncoded, nEncodedSize, (char*)m_vps, (int*)&m_vpslen, (char*)m_sps, (int*)&m_spslen, (char*)m_pps, (int*)&m_ppslen);
		}

		// Ask for an IDR frame on the next encode.
		m_encPicCommand.bForceIDR = 1;

		delete[] pBlankFrame;
	}

	// Report the lengths first, then hand out whichever sets are present.
	spslen = m_spslen;
	ppslen = m_ppslen;
	vpslen = m_vpslen;

	if (m_spslen > 0)
	{
		memcpy(sps, m_sps, m_spslen);
	}
	if (m_ppslen > 0)
	{
		memcpy(pps, m_pps, m_ppslen);
	}
	if (m_vpslen != 0)
	{
		memcpy(vps, m_vps, m_vpslen);
	}

	return 1;
}
代码语言:javascript复制
  • 第三步,调用编码函数进行视频帧编码
编码输入格式InputFormat我们固定为YUV420PL(I420),如源图像色彩格式为NV12, YUY2 等,需要在传入编码器时进行格式转换。 unsigned char* NvEncodeSync(unsigned char* pYUV420, int inLenth, int& outLenth, bool& bKeyFrame)
{
	if(	!m_bWorking  || inLenth !=m_nCheckyuvsize)//初始化尚未完成,或者传入的数据不满足YUV数据的长度,则返回错误
	{
		outLenth = 0;
		return NULL;
	}

	NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
	bool bError = false;
	EncodeBuffer* pEncodeBuffer = m_EncodeBufferQueue.GetAvailable();

	EncodeFrameConfig stEncodeFrame;
	memset(&stEncodeFrame, 0, sizeof(stEncodeFrame));
	stEncodeFrame.yuv[0] = pYUV420;//Y
	stEncodeFrame.yuv[1] = pYUV420 m_nVArea;//U
	stEncodeFrame.yuv[2] = pYUV420 m_nVArea (m_nVArea>>2);//V

	int nHelfWidth = m_encodeConfig.width >> 1;
	stEncodeFrame.stride[0] = m_encodeConfig.width;
	stEncodeFrame.stride[1] = nHelfWidth;
	stEncodeFrame.stride[2] = nHelfWidth;
	stEncodeFrame.width = m_encodeConfig.width;
	stEncodeFrame.height = m_encodeConfig.height;

	if (m_encodeConfig.deviceType == 0)//CUDA
	{
		//CUDA Lock 
		CCudaAutoLock cuLock((CUcontext)m_pDevice);//m_cuContext

		nvStatus = PreProcessInput(pEncodeBuffer, stEncodeFrame.yuv, stEncodeFrame.width, stEncodeFrame.height,
			m_pNvHWEncoder->m_uCurWidth, m_pNvHWEncoder->m_uCurHeight,
			m_pNvHWEncoder->m_uMaxWidth, m_pNvHWEncoder->m_uMaxHeight);
		if (nvStatus != NV_ENC_SUCCESS)
		{
			outLenth = 0;
			return NULL;
		}
		nvStatus = m_pNvHWEncoder->NvEncMapInputResource(pEncodeBuffer->stInputBfr.nvRegisteredResource, &pEncodeBuffer->stInputBfr.hInputSurface);
		if (nvStatus != NV_ENC_SUCCESS)
		{
			PRINTERR("Failed to Map input buffer %pn", pEncodeBuffer->stInputBfr.hInputSurface);
			bError = true;
			outLenth = 0;
			return NULL;
		}
	}
	else//DirectX or any others
	{
		unsigned char *pInputSurface = NULL;
		uint32_t lockedPitch = 0;
		while (pInputSurface == NULL)
		{
			nvStatus = m_pNvHWEncoder->NvEncLockInputBuffer(pEncodeBuffer->stInputBfr.hInputSurface, (void**)&pInputSurface, &lockedPitch);
			if (nvStatus != NV_ENC_SUCCESS)
				return NULL;
			if (pInputSurface == NULL)
			{
				nvStatus = m_pNvHWEncoder->NvEncUnlockInputBuffer(pEncodeBuffer->stInputBfr.hInputSurface);
				if (nvStatus != NV_ENC_SUCCESS)
					return NULL;
				Sleep(1);
			}
		}
		if (pEncodeBuffer->stInputBfr.bufferFmt == NV_ENC_BUFFER_FORMAT_NV12_PL)
		{
			unsigned char *pInputSurfaceCh = pInputSurface   (pEncodeBuffer->stInputBfr.dwHeight*lockedPitch);
			CmnConvertYUVtoNV12(stEncodeFrame.yuv[0], stEncodeFrame.yuv[1], stEncodeFrame.yuv[2], pInputSurface, 
				pInputSurfaceCh, stEncodeFrame.width, stEncodeFrame.height, stEncodeFrame.width, lockedPitch);
		}

	}
	nvStatus = m_pNvHWEncoder->NvEncEncodeFrame(pEncodeBuffer, &m_encPicCommand, m_encodeConfig.width, m_encodeConfig.height,
		NV_ENC_PIC_STRUCT_FRAME, m_qpDeltaMapArray, m_qpDeltaMapArraySize);
	if (nvStatus != NV_ENC_SUCCESS)
	{
		bError = true;
		outLenth= 0;
		return NULL;
	}

	pEncodeBuffer = m_EncodeBufferQueue.GetAvailable();
	if (!pEncodeBuffer)
	{
		pEncodeBuffer = m_EncodeBufferQueue.GetPending();

		// 获取编码的h264/h265数据 [12/15/2016 dingshuai]
		nvStatus = m_pNvHWEncoder->ProcessOutput(pEncodeBuffer, m_pOutputBuffer, m_nOutputBufLen);
		if(nvStatus != NV_ENC_SUCCESS)
		{
			bError = true;
			outLenth= 0;
		}

		if (m_encodeConfig.deviceType == 0)//CUDA
		{
			// UnMap the input buffer after frame done
			if (pEncodeBuffer->stInputBfr.hInputSurface)
			{
				nvStatus = m_pNvHWEncoder->NvEncUnmapInputResource(pEncodeBuffer->stInputBfr.hInputSurface);
				pEncodeBuffer->stInputBfr.hInputSurface = NULL;
			}
			//pEncodeBuffer = m_EncodeBufferQueue.GetAvailable();
		}
		else
		{
			nvStatus = m_pNvHWEncoder->NvEncUnlockInputBuffer(pEncodeBuffer->stInputBfr.hInputSurface);
			if (nvStatus != NV_ENC_SUCCESS)
				return NULL;
		}
	}
	else
	{
		outLenth= 0;
		return NULL;
	}
	
	if (m_encPicCommand.bForceIDR)
	{
		m_encPicCommand.bForceIDR = 0;
	}

	outLenth = m_nOutputBufLen;
	return m_pOutputBuffer;
}

  • 第四步,关闭编码器,释放编码器申请的内存和显卡资源
代码语言:txt复制
/**
 * Stops encoding and releases the encoder's buffers, the encoder session and
 * the CUDA context (if one is held).
 *
 * @return The status reported by NvEncDestroyEncoder(). Previously the result
 *         was discarded and NV_ENC_SUCCESS was always returned.
 */
int CloseNvEncoder()
{
	// Make NvEncodeSync and the header getters refuse further work first.
	m_bWorking = false;

	ReleaseIOBuffers();

	NVENCSTATUS nvStatus = m_pNvHWEncoder->NvEncDestroyEncoder();

	if (m_cuContext)
	{
		__cu(cuCtxDestroy(m_cuContext));
		// Clear the handle so a second CloseNvEncoder() call cannot destroy
		// the same context twice.
		m_cuContext = NULL;
	}

	return nvStatus;
}

0 人点赞