HM-16.9
estIntraPredLumaQT - 亮度帧内预测模块
estIntraPredLumaQT 执行流程
遍历当前CU中的每个PU:
- 计算需要进行完整RD率失真优化的模式的数量
numModesForFullRD = g_aucIntraModeNumFast_UseMPM[ uiWidthBit ]
g_aucIntraModeNumFast_UseMPM[] = {3, 8, 8, 3, 3, 3}; 2x2, 4x4, 8x8, 16x16, 32x32, 64x64
- 获取当前PU邻域可用性,对参考像素进行滤波
- 快速搜索 - 得到候选列表 uiRdModeList
- 遍历35种预测模式,选择若干个cost较小的作为候选预测模式
- 帧内预测
- hadamard变换,计算SATD,计算cost
- 更新候选列表 uiRdModeList
- MPM模式 candModeList[3] - uiPreds[3]
- pcCU->getIntraDirPredictor 根据相邻PU得到MPM
- 将不存在于候选列表 uiRdModeList 中的模式加入 uiRdModeList
- 遍历35种预测模式,选择若干个cost较小的作为候选预测模式
- 遍历候选集 uiRdModeList,选出最佳预测模式 uiBestPUMode
- 设置上下文模型
- 变换、量化、编码计算cost:调用 xRecurIntraCodingLumaQT,其中 bCheckFirst = true,四叉树递归划分
- 根据cost更新参数
- 最佳预测模式 uiBestPUMode 在 bCheckFirst = false (PU不再划分) 情况下,再计算一次 dPUCost,比较 dPUCost 和 dBestPUCost,更新参数
- 更新亮度失真
- 重建图像,为下一个帧内预测编码块做参考准备
- 更新当前PU数据
- 非快速搜索:35种预测模式均加入候选列表 uiRdModeList
- do while 实现对编码块的遍历
estIntraPredLumaQT 源码
代码语言:txt复制Void
TEncSearch::estIntraPredLumaQT(TComDataCU* pcCU,
TComYuv* pcOrgYuv,
TComYuv* pcPredYuv,
TComYuv* pcResiYuv,
TComYuv* pcRecoYuv,
Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]
DEBUG_STRING_FN_DECLARE(sDebug))
{
const UInt uiDepth = pcCU->getDepth(0); // 当前CU深度
const UInt uiInitTrDepth = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1; // 用于计算变换深度,实际深度为uiInitTrDepth uiDepth
const UInt uiNumPU = 1<<(2*uiInitTrDepth);
const UInt uiQNumParts = pcCU->getTotalNumPart() >> 2; // 便于计算当前CU的Zorder坐标 ?
const UInt uiWidthBit = pcCU->getIntraSizeIdx(0);
const ChromaFormat chFmt = pcCU->getPic()->getChromaFormat();
const UInt numberValidComponents = getNumberValidComponents(chFmt);
const TComSPS &sps = *(pcCU->getSlice()->getSPS());
const TComPPS &pps = *(pcCU->getSlice()->getPPS());
Distortion uiOverallDistY = 0;
UInt CandNum;
Double CandCostList[ FAST_UDI_MAX_RDMODE_NUM ]; // 候选cost列表
Pel resiLumaPU[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];
Bool bMaintainResidual[NUMBER_OF_STORED_RESIDUAL_TYPES];
for (UInt residualTypeIndex = 0; residualTypeIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; residualTypeIndex )
{
bMaintainResidual[residualTypeIndex] = true; //assume true unless specified otherwise
}
bMaintainResidual[RESIDUAL_ENCODER_SIDE] = !(m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate());
// Lambda calculation at equivalent Qp of 4 is recommended because at that Qp, the quantisation divisor is 1.
#if FULL_NBIT
const Double sqrtLambdaForFirstPass= (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING && pcCU->getCUTransquantBypass(0)) ?
sqrt(0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0)))
: m_pcRdCost->getSqrtLambda();
#else
const Double sqrtLambdaForFirstPass= (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING && pcCU->getCUTransquantBypass(0)) ?
sqrt(0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (sps.getBitDepth(CHANNEL_TYPE_LUMA) - 8)) / 3.0)))
: m_pcRdCost->getSqrtLambda();
#endif
//===== set QP and clear Cbf =====
if ( pps.getUseDQP() == true)
{
pcCU->setQPSubParts( pcCU->getQP(0), 0, uiDepth );
}
else
{
pcCU->setQPSubParts( pcCU->getSlice()->getSliceQp(), 0, uiDepth );
}
//===== loop over partitions =====
TComTURecurse tuRecurseCU(pcCU, 0);
TComTURecurse tuRecurseWithPU(tuRecurseCU, false, (uiInitTrDepth==0)?TComTU::DONT_SPLIT : TComTU::QUAD_SPLIT);
do
{
const UInt uiPartOffset=tuRecurseWithPU.GetAbsPartIdxTU();
// for( UInt uiPU = 0, uiPartOffset=0; uiPU < uiNumPU; uiPU , uiPartOffset = uiQNumParts )
//{
//===== init pattern for luma prediction =====
DEBUG_STRING_NEW(sTemp2)
//===== determine set of modes to be tested (using prediction signal only) =====
Int numModesAvailable = 35; //total number of Intra modes
UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM];
Int numModesForFullRD = m_pcEncCfg->getFastUDIUseMPMEnabled()?g_aucIntraModeNumFast_UseMPM[ uiWidthBit ] : g_aucIntraModeNumFast_NotUseMPM[ uiWidthBit ];
// MPM数目 g_aucIntraModeNumFast_UseMPM[] = {3, 8, 8, 3, 3, 3}; 2x2, 4x4, 8x8, 16x16, 32x32, 64x64
// this should always be true
assert (tuRecurseWithPU.ProcessComponentSection(COMPONENT_Y));
// 获取当前PU邻域可用性,对参考样点进行设置及滤波
initIntraPatternChType( tuRecurseWithPU, COMPONENT_Y, true DEBUG_STRING_PASS_INTO(sTemp2) );
// 快速搜索 doFastSearch 恒为真
Bool doFastSearch = (numModesForFullRD != numModesAvailable);
if (doFastSearch)
{
assert(numModesForFullRD < numModesAvailable);
for( Int i=0; i < numModesForFullRD; i )
{
CandCostList[ i ] = MAX_DOUBLE;
}
CandNum = 0;
const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y);
const UInt uiAbsPartIdx=tuRecurseWithPU.GetAbsPartIdxTU();
Pel* piOrg = pcOrgYuv ->getAddr( COMPONENT_Y, uiAbsPartIdx );
Pel* piPred = pcPredYuv->getAddr( COMPONENT_Y, uiAbsPartIdx );
UInt uiStride = pcPredYuv->getStride( COMPONENT_Y );
DistParam distParam;
const Bool bUseHadamard=pcCU->getCUTransquantBypass(0) == 0;
m_pcRdCost->setDistParam(distParam, sps.getBitDepth(CHANNEL_TYPE_LUMA), piOrg, uiStride, piPred, uiStride, puRect.width, puRect.height, bUseHadamard);
distParam.bApplyWeight = false;
// 遍历35种预测模式,选取若干个cost较小作为候选
for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx )
{
UInt uiMode = modeIdx;
Distortion uiSad = 0;
const Bool bUseFilter=TComPrediction::filteringIntraReferenceSamples(COMPONENT_Y, uiMode, puRect.width, puRect.height, chFmt, sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag());
// 帧内预测
predIntraAng( COMPONENT_Y, uiMode, piOrg, uiStride, piPred, uiStride, tuRecurseWithPU, bUseFilter, TComPrediction::UseDPCMForFirstPassIntraEstimation(tuRecurseWithPU, uiMode) );
// use hadamard transform here
// hadamard -> SATD
uiSad =distParam.DistFunc(&distParam);
UInt iModeBits = 0;
// NB xModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
iModeBits =xModeBitsIntra( pcCU, uiMode, uiPartOffset, uiDepth, CHANNEL_TYPE_LUMA );
// 计算cost
Double cost = (Double)uiSad (Double)iModeBits * sqrtLambdaForFirstPass;
#if DEBUG_INTRA_SEARCH_COSTS
std::cout << "1st pass mode " << uiMode << " SAD = " << uiSad << ", mode bits = " << iModeBits << ", cost = " << cost << "n";
#endif
// 更新候选列表
CandNum = xUpdateCandList( uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList );
}
// MPM模式 - candModeList[i] - NUM_MOST_PROBABLE_MODES = 3
if (m_pcEncCfg->getFastUDIUseMPMEnabled())
{
Int uiPreds[NUM_MOST_PROBABLE_MODES] = {-1, -1, -1};
Int iMode = -1;
pcCU->getIntraDirPredictor( uiPartOffset, uiPreds, COMPONENT_Y, &iMode ); // uiPreds赋值
const Int numCand = ( iMode >= 0 ) ? iMode : Int(NUM_MOST_PROBABLE_MODES);
// MPM预测模式不在候选列表中,则添加到模式候选列表中
for( Int j=0; j < numCand; j )
{
Bool mostProbableModeIncluded = false;
Int mostProbableMode = uiPreds[j];
for( Int i=0; i < numModesForFullRD; i )
{
mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]); // 1真即真
}
if (!mostProbableModeIncluded)
{
uiRdModeList[numModesForFullRD ] = mostProbableMode;
}
}
}
}
else
{
for( Int i=0; i < numModesForFullRD; i )
{
uiRdModeList[i] = i;
}
}
//===== check modes (using r-d costs) =====
#if HHI_RQT_INTRA_SPEEDUP_MOD
UInt uiSecondBestMode = MAX_UINT;
Double dSecondBestPUCost = MAX_DOUBLE;
#endif
DEBUG_STRING_NEW(sPU)
UInt uiBestPUMode = 0; // 最优预测模式
Distortion uiBestPUDistY = 0; // 最优预测模式亮度失真值
Double dBestPUCost = MAX_DOUBLE; // 最优预测模式cost
#if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST
UInt max=numModesForFullRD;
if (DebugOptionList::ForceLumaMode.isSet())
{
max=0; // we are forcing a direction, so don't bother with mode check
}
for ( UInt uiMode = 0; uiMode < max; uiMode )
#else
for( UInt uiMode = 0; uiMode < numModesForFullRD; uiMode ) // 遍历候选集uiRdModeList[]
#endif
{
// set luma prediction mode
UInt uiOrgMode = uiRdModeList[uiMode];
pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth uiInitTrDepth );
DEBUG_STRING_NEW(sMode)
// set context models 上下文模型
m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );
// determine residual for partition
Distortion uiPUDistY = 0; // 当前预测模式亮度失真值
Double dPUCost = 0.0; // 当前预测模式cost
// 多候选模式 变换、量化、编码计算cost
// bCheckFirst: 表示按照四叉树递归划分
#if HHI_RQT_INTRA_SPEEDUP
xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, true, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
#else
xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
#endif
#if DEBUG_INTRA_SEARCH_COSTS
std::cout << "2nd pass [luma,chroma] mode [" << Int(pcCU->getIntraDir(CHANNEL_TYPE_LUMA, uiPartOffset)) << "," << Int(pcCU->getIntraDir(CHANNEL_TYPE_CHROMA, uiPartOffset)) << "] cost = " << dPUCost << "n";
#endif
// check r-d cost
// 根据cost选取最优,更新参数
if( dPUCost < dBestPUCost )
{
DEBUG_STRING_SWAP(sPU, sMode)
#if HHI_RQT_INTRA_SPEEDUP_MOD
uiSecondBestMode = uiBestPUMode;
dSecondBestPUCost = dBestPUCost;
#endif
uiBestPUMode = uiOrgMode;
uiBestPUDistY = uiPUDistY;
dBestPUCost = dPUCost;
xSetIntraResultLumaQT( pcRecoYuv, tuRecurseWithPU );
if (pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag())
{
const Int xOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).x0;
const Int yOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).y0;
for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex )
{
if (bMaintainResidual[storedResidualIndex])
{
xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaPU[storedResidualIndex], tuRecurseWithPU, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE );
}
}
}
UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() uiPartOffset, uiQPartNum * sizeof( UChar ) );
for (UInt component = 0; component < numberValidComponents; component )
{
const ComponentID compID = ComponentID(component);
::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID ) uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip(compID) uiPartOffset, uiQPartNum * sizeof( UChar ) );
}
}
#if HHI_RQT_INTRA_SPEEDUP_MOD
else if( dPUCost < dSecondBestPUCost )
{
uiSecondBestMode = uiOrgMode;
dSecondBestPUCost = dPUCost;
}
#endif
} // Mode loop
#if HHI_RQT_INTRA_SPEEDUP
#if HHI_RQT_INTRA_SPEEDUP_MOD
for( UInt ui =0; ui < 2; ui )
#endif
{
#if HHI_RQT_INTRA_SPEEDUP_MOD
UInt uiOrgMode = ui ? uiSecondBestMode : uiBestPUMode;
if( uiOrgMode == MAX_UINT )
{
break;
}
#else
UInt uiOrgMode = uiBestPUMode; // 模式为最优预测模式
#endif
#if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST
if (DebugOptionList::ForceLumaMode.isSet())
{
uiOrgMode = DebugOptionList::ForceLumaMode.getInt();
}
#endif
pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth uiInitTrDepth );
DEBUG_STRING_NEW(sModeTree)
// set context models
m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );
// determine residual for partition
Distortion uiPUDistY = 0;
Double dPUCost = 0.0;
// 使用最优模式,变换、量化、编码计算cost
// bCheckFirst: false - PU不再划分
xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, false, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sModeTree));
// check r-d cost
// 检测同一模式下 bCheckFirst - true or fasle is better
if( dPUCost < dBestPUCost )
{
DEBUG_STRING_SWAP(sPU, sModeTree)
uiBestPUMode = uiOrgMode;
uiBestPUDistY = uiPUDistY;
dBestPUCost = dPUCost;
xSetIntraResultLumaQT( pcRecoYuv, tuRecurseWithPU );
if (pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag())
{
const Int xOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).x0;
const Int yOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).y0;
for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex )
{
if (bMaintainResidual[storedResidualIndex])
{
xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaPU[storedResidualIndex], tuRecurseWithPU, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE );
}
}
}
const UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
::memcpy( m_puhQTTempTrIdx, pcCU->getTransformIdx() uiPartOffset, uiQPartNum * sizeof( UChar ) );
for (UInt component = 0; component < numberValidComponents; component )
{
const ComponentID compID = ComponentID(component);
::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID ) uiPartOffset, uiQPartNum * sizeof( UChar ) );
::memcpy( m_puhQTTempTransformSkipFlag[compID], pcCU->getTransformSkip(compID) uiPartOffset, uiQPartNum * sizeof( UChar ) );
}
}
} // Mode loop
#endif
DEBUG_STRING_APPEND(sDebug, sPU)
//--- update overall distortion ---
uiOverallDistY = uiBestPUDistY;
//--- update transform index and cbf ---
const UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
::memcpy( pcCU->getTransformIdx() uiPartOffset, m_puhQTTempTrIdx, uiQPartNum * sizeof( UChar ) );
for (UInt component = 0; component < numberValidComponents; component )
{
const ComponentID compID = ComponentID(component);
::memcpy( pcCU->getCbf( compID ) uiPartOffset, m_puhQTTempCbf[compID], uiQPartNum * sizeof( UChar ) );
::memcpy( pcCU->getTransformSkip( compID ) uiPartOffset, m_puhQTTempTransformSkipFlag[compID ], uiQPartNum * sizeof( UChar ) );
}
//--- set reconstruction for next intra prediction blocks ---
if( !tuRecurseWithPU.IsLastSection() )
{
const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y);
const UInt uiCompWidth = puRect.width;
const UInt uiCompHeight = puRect.height;
const UInt uiZOrder = pcCU->getZorderIdxInCtu() uiPartOffset;
Pel* piDes = pcCU->getPic()->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), uiZOrder );
const UInt uiDesStride = pcCU->getPic()->getPicYuvRec()->getStride( COMPONENT_Y);
const Pel* piSrc = pcRecoYuv->getAddr( COMPONENT_Y, uiPartOffset );
const UInt uiSrcStride = pcRecoYuv->getStride( COMPONENT_Y);
for( UInt uiY = 0; uiY < uiCompHeight; uiY , piSrc = uiSrcStride, piDes = uiDesStride )
{
for( UInt uiX = 0; uiX < uiCompWidth; uiX )
{
piDes[ uiX ] = piSrc[ uiX ];
}
}
}
//=== update PU data ====
pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiBestPUMode, uiPartOffset, uiDepth uiInitTrDepth );
} while (tuRecurseWithPU.nextSection(tuRecurseCU));
if( uiNumPU > 1 )
{ // set Cbf for all blocks
UInt uiCombCbfY = 0;
UInt uiCombCbfU = 0;
UInt uiCombCbfV = 0;
UInt uiPartIdx = 0;
for( UInt uiPart = 0; uiPart < 4; uiPart , uiPartIdx = uiQNumParts )
{
uiCombCbfY |= pcCU->getCbf( uiPartIdx, COMPONENT_Y, 1 );
uiCombCbfU |= pcCU->getCbf( uiPartIdx, COMPONENT_Cb, 1 );
uiCombCbfV |= pcCU->getCbf( uiPartIdx, COMPONENT_Cr, 1 );
}
for( UInt uiOffs = 0; uiOffs < 4 * uiQNumParts; uiOffs )
{
pcCU->getCbf( COMPONENT_Y )[ uiOffs ] |= uiCombCbfY;
pcCU->getCbf( COMPONENT_Cb )[ uiOffs ] |= uiCombCbfU;
pcCU->getCbf( COMPONENT_Cr )[ uiOffs ] |= uiCombCbfV;
}
}
//===== reset context models =====
m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
//===== set distortion (rate and r-d costs are determined later) =====
pcCU->getTotalDistortion() = uiOverallDistY;
}
luma