[AV1] AV1 帧内预测

这篇博客主要对AV1的帧内预测编码部分的代码进行分析。

AV1同其他标准一样，预测是为了生成当前块的预测器（Predictor），然后把当前块像素值与预测器的差值传递给编码器的下一个阶段。

在AV1的参考软件libaom-av1中，帧内预测从函数 av1_predict_intra_block_facade() 开始。

代码语言：C

// Entry point of AV1 intra prediction for one transform block of one plane.
//
// Derives the destination pointer, prediction mode, palette flag,
// filter-intra mode and angle delta from the current block's mode info and
// forwards them to av1_predict_intra_block(). When a chroma plane uses
// UV_CFL_PRED, the DC prediction is either computed (and optionally cached)
// or loaded from the cache, and cfl_predict_block() is then applied to dst.
//
//   cm      - common codec state, forwarded to av1_predict_intra_block()
//   xd      - block descriptor; xd->mi[0] supplies the mode info
//   plane   - plane index (compared against AOM_PLANE_Y below)
//   blk_col - column offset of the block, in units of (1 << MI_SIZE_LOG2)
//             samples within the plane's dst buffer
//   blk_row - row offset of the block, in the same units
//   tx_size - transform size of the block being predicted
void av1_predict_intra_block_facade(const AV1_COMMON* cm, MACROBLOCKD* xd, int plane, int blk_col, int blk_row, TX_SIZE tx_size)
{
    const MB_MODE_INFO* const mbmi = xd->mi[0];
    // Per-plane state for the current plane; its dst buffer is where the
    // prediction is written.
    struct macroblockd_plane* const pd = &xd->plane[plane];
    const int dst_stride = pd->dst.stride;
    uint8_t* dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
    // Prediction mode: luma uses mbmi->mode, chroma maps uv_mode through
    // get_uv_mode(). (Author's note: in the encoder, the caller iterates over
    // the candidate modes.)
    const PREDICTION_MODE mode = (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
    const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
    // Filter-intra is only considered for luma; FILTER_INTRA_MODES acts as the
    // "not used" marker otherwise.
    const FILTER_INTRA_MODE filter_intra_mode = (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra) ? mbmi->filter_intra_mode_info.filter_intra_mode : FILTER_INTRA_MODES;
    // Angle offset for directional modes, scaled by ANGLE_STEP. Author's note:
    // each of the 8 directional modes is refined into 7 angles
    // [-9, -6, -3, 0, +3, +6, +9] degrees, giving 8 * 7 = 56 directions
    // (ANGLE_STEP is defined outside this snippet -- confirm its value there).
    const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;

    // Chroma-from-Luma (CfL) path.
    if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED)
    {
        CFL_CTX* const cfl = &xd->cfl;
        CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
        if (cfl->dc_pred_is_cached[pred_plane] == 0)
        {
            // No cached DC prediction yet: compute it, and store it if
            // caching is enabled.
            av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode, dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
            if (cfl->use_dc_pred_cache)
            {
                cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
                cfl->dc_pred_is_cached[pred_plane] = 1;
            }
        }
        else
        {
            cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
        }
        cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
        return;
    }
    // Regular path: intra prediction of a single component starts here.
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode, dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
}

进入到函数 av1_predict_intra_block 后，就开始建立intra predictor了，与其他的标准一样，帧内预测首先要准备好neighbors。

代码语言：C

// Predicts one transform block.
//
// 1. Palette mode: the predictor is produced directly from the color index
//    map and the palette colors, and the function returns.
// 2. Otherwise: works out which neighboring edges (top, top-right, left,
//    bottom-left) exist and how many pixels of each are usable, then hands
//    those counts to build_intra_predictors() (or the high-bitdepth variant).
//    Edges that are unavailable are padded inside that function.
//
//   wpx, hpx           - plane block width/height in pixels
//   col_off, row_off   - block offset in units of (1 << MI_SIZE_LOG2) pixels
//   ref, ref_stride    - reference buffer the neighbors are read from
//   dst, dst_stride    - buffer the prediction is written to
void av1_predict_intra_block(const AV1_COMMON* cm, const MACROBLOCKD* xd, int wpx, int hpx, TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette, 
    FILTER_INTRA_MODE filter_intra_mode, const uint8_t* ref, int ref_stride, uint8_t* dst, int dst_stride, int col_off, int row_off, int plane) 
{
    const MB_MODE_INFO* const mbmi = xd->mi[0];
    const int txwpx = tx_size_wide[tx_size];
    const int txhpx = tx_size_high[tx_size];
    const int x = col_off << MI_SIZE_LOG2;
    const int y = row_off << MI_SIZE_LOG2;

    // Palette mode: look every pixel up through the color index map, then
    // return without building any edge references.
    if (use_palette) 
    {
        int r, c;
        const uint8_t* const map = xd->plane[plane != 0].color_index_map + xd->color_index_map_offset[plane != 0];
        const uint16_t* const palette = mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
        if (is_cur_buf_hbd(xd)) 
        {
            uint16_t* dst16 = CONVERT_TO_SHORTPTR(dst);
            for (r = 0; r < txhpx; ++r) 
            {
                for (c = 0; c < txwpx; ++c) 
                {
                    dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
                }
            }
        }
        else 
        {
            for (r = 0; r < txhpx; ++r) 
            {
                for (c = 0; c < txwpx; ++c) 
                {
                    dst[r * dst_stride + c] = (uint8_t)palette[map[(r + y) * wpx + c + x]];
                }
            }
        }
        return;
    }

    const struct macroblockd_plane* const pd = &xd->plane[plane];
    const int txw = tx_size_wide_unit[tx_size];
    const int txh = tx_size_high_unit[tx_size];
    const int ss_x = pd->subsampling_x;
    const int ss_y = pd->subsampling_y;
    // A block that is not in the first row/column of its parent always has a
    // top/left neighbor; otherwise fall back to the availability flags in xd.
    const int have_top = row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
    const int have_left = col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
    // NOTE(review): the shift by 3 suggests mb_to_*_edge is stored in 1/8-pixel
    // units -- confirm against the MACROBLOCKD definition.
    const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
    const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
    const int xr_chr_offset = 0;
    const int yd_chr_offset = 0;

    // Distance between the right edge of this prediction block to
    // the frame right edge
    const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + (wpx - x - txwpx) - xr_chr_offset;
    // Distance between the bottom edge of this prediction block to
    // the frame bottom edge
    const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + (hpx - y - txhpx) - yd_chr_offset;
    const int right_available = mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
    const int bottom_available = (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);

    const PARTITION_TYPE partition = mbmi->partition;

    BLOCK_SIZE bsize = mbmi->sb_type;
    // force 4x4 chroma component block size.
    if (ss_x || ss_y) 
    {
        bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
    }

    // Top and left availability were determined above; now check whether the
    // top-right and bottom-left neighbors exist.
    const int have_top_right = has_top_right(cm, bsize, mi_row, mi_col, have_top, right_available, partition, tx_size, row_off, col_off, ss_x, ss_y);
    const int have_bottom_left = has_bottom_left(cm, bsize, mi_row, mi_col, bottom_available, have_left, partition, tx_size, row_off, col_off, ss_x, ss_y);

    const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;

// High-bitdepth buffers (more than 8 bits per sample).
#if CONFIG_AV1_HIGHBITDEPTH
    if (is_cur_buf_hbd(xd)) 
    {
        build_intra_predictors_high(
            xd, ref, ref_stride, dst, dst_stride, mode, angle_delta,
            filter_intra_mode, tx_size, disable_edge_filter,
            have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
            have_top_right ? AOMMIN(txwpx, xr) : 0,
            have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
            have_bottom_left ? AOMMIN(txhpx, yd) : 0, plane);
        return;
    }
#endif

// Neighbor availability is now known; pass the usable pixel count of each
// edge (clamped to the frame boundary) on to the predictor builder.
    build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode, tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0, have_top_right ? AOMMIN(txwpx, xr) : 0,
        have_left ? AOMMIN(txhpx, yd + txhpx) : 0, have_bottom_left ? AOMMIN(txhpx, yd) : 0, plane);
}

到该函数为止，已经知晓了当前块四个方向的邻居（neighbor）是否存在。接下来，根据预测模式的不同，决定上、右上、左、左下四个方向的邻居在生成预测器的计算过程中是否会被用到；如果某个方向的邻居需要被用到而它又不存在，就要通过填充（padding）来生成对应的参考像素。

代码语言：C

// Builds the 8-bit intra predictor for one transform block.
//
// Copies the needed neighbor pixels from `ref` into local, aligned edge
// buffers (above_row / left_col), pads whatever is missing, optionally
// filters/upsamples the edges for directional modes, and finally dispatches
// to the mode-specific predictor that writes into `dst`.
//
//   n_top_px        - number of usable pixels directly above the block
//   n_topright_px   - number of usable pixels above and to the right
//   n_left_px       - number of usable pixels directly to the left
//   n_bottomleft_px - number of usable pixels below and to the left
// A count of 0 means that edge is unavailable and must be synthesized.
static void build_intra_predictors(const MACROBLOCKD* xd, const uint8_t* ref,
    int ref_stride, uint8_t* dst, int dst_stride, PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode, TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px, int n_left_px, int n_bottomleft_px, int plane)
{
    int i;
    const uint8_t* above_ref = ref - ref_stride;
    const uint8_t* left_ref = ref - 1;
    // Local edge buffers; the usable row/column starts 16 bytes in so that
    // index -1 (the above-left pixel) is addressable.
    DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
    DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
    uint8_t* const above_row = above_data + 16;
    uint8_t* const left_col = left_data + 16;
    const int txwpx = tx_size_wide[tx_size];
    const int txhpx = tx_size_high[tx_size];
    int need_left = extend_modes[mode] & NEED_LEFT;
    int need_above = extend_modes[mode] & NEED_ABOVE;
    int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
    int p_angle = 0;
    const int is_dr_mode = av1_is_directional_mode(mode);
    const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;

    // The default values if ref pixels are not available:
    // 128 127 127 .. 127 127 127 127 127 127
    // 129  A   B  ..  Y   Z
    // 129  C   D  ..  W   X
    // 129  E   F  ..  U   V
    // 129  G   H  ..  S   T   T   T   T   T
    // ..

    // For directional modes the required edges depend on the final angle
    // (base angle of the mode plus angle_delta), so the flags taken from
    // extend_modes[] are overridden here.
    if (is_dr_mode)
    {
        p_angle = mode_to_angle_map[mode] + angle_delta;
        if (p_angle <= 90)
            need_above = 1, need_left = 0, need_above_left = 1;
        else if (p_angle < 180)
            need_above = 1, need_left = 1, need_above_left = 1;
        else
            need_above = 0, need_left = 1, need_above_left = 1;
    }
    // Filter-intra prediction needs the above, left and above-left references.
    if (use_filter_intra)
        need_left = need_above = need_above_left = 1;

    assert(n_top_px >= 0);
    assert(n_topright_px >= 0);
    assert(n_left_px >= 0);
    assert(n_bottomleft_px >= 0);

    // Shortcut: the mode uses only one side and that side is unavailable.
    // The whole block is filled with a single value -- the first pixel of the
    // other side if it exists, otherwise the default 129 / 127.
    if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0))
    {
        int val;
        if (need_left) {
            val = (n_top_px > 0) ? above_ref[0] : 129;
        }
        else {
            val = (n_left_px > 0) ? left_ref[0] : 127;
        }
        for (i = 0; i < txhpx; ++i) {
            memset(dst, val, txwpx);
            dst += dst_stride;
        }
        return;
    }

    // NEED_LEFT: fill left_col with the left edge and, when required, the
    // bottom-left extension.
    if (need_left) {
        int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
        if (use_filter_intra) need_bottom = 0;
        if (is_dr_mode) need_bottom = p_angle > 180;
        // the avx2 dr_prediction_z2 may read at most 3 extra bytes,
        // due to the avx2 mask load is with dword granularity.
        // so we initialize 3 extra bytes to silence valgrind complain.
        const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 3);
        i = 0;
        if (n_left_px > 0) {
            for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
            if (need_bottom && n_bottomleft_px > 0) {
                assert(i == txhpx);
                for (; i < txhpx + n_bottomleft_px; i++)
                    left_col[i] = left_ref[i * ref_stride];
            }
            // Pad the remainder by replicating the last copied pixel.
            if (i < num_left_pixels_needed)
                memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
        }
        else {
            // No left pixels at all: replicate the first above pixel, or use
            // the default 129 when the top edge is missing too.
            if (n_top_px > 0) {
                memset(left_col, above_ref[0], num_left_pixels_needed);
            }
            else {
                memset(left_col, 129, num_left_pixels_needed);
            }
        }
    }

    // NEED_ABOVE: fill above_row with the top edge and, when required, the
    // top-right extension.
    if (need_above) {
        int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
        if (use_filter_intra) need_right = 0;
        if (is_dr_mode) need_right = p_angle < 90;
        const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
        if (n_top_px > 0) {
            memcpy(above_row, above_ref, n_top_px);
            i = n_top_px;
            if (need_right && n_topright_px > 0) {
                assert(n_top_px == txwpx);
                memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
                i += n_topright_px;
            }
            // Pad the remainder by replicating the last copied pixel.
            if (i < num_top_pixels_needed)
                memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
        }
        else {
            // No top pixels at all: replicate the first left pixel, or use
            // the default 127 when the left edge is missing too.
            if (n_left_px > 0) {
                memset(above_row, left_ref[0], num_top_pixels_needed);
            }
            else {
                memset(above_row, 127, num_top_pixels_needed);
            }
        }
    }

    // Above-left corner pixel, stored at index -1 of both edge buffers.
    if (need_above_left) {
        if (n_top_px > 0 && n_left_px > 0) {
            above_row[-1] = above_ref[-1];
        }
        else if (n_top_px > 0) {
            above_row[-1] = above_ref[0];
        }
        else if (n_left_px > 0) {
            above_row[-1] = left_ref[0];
        }
        else {
            above_row[-1] = 128;
        }
        left_col[-1] = above_row[-1];
    }

    // Filter-intra mode.
    if (use_filter_intra) 
    {
        av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col, filter_intra_mode);
        return;
    }

    // Directional modes: optionally filter and upsample the edges, then run
    // the directional predictor.
    if (is_dr_mode) {
        int upsample_above = 0;
        int upsample_left = 0;
        if (!disable_edge_filter) {
            const int need_right = p_angle < 90;
            const int need_bottom = p_angle > 180;
            const int filt_type = get_filt_type(xd, plane);
            // Edge filtering is skipped for the pure vertical (90) and pure
            // horizontal (180) angles.
            if (p_angle != 90 && p_angle != 180) {
                const int ab_le = need_above_left ? 1 : 0;
                if (need_above && need_left && (txwpx + txhpx >= 24)) {
                    filter_intra_edge_corner(above_row, left_col);
                }
                if (need_above && n_top_px > 0) {
                    const int strength =
                        intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
                    const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
                    av1_filter_intra_edge(above_row - ab_le, n_px, strength);
                }
                if (need_left && n_left_px > 0) {
                    const int strength = intra_edge_filter_strength(
                        txhpx, txwpx, p_angle - 180, filt_type);
                    const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
                    av1_filter_intra_edge(left_col - ab_le, n_px, strength);
                }
            }
            upsample_above =
                av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
            if (need_above && upsample_above) {
                const int n_px = txwpx + (need_right ? txhpx : 0);
                av1_upsample_intra_edge(above_row, n_px);
            }
            upsample_left =
                av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
            if (need_left && upsample_left) {
                const int n_px = txhpx + (need_bottom ? txwpx : 0);
                av1_upsample_intra_edge(left_col, n_px);
            }
        }
        dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above, upsample_left, p_angle);
        return;
    }

    // DC mode: the predictor variant is selected by which edges are available.
    if (mode == DC_PRED) {
        dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row, left_col);
    }
    // Remaining non-directional modes are dispatched through the pred[] table.
    else {
        pred[mode][tx_size](dst, dst_stride, above_row, left_col);
    }
}

block predict 博客

0 人点赞