[vdpu382]: optimize the rcb buf size calculation

Signed-off-by: Yandong Lin <yandong.lin@rock-chips.com>
Change-Id: Ie32b0a973bb999960a65aa03204eefe503ecc7fc
This commit is contained in:
Yandong Lin
2022-12-01 16:08:14 +08:00
committed by Herman Chen
parent 7f676f7099
commit d9fd027ab3
4 changed files with 140 additions and 51 deletions

View File

@@ -273,20 +273,78 @@ static MPP_RET init_common_regs(Vdpu382Avs2dRegSet *regs)
return MPP_OK;
}
//TODO calc rcb buffer size;
/*
/*
 * Recompute the size of every RCB buffer entry for an AVS2 picture.
 *
 * The size field of each rcb_info[] slot indexed by an RCB_* identifier below
 * is overwritten; every size is a bit count converted with MPP_RCB_BYTES().
 *
 * rcb_info - table whose size fields are rewritten
 * hw_regs  - register set; only common.reg012.fbc_e is read
 * width    - picture width, aligned up to the LCU size before use
 * height   - takes no part in any calculation in this function
 * dxva     - Avs2dSyntax_t pointer; pp.lcu_size, pp.chroma_format_idc and
 *            pp.bit_depth_chroma_minus8 are read from it
 */
static void avs2d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
                                  Vdpu382Avs2dRegSet *hw_regs,
                                  RK_S32 width, RK_S32 height, void *dxva)
{
    Avs2dSyntax_t *syntax = dxva;
    RK_U8 ctu_size = 1 << syntax->pp.lcu_size;
    RK_U32 chroma_fmt_idc = syntax->pp.chroma_format_idc;
    RK_U8 bit_depth = syntax->pp.bit_depth_chroma_minus8 + 8;
    RK_U32 strmd_bits = 0;
    RK_U32 transd_bits = 0;
    RK_U32 dblk_bits = 0;
    RK_U32 fbc_bits = 0;

    (void) height;

    width = MPP_ALIGN(width, ctu_size);

    /* RCB_STRMD_ROW / RCB_TRANSD_ROW: non-zero only once width reaches 8192 */
    if (width >= 8192) {
        RK_U32 factor = 64 / ctu_size;

        strmd_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) / factor * 24;
        transd_bits = MPP_ALIGN(width - 8192, 4) << 1;
    }
    rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(strmd_bits);
    rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(transd_bits);

    /* RCB_INTER_ROW: 21 bits per pixel column */
    RK_U32 inter_bits = width * 21;
    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(inter_bits);

    /* RCB_INTRA_ROW: 11 bits per column, doubled when chroma is present */
    RK_U32 intra_bits = width * (chroma_fmt_idc ? 2 : 1) * 11;
    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(intra_bits);

    /* RCB_DBLK_ROW: only sized for chroma_format_idc == 1 */
    if (chroma_fmt_idc == 1) {
        RK_S32 dblk_per_col = (ctu_size == 32 ? 4 : 2) + 8 * bit_depth;

        dblk_bits = width * dblk_per_col;
    }
    rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(dblk_bits);

    /* RCB_SAO_ROW: bit-depth multiplier is 2 for format idc 1 or 2, else 3 */
    RK_S32 sao_mult = (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) ? 2 : 3;
    RK_U32 sao_bits = width * (128 / ctu_size + sao_mult * bit_depth);
    rcb_info[RCB_SAO_ROW].size = MPP_RCB_BYTES(sao_bits);

    /* RCB_FBC_ROW: needed only when the fbc_e register bit is set */
    if (hw_regs->common.reg012.fbc_e)
        fbc_bits = width * 4 * bit_depth;
    rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(fbc_bits);

    /* Column buffers are given zero size here */
    rcb_info[RCB_TRANSD_COL].size = 0;
    rcb_info[RCB_INTER_COL].size = 0;
    rcb_info[RCB_FILT_COL].size = 0;
}
*/
static void hal_avs2d_rcb_info_update(void *hal, Vdpu382Avs2dRegSet *hw_regs)
{
@@ -298,10 +356,8 @@ static void hal_avs2d_rcb_info_update(void *hal, Vdpu382Avs2dRegSet *hw_regs)
RK_S32 i = 0;
RK_S32 loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1;
(void) hw_regs;
reg_ctx->rcb_buf_size = vdpu382_get_rcb_buf_size(reg_ctx->rcb_info, width, height);
//avs2d_refine_rcb_size(reg_ctx->rcb_info, hw_regs, width, height, (void *)&p_hal->syntax);
avs2d_refine_rcb_size(reg_ctx->rcb_info, hw_regs, width, height, (void *)&p_hal->syntax);
for (i = 0; i < loop; i++) {
MppBuffer rcb_buf = NULL;

View File

@@ -835,43 +835,52 @@ static void h264d_refine_rcb_size(H264dHalCtx_t *p_hal, Vdpu382RcbInfo *rcb_info
width = MPP_ALIGN(width, H264_CTU_SIZE);
height = MPP_ALIGN(height, H264_CTU_SIZE);
/* RCB_STRMD_ROW */
if (width > 4096)
if (width >= 4096)
rcb_bits = ((width + 15) / 16) * 154 * (mbaff ? 2 : 1);
else
rcb_bits = 0;
rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_TRANSD_ROW */
if (width > 8192)
if (width >= 8192)
rcb_bits = ((width - 8192 + 3) / 4) * 2;
else
rcb_bits = 0;
rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_TRANSD_COL */
if (height > 8192)
rcb_bits = ((height - 8192 + 3) / 4) * 2;
else
rcb_bits = 0;
rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);
rcb_info[RCB_TRANSD_COL].size = 0;
/* RCB_INTER_ROW */
rcb_bits = width * 42;
rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_INTER_COL */
rcb_info[RCB_INTER_COL].size = 0;
/* RCB_INTRA_ROW */
rcb_bits = width * 44;
if (mbaff)
rcb_bits = width * 44;
else
rcb_bits = width * ((chroma_format_idc ? 1 : 0) + 1) * 11;
rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_DBLK_ROW */
rcb_bits = width * (2 + (mbaff ? 12 : 6) * bit_depth);
rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_SAO_ROW */
rcb_info[RCB_SAO_ROW].size = 0;
/* RCB_FBC_ROW */
if (regs->common.reg012.fbc_e) {
rcb_bits = (chroma_format_idc > 1) ? (2 * width * bit_depth) : 0;
} else
rcb_bits = 0;
rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_FILT_COL */
rcb_info[RCB_FILT_COL].size = 0;
}

View File

@@ -458,7 +458,7 @@ static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
RK_U32 rcb_bits = 0;
h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
RK_U32 chroma_fmt_idc = pp->chroma_format_idc;//0 400,1 4202 ,422,3 444
RK_U32 chroma_fmt_idc = pp->chroma_format_idc;//0 400,1 420 ,2 422,3 444
RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
RK_U32 tile_col_cut_num = pp->num_tile_columns_minus1;
@@ -466,34 +466,42 @@ static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
width = MPP_ALIGN(width, ctu_size);
height = MPP_ALIGN(height, ctu_size);
/* RCB_STRMD_ROW */
if (width > 8192) {
RK_U32 factor = ctu_size / 16;
rcb_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) * factor * 24 + ext_align_size;
if (width >= 8192) {
RK_U32 factor = 64 / ctu_size;
rcb_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) / factor * 24 + ext_align_size;
} else
rcb_bits = 0;
rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_TRANSD_ROW */
if (width > 8192)
if (width >= 8192)
rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
else
rcb_bits = 0;
rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_TRANSD_COL */
if (height > 8192)
rcb_bits = (MPP_ALIGN(height - 8192, 4) << 1) + ext_align_size;
if (height >= 8192 && tile_col_cut_num)
rcb_bits = tile_col_cut_num ? (MPP_ALIGN(height - 8192, 4) << 1) : 0;
else
rcb_bits = 0;
rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_INTER_ROW */
rcb_bits = width * 22 + ext_align_size;
rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_INTER_COL */
rcb_bits = height * 22 + ext_align_size;
rcb_bits = tile_col_cut_num ? (height * 22) : 0;
rcb_info[RCB_INTER_COL].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_INTRA_ROW */
rcb_bits = width * 48 + ext_align_size;
rcb_bits = width * ((chroma_fmt_idc ? 1 : 0) + 1) * 11 + ext_align_size;
rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_DBLK_ROW */
if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
if (ctu_size == 32)
@@ -508,6 +516,7 @@ static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
}
rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 256 : 192)) + ext_align_size;
rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_SAO_ROW */
if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
rcb_bits = width * (128 / ctu_size + 2 * bit_depth);
@@ -516,6 +525,7 @@ static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
}
rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 160 : 128)) + ext_align_size;
rcb_info[RCB_SAO_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_FBC_ROW */
if (hw_regs->common.reg012.fbc_e) {
rcb_bits = width * (chroma_fmt_idc - 1) * 2 * bit_depth;
@@ -523,21 +533,24 @@ static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
} else
rcb_bits = 0;
rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_FILT_COL */
if (hw_regs->common.reg012.fbc_e) {
RK_U32 ctu_idx = ctu_size >> 5;
RK_U32 a = filterd_fbc_on[chroma_fmt_idc][ctu_idx].a;
RK_U32 b = filterd_fbc_on[chroma_fmt_idc][ctu_idx].b;
if (tile_col_cut_num) {
if (hw_regs->common.reg012.fbc_e) {
RK_U32 ctu_idx = ctu_size >> 5;
RK_U32 a = filterd_fbc_on[ctu_idx][chroma_fmt_idc].a;
RK_U32 b = filterd_fbc_on[ctu_idx][chroma_fmt_idc].b;
rcb_bits = height * (a * bit_depth + b);
} else {
RK_U32 ctu_idx = ctu_size >> 5;
RK_U32 a = filterd_fbc_off[chroma_fmt_idc][ctu_idx].a;
RK_U32 b = filterd_fbc_off[chroma_fmt_idc][ctu_idx].b;
rcb_bits = height * (a * bit_depth + b);
} else {
RK_U32 ctu_idx = ctu_size >> 5;
RK_U32 a = filterd_fbc_off[ctu_idx][chroma_fmt_idc].a;
RK_U32 b = filterd_fbc_off[ctu_idx][chroma_fmt_idc].b;
rcb_bits = height * (a * bit_depth + b + (bit_depth == 10 ? 192 * ctu_size >> 4 : 0));
}
rcb_bits += ext_align_size;
rcb_bits = height * (a * bit_depth + b + (bit_depth == 10 ? 192 * ctu_size >> 4 : 0));
}
} else
rcb_bits = 0;
rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
}

View File

@@ -278,55 +278,66 @@ static void vp9d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
{
RK_U32 rcb_bits = 0;
DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)data;
RK_U32 num_tiles = pic_param->log2_tile_rows;
RK_U32 num_tiles_col = 1 << pic_param->log2_tile_cols;
RK_U32 bit_depth = pic_param->BitDepthMinus8Luma + 8;
RK_U32 ext_align_size = num_tiles * 64 * 8;
RK_U32 ext_align_size = num_tiles_col * 64 * 8;
width = MPP_ALIGN(width, VP9_CTU_SIZE);
height = MPP_ALIGN(height, VP9_CTU_SIZE);
/* RCB_STRMD_ROW */
if (width > 4096)
if (width >= 4096)
rcb_bits = MPP_ALIGN(width, 64) * 232 + ext_align_size;
else
rcb_bits = 0;
rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_TRANSD_ROW */
if (width > 8192)
if (width >= 8192)
rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
else
rcb_bits = 0;
rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_TRANSD_COL */
if (height > 8192)
rcb_bits = (MPP_ALIGN(height - 8192, 4) << 1) + ext_align_size;
if ((height >= 8192) && (num_tiles_col > 1))
rcb_bits = (MPP_ALIGN(height - 8192, 4) << 1);
else
rcb_bits = 0;
rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_INTER_ROW */
rcb_bits = width * 36 + ext_align_size;
rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_INTER_COL */
rcb_info[RCB_INTER_COL].size = 0;
/* RCB_INTRA_ROW */
rcb_bits = width * 48 + ext_align_size;
rcb_bits = width * 2 * 11 + ext_align_size;
rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_DBLK_ROW */
rcb_bits = width * (1 + 16 * bit_depth) + num_tiles * 192 * bit_depth + ext_align_size;
rcb_bits = width * (0.5 + 16 * bit_depth) + num_tiles_col * 192 * bit_depth + ext_align_size;
rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_SAO_ROW */
rcb_info[RCB_SAO_ROW].size = 0;
/* RCB_FBC_ROW */
if (vp9_hw_regs->common.reg012.fbc_e) {
rcb_bits = 8 * width * bit_depth + ext_align_size;
} else
rcb_bits = 0;
rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);
/* RCB_FILT_COL */
if (vp9_hw_regs->common.reg012.fbc_e) {
rcb_bits = height * (4 + 24 * bit_depth);
if (num_tiles_col > 1) {
if (vp9_hw_regs->common.reg012.fbc_e) {
rcb_bits = height * (4 + 24 * bit_depth);
} else
rcb_bits = height * (4 + 16 * bit_depth);
} else
rcb_bits = height * (4 + 16 * bit_depth);
rcb_bits += ext_align_size;
rcb_bits = 0;
rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
}