mirror of
https://github.com/nyanmisaka/mpp.git
synced 2025-10-29 11:42:05 +08:00
[vdpu382]: optimize the rcb buf size calculation
Signed-off-by: Yandong Lin <yandong.lin@rock-chips.com>
Change-Id: Ie32b0a973bb999960a65aa03204eefe503ecc7fc
This commit is contained in:
@@ -273,20 +273,78 @@ static MPP_RET init_common_regs(Vdpu382Avs2dRegSet *regs)
|
||||
return MPP_OK;
|
||||
}
|
||||
|
||||
//TODO calc rcb buffer size;
|
||||
/*
|
||||
static void avs2d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
|
||||
Vdpu382Avs2dRegSet *hw_regs,
|
||||
RK_S32 width, RK_S32 height, void *dxva)
|
||||
{
|
||||
(void) rcb_info;
|
||||
(void) hw_regs;
|
||||
(void) width;
|
||||
(void) height;
|
||||
(void) dxva;
|
||||
Avs2dSyntax_t *syntax = dxva;
|
||||
RK_U8 ctu_size = 1 << syntax->pp.lcu_size;
|
||||
RK_U32 chroma_fmt_idc = syntax->pp.chroma_format_idc;
|
||||
RK_U8 bit_depth = syntax->pp.bit_depth_chroma_minus8 + 8;
|
||||
RK_U32 rcb_bits = 0;
|
||||
|
||||
width = MPP_ALIGN(width, ctu_size);
|
||||
|
||||
/* RCB_STRMD_ROW */
|
||||
if (width >= 8192) {
|
||||
RK_U32 factor = 64 / ctu_size;
|
||||
|
||||
rcb_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) / factor * 24;
|
||||
} else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_TRANSD_ROW */
|
||||
if (width >= 8192)
|
||||
rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1);
|
||||
else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_TRANSD_COL */
|
||||
rcb_info[RCB_TRANSD_COL].size = 0;
|
||||
|
||||
/* RCB_INTER_ROW */
|
||||
rcb_bits = width * 21;
|
||||
rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_INTER_COL */
|
||||
rcb_info[RCB_INTER_COL].size = 0;
|
||||
|
||||
/* RCB_INTRA_ROW */
|
||||
rcb_bits = width * ((chroma_fmt_idc ? 1 : 0) + 1) * 11;
|
||||
rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_DBLK_ROW */
|
||||
if (chroma_fmt_idc == 1 ) {
|
||||
if (ctu_size == 32)
|
||||
rcb_bits = width * ( 4 + 8 * bit_depth);
|
||||
else
|
||||
rcb_bits = width * ( 2 + 8 * bit_depth);
|
||||
} else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_SAO_ROW */
|
||||
if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
|
||||
rcb_bits = width * (128 / ctu_size + 2 * bit_depth);
|
||||
} else {
|
||||
rcb_bits = width * (128 / ctu_size + 3 * bit_depth);
|
||||
}
|
||||
rcb_info[RCB_SAO_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_FBC_ROW */
|
||||
if (hw_regs->common.reg012.fbc_e)
|
||||
rcb_bits = width * 4 * bit_depth;
|
||||
else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_FILT_COL */
|
||||
rcb_info[RCB_FILT_COL].size = 0;
|
||||
return;
|
||||
}
|
||||
*/
|
||||
|
||||
static void hal_avs2d_rcb_info_update(void *hal, Vdpu382Avs2dRegSet *hw_regs)
|
||||
{
|
||||
@@ -298,10 +356,8 @@ static void hal_avs2d_rcb_info_update(void *hal, Vdpu382Avs2dRegSet *hw_regs)
|
||||
RK_S32 i = 0;
|
||||
RK_S32 loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1;
|
||||
|
||||
(void) hw_regs;
|
||||
|
||||
reg_ctx->rcb_buf_size = vdpu382_get_rcb_buf_size(reg_ctx->rcb_info, width, height);
|
||||
//avs2d_refine_rcb_size(reg_ctx->rcb_info, hw_regs, width, height, (void *)&p_hal->syntax);
|
||||
avs2d_refine_rcb_size(reg_ctx->rcb_info, hw_regs, width, height, (void *)&p_hal->syntax);
|
||||
|
||||
for (i = 0; i < loop; i++) {
|
||||
MppBuffer rcb_buf = NULL;
|
||||
|
||||
@@ -835,43 +835,52 @@ static void h264d_refine_rcb_size(H264dHalCtx_t *p_hal, Vdpu382RcbInfo *rcb_info
|
||||
|
||||
width = MPP_ALIGN(width, H264_CTU_SIZE);
|
||||
height = MPP_ALIGN(height, H264_CTU_SIZE);
|
||||
|
||||
/* RCB_STRMD_ROW */
|
||||
if (width > 4096)
|
||||
if (width >= 4096)
|
||||
rcb_bits = ((width + 15) / 16) * 154 * (mbaff ? 2 : 1);
|
||||
else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_TRANSD_ROW */
|
||||
if (width > 8192)
|
||||
if (width >= 8192)
|
||||
rcb_bits = ((width - 8192 + 3) / 4) * 2;
|
||||
else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_TRANSD_COL */
|
||||
if (height > 8192)
|
||||
rcb_bits = ((height - 8192 + 3) / 4) * 2;
|
||||
else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);
|
||||
rcb_info[RCB_TRANSD_COL].size = 0;
|
||||
|
||||
/* RCB_INTER_ROW */
|
||||
rcb_bits = width * 42;
|
||||
rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_INTER_COL */
|
||||
rcb_info[RCB_INTER_COL].size = 0;
|
||||
|
||||
/* RCB_INTRA_ROW */
|
||||
rcb_bits = width * 44;
|
||||
if (mbaff)
|
||||
rcb_bits = width * 44;
|
||||
else
|
||||
rcb_bits = width * ((chroma_format_idc ? 1 : 0) + 1) * 11;
|
||||
rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_DBLK_ROW */
|
||||
rcb_bits = width * (2 + (mbaff ? 12 : 6) * bit_depth);
|
||||
rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_SAO_ROW */
|
||||
rcb_info[RCB_SAO_ROW].size = 0;
|
||||
|
||||
/* RCB_FBC_ROW */
|
||||
if (regs->common.reg012.fbc_e) {
|
||||
rcb_bits = (chroma_format_idc > 1) ? (2 * width * bit_depth) : 0;
|
||||
} else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_FILT_COL */
|
||||
rcb_info[RCB_FILT_COL].size = 0;
|
||||
}
|
||||
|
||||
@@ -458,7 +458,7 @@ static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
|
||||
RK_U32 rcb_bits = 0;
|
||||
h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
|
||||
DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
|
||||
RK_U32 chroma_fmt_idc = pp->chroma_format_idc;//0 400,1 4202 ,422,3 444
|
||||
RK_U32 chroma_fmt_idc = pp->chroma_format_idc;//0 400,1 420 ,2 422,3 444
|
||||
RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
|
||||
RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
|
||||
RK_U32 tile_col_cut_num = pp->num_tile_columns_minus1;
|
||||
@@ -466,34 +466,42 @@ static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
|
||||
|
||||
width = MPP_ALIGN(width, ctu_size);
|
||||
height = MPP_ALIGN(height, ctu_size);
|
||||
|
||||
/* RCB_STRMD_ROW */
|
||||
if (width > 8192) {
|
||||
RK_U32 factor = ctu_size / 16;
|
||||
rcb_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) * factor * 24 + ext_align_size;
|
||||
if (width >= 8192) {
|
||||
RK_U32 factor = 64 / ctu_size;
|
||||
|
||||
rcb_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) / factor * 24 + ext_align_size;
|
||||
} else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_TRANSD_ROW */
|
||||
if (width > 8192)
|
||||
if (width >= 8192)
|
||||
rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
|
||||
else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_TRANSD_COL */
|
||||
if (height > 8192)
|
||||
rcb_bits = (MPP_ALIGN(height - 8192, 4) << 1) + ext_align_size;
|
||||
if (height >= 8192 && tile_col_cut_num)
|
||||
rcb_bits = tile_col_cut_num ? (MPP_ALIGN(height - 8192, 4) << 1) : 0;
|
||||
else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_INTER_ROW */
|
||||
rcb_bits = width * 22 + ext_align_size;
|
||||
rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_INTER_COL */
|
||||
rcb_bits = height * 22 + ext_align_size;
|
||||
rcb_bits = tile_col_cut_num ? (height * 22) : 0;
|
||||
rcb_info[RCB_INTER_COL].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_INTRA_ROW */
|
||||
rcb_bits = width * 48 + ext_align_size;
|
||||
rcb_bits = width * ((chroma_fmt_idc ? 1 : 0) + 1) * 11 + ext_align_size;
|
||||
rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_DBLK_ROW */
|
||||
if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
|
||||
if (ctu_size == 32)
|
||||
@@ -508,6 +516,7 @@ static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
|
||||
}
|
||||
rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 256 : 192)) + ext_align_size;
|
||||
rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_SAO_ROW */
|
||||
if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
|
||||
rcb_bits = width * (128 / ctu_size + 2 * bit_depth);
|
||||
@@ -516,6 +525,7 @@ static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
|
||||
}
|
||||
rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 160 : 128)) + ext_align_size;
|
||||
rcb_info[RCB_SAO_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_FBC_ROW */
|
||||
if (hw_regs->common.reg012.fbc_e) {
|
||||
rcb_bits = width * (chroma_fmt_idc - 1) * 2 * bit_depth;
|
||||
@@ -523,21 +533,24 @@ static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
|
||||
} else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_FILT_COL */
|
||||
if (hw_regs->common.reg012.fbc_e) {
|
||||
RK_U32 ctu_idx = ctu_size >> 5;
|
||||
RK_U32 a = filterd_fbc_on[chroma_fmt_idc][ctu_idx].a;
|
||||
RK_U32 b = filterd_fbc_on[chroma_fmt_idc][ctu_idx].b;
|
||||
if (tile_col_cut_num) {
|
||||
if (hw_regs->common.reg012.fbc_e) {
|
||||
RK_U32 ctu_idx = ctu_size >> 5;
|
||||
RK_U32 a = filterd_fbc_on[ctu_idx][chroma_fmt_idc].a;
|
||||
RK_U32 b = filterd_fbc_on[ctu_idx][chroma_fmt_idc].b;
|
||||
|
||||
rcb_bits = height * (a * bit_depth + b);
|
||||
} else {
|
||||
RK_U32 ctu_idx = ctu_size >> 5;
|
||||
RK_U32 a = filterd_fbc_off[chroma_fmt_idc][ctu_idx].a;
|
||||
RK_U32 b = filterd_fbc_off[chroma_fmt_idc][ctu_idx].b;
|
||||
rcb_bits = height * (a * bit_depth + b);
|
||||
} else {
|
||||
RK_U32 ctu_idx = ctu_size >> 5;
|
||||
RK_U32 a = filterd_fbc_off[ctu_idx][chroma_fmt_idc].a;
|
||||
RK_U32 b = filterd_fbc_off[ctu_idx][chroma_fmt_idc].b;
|
||||
|
||||
rcb_bits = height * (a * bit_depth + b + (bit_depth == 10 ? 192 * ctu_size >> 4 : 0));
|
||||
}
|
||||
rcb_bits += ext_align_size;
|
||||
rcb_bits = height * (a * bit_depth + b + (bit_depth == 10 ? 192 * ctu_size >> 4 : 0));
|
||||
}
|
||||
} else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
|
||||
}
|
||||
|
||||
|
||||
@@ -278,55 +278,66 @@ static void vp9d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
|
||||
{
|
||||
RK_U32 rcb_bits = 0;
|
||||
DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)data;
|
||||
RK_U32 num_tiles = pic_param->log2_tile_rows;
|
||||
RK_U32 num_tiles_col = 1 << pic_param->log2_tile_cols;
|
||||
RK_U32 bit_depth = pic_param->BitDepthMinus8Luma + 8;
|
||||
RK_U32 ext_align_size = num_tiles * 64 * 8;
|
||||
RK_U32 ext_align_size = num_tiles_col * 64 * 8;
|
||||
|
||||
width = MPP_ALIGN(width, VP9_CTU_SIZE);
|
||||
height = MPP_ALIGN(height, VP9_CTU_SIZE);
|
||||
/* RCB_STRMD_ROW */
|
||||
if (width > 4096)
|
||||
if (width >= 4096)
|
||||
rcb_bits = MPP_ALIGN(width, 64) * 232 + ext_align_size;
|
||||
else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_TRANSD_ROW */
|
||||
if (width > 8192)
|
||||
if (width >= 8192)
|
||||
rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
|
||||
else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_TRANSD_COL */
|
||||
if (height > 8192)
|
||||
rcb_bits = (MPP_ALIGN(height - 8192, 4) << 1) + ext_align_size;
|
||||
if ((height >= 8192) && (num_tiles_col > 1))
|
||||
rcb_bits = (MPP_ALIGN(height - 8192, 4) << 1);
|
||||
else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_INTER_ROW */
|
||||
rcb_bits = width * 36 + ext_align_size;
|
||||
rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_INTER_COL */
|
||||
rcb_info[RCB_INTER_COL].size = 0;
|
||||
|
||||
/* RCB_INTRA_ROW */
|
||||
rcb_bits = width * 48 + ext_align_size;
|
||||
rcb_bits = width * 2 * 11 + ext_align_size;
|
||||
rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_DBLK_ROW */
|
||||
rcb_bits = width * (1 + 16 * bit_depth) + num_tiles * 192 * bit_depth + ext_align_size;
|
||||
rcb_bits = width * (0.5 + 16 * bit_depth) + num_tiles_col * 192 * bit_depth + ext_align_size;
|
||||
rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_SAO_ROW */
|
||||
rcb_info[RCB_SAO_ROW].size = 0;
|
||||
|
||||
/* RCB_FBC_ROW */
|
||||
if (vp9_hw_regs->common.reg012.fbc_e) {
|
||||
rcb_bits = 8 * width * bit_depth + ext_align_size;
|
||||
} else
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);
|
||||
|
||||
/* RCB_FILT_COL */
|
||||
if (vp9_hw_regs->common.reg012.fbc_e) {
|
||||
rcb_bits = height * (4 + 24 * bit_depth);
|
||||
if (num_tiles_col > 1) {
|
||||
if (vp9_hw_regs->common.reg012.fbc_e) {
|
||||
rcb_bits = height * (4 + 24 * bit_depth);
|
||||
} else
|
||||
rcb_bits = height * (4 + 16 * bit_depth);
|
||||
} else
|
||||
rcb_bits = height * (4 + 16 * bit_depth);
|
||||
rcb_bits += ext_align_size;
|
||||
rcb_bits = 0;
|
||||
rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user