feat[vdpu383]: align hor stride to an odd multiple of 128 bytes plus 64 bytes

for better performance

Signed-off-by: Chandler Chen <chandler.chen@rock-chips.com>
Change-Id: I312c6b22f0c189b0674b0a667c20f68ac83315d6
This commit is contained in:
Chandler Chen
2024-03-08 14:57:25 +08:00
committed by Herman Chen
parent d381031669
commit 8759039d5f
8 changed files with 23 additions and 19 deletions

View File

@@ -407,7 +407,10 @@ static RK_S32 vp9_alloc_frame(Vp9CodecContext *ctx, VP9Frame *frame)
mpp_frame_set_fbc_hdr_stride(frame->f, fbc_hdr_stride); mpp_frame_set_fbc_hdr_stride(frame->f, fbc_hdr_stride);
} else { } else {
mpp_slots_set_prop(s->slots, SLOTS_HOR_ALIGN, mpp_align_256_odd); if (mpp_get_soc_type() == ROCKCHIP_SOC_RK3576)
mpp_slots_set_prop(s->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
else
mpp_slots_set_prop(s->slots, SLOTS_HOR_ALIGN, mpp_align_256_odd);
mpp_slots_set_prop(s->slots, SLOTS_VER_ALIGN, mpp_align_64); mpp_slots_set_prop(s->slots, SLOTS_VER_ALIGN, mpp_align_64);
if (MPP_FRAME_FMT_IS_TILE(s->cfg->base.out_fmt)) if (MPP_FRAME_FMT_IS_TILE(s->cfg->base.out_fmt))
mpp_frame_set_fmt(frame->f, ctx->pix_fmt | ((s->cfg->base.out_fmt & (MPP_FRAME_TILE_FLAG)))); mpp_frame_set_fmt(frame->f, ctx->pix_fmt | ((s->cfg->base.out_fmt & (MPP_FRAME_TILE_FLAG))));

View File

@@ -75,12 +75,6 @@ static RK_U32 avs2d_ver_align(RK_U32 val)
return MPP_ALIGN(val, 16); return MPP_ALIGN(val, 16);
} }
/*
 * Horizontal alignment helper for the AVS2 decoder frame slots.
 *
 * val - value to align.
 *
 * Returns the result of MPP_ALIGN(val, 16); presumably this rounds val up to
 * a multiple of 16 (MPP_ALIGN is defined outside this view - confirm).
 */
static RK_U32 avs2d_hor_align(RK_U32 val)
{
    const RK_U32 aligned = MPP_ALIGN(val, 16);

    return aligned;
}
static RK_U32 avs2d_len_align(RK_U32 val) static RK_U32 avs2d_len_align(RK_U32 val)
{ {
return (2 * MPP_ALIGN(val, 16)); return (2 * MPP_ALIGN(val, 16));
@@ -515,7 +509,7 @@ MPP_RET hal_avs2d_vdpu383_init(void *hal, MppHalCfg *cfg)
reg_ctx->sclst_offset = reg_ctx->reg_buf[0].offset_sclst; reg_ctx->sclst_offset = reg_ctx->reg_buf[0].offset_sclst;
} }
mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, avs2d_hor_align); mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
mpp_slots_set_prop(p_hal->frame_slots, SLOTS_VER_ALIGN, avs2d_ver_align); mpp_slots_set_prop(p_hal->frame_slots, SLOTS_VER_ALIGN, avs2d_ver_align);
mpp_slots_set_prop(p_hal->frame_slots, SLOTS_LEN_ALIGN, avs2d_len_align); mpp_slots_set_prop(p_hal->frame_slots, SLOTS_LEN_ALIGN, avs2d_len_align);

View File

@@ -120,11 +120,6 @@ static RK_U32 rkv_hor_align(RK_U32 val)
return MPP_ALIGN(val, 16); return MPP_ALIGN(val, 16);
} }
/*
 * Horizontal alignment helper producing a "256 odd" value.
 *
 * val - value to align.
 *
 * Applies MPP_ALIGN(val, 256) and then forces the 256 bit on. Assuming
 * MPP_ALIGN rounds up to a multiple of 256 (defined outside this view -
 * confirm), the result is always an odd multiple of 256.
 */
static RK_U32 rkv_hor_align_256_odds(RK_U32 val)
{
    RK_U32 stride = MPP_ALIGN(val, 256);

    stride |= 256;
    return stride;
}
static RK_U32 rkv_len_align(RK_U32 val) static RK_U32 rkv_len_align(RK_U32 val)
{ {
return (2 * MPP_ALIGN(val, 16)); return (2 * MPP_ALIGN(val, 16));
@@ -1052,7 +1047,7 @@ MPP_RET vdpu383_h264d_control(void *hal, MpiCmd cmd_type, void *param)
if (MPP_FRAME_FMT_IS_FBC(fmt)) { if (MPP_FRAME_FMT_IS_FBC(fmt)) {
vdpu383_afbc_align_calc(p_hal->frame_slots, (MppFrame)param, 16); vdpu383_afbc_align_calc(p_hal->frame_slots, (MppFrame)param, 16);
} else if (imgwidth > 1920 || imgheight > 1088) { } else if (imgwidth > 1920 || imgheight > 1088) {
mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, rkv_hor_align_256_odds); mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
} }
} break; } break;
case MPP_DEC_SET_OUTPUT_FORMAT: { case MPP_DEC_SET_OUTPUT_FORMAT: {

View File

@@ -1411,6 +1411,8 @@ static MPP_RET hal_h265d_vdpu383_control(void *hal, MpiCmd cmd_type, void *param
case MPP_DEC_SET_FRAME_INFO: { case MPP_DEC_SET_FRAME_INFO: {
MppFrame frame = (MppFrame)param; MppFrame frame = (MppFrame)param;
MppFrameFormat fmt = mpp_frame_get_fmt(frame); MppFrameFormat fmt = mpp_frame_get_fmt(frame);
RK_U32 imgwidth = mpp_frame_get_width((MppFrame)param);
RK_U32 imgheight = mpp_frame_get_height((MppFrame)param);
if (fmt == MPP_FMT_YUV422SP) { if (fmt == MPP_FMT_YUV422SP) {
mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align_422); mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align_422);
@@ -1419,6 +1421,8 @@ static MPP_RET hal_h265d_vdpu383_control(void *hal, MpiCmd cmd_type, void *param
} }
if (MPP_FRAME_FMT_IS_FBC(fmt)) { if (MPP_FRAME_FMT_IS_FBC(fmt)) {
vdpu383_afbc_align_calc(p_hal->slots, frame, 16); vdpu383_afbc_align_calc(p_hal->slots, frame, 16);
} else if (imgwidth > 1920 || imgheight > 1088) {
mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
} }
break; break;
} }

View File

@@ -336,7 +336,7 @@ static MPP_RET hal_vp9d_vdpu383_init(void *hal, MppHalCfg *cfg)
hw_ctx->mv_base_addr = -1; hw_ctx->mv_base_addr = -1;
hw_ctx->pre_mv_base_addr = -1; hw_ctx->pre_mv_base_addr = -1;
mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, vp9_hor_align); mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
mpp_slots_set_prop(p_hal->slots, SLOTS_VER_ALIGN, vp9_ver_align); mpp_slots_set_prop(p_hal->slots, SLOTS_VER_ALIGN, vp9_ver_align);
if (p_hal->group == NULL) { if (p_hal->group == NULL) {
@@ -860,8 +860,8 @@ static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
/* error stride */ /* error stride */
vp9_hw_regs->vp9d_paras.reg80_error_ref_hor_virstride = w / 64; vp9_hw_regs->vp9d_paras.reg80_error_ref_hor_virstride = w / 64;
} else { } else {
sw_y_hor_virstride = (vp9_hor_align((pic_param->width * bit_depth) >> 3) >> 4); sw_y_hor_virstride = (mpp_align_128_odd_plus_64((pic_param->width * bit_depth) >> 3) >> 4);
sw_uv_hor_virstride = (vp9_hor_align((pic_param->width * bit_depth) >> 3) >> 4); sw_uv_hor_virstride = (mpp_align_128_odd_plus_64((pic_param->width * bit_depth) >> 3) >> 4);
sw_y_virstride = pic_h[0] * sw_y_hor_virstride; sw_y_virstride = pic_h[0] * sw_y_hor_virstride;
sw_uv_virstride = sw_y_virstride / 2; sw_uv_virstride = sw_y_virstride / 2;
@@ -948,7 +948,7 @@ static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
if (fbc_en) { if (fbc_en) {
y_hor_virstride = uv_hor_virstride = MPP_ALIGN(ref_frame_width_y, 64) / 64; y_hor_virstride = uv_hor_virstride = MPP_ALIGN(ref_frame_width_y, 64) / 64;
} else { } else {
y_hor_virstride = uv_hor_virstride = (vp9_hor_align((ref_frame_width_y * bit_depth) >> 3) >> 4); y_hor_virstride = uv_hor_virstride = (mpp_align_128_odd_plus_64((ref_frame_width_y * bit_depth) >> 3) >> 4);
} }
y_virstride = y_hor_virstride * pic_h[0]; y_virstride = y_hor_virstride * pic_h[0];
@@ -1289,7 +1289,7 @@ static MPP_RET hal_vp9d_vdpu383_control(void *hal, MpiCmd cmd_type, void *param)
if (MPP_FRAME_FMT_IS_FBC(fmt)) { if (MPP_FRAME_FMT_IS_FBC(fmt)) {
vdpu383_afbc_align_calc(p_hal->slots, (MppFrame)param, 0); vdpu383_afbc_align_calc(p_hal->slots, (MppFrame)param, 0);
} else { } else {
mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, vp9_hor_align); mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
} }
} break; } break;
default : { default : {

View File

@@ -2569,6 +2569,8 @@ MPP_RET vdpu383_av1d_control(void *hal, MpiCmd cmd_type, void *param)
} }
if (MPP_FRAME_FMT_IS_FBC(fmt)) { if (MPP_FRAME_FMT_IS_FBC(fmt)) {
vdpu383_afbc_align_calc(p_hal->slots, (MppFrame)param, 16); vdpu383_afbc_align_calc(p_hal->slots, (MppFrame)param, 16);
} else if (imgwidth > 1920 || imgheight > 1088) {
mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
} }
break; break;
} }

View File

@@ -224,6 +224,7 @@ RK_U32 mpp_align_16(RK_U32 val);
RK_U32 mpp_align_64(RK_U32 val); RK_U32 mpp_align_64(RK_U32 val);
RK_U32 mpp_align_128(RK_U32 val); RK_U32 mpp_align_128(RK_U32 val);
RK_U32 mpp_align_256_odd(RK_U32 val); RK_U32 mpp_align_256_odd(RK_U32 val);
RK_U32 mpp_align_128_odd_plus_64(RK_U32 val);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@@ -126,3 +126,8 @@ RK_U32 mpp_align_256_odd(RK_U32 val)
{ {
return MPP_ALIGN(val, 256) | 256; return MPP_ALIGN(val, 256) | 256;
} }
/*
 * Alignment helper: "128 odd" alignment with an extra 64 added.
 *
 * val - value to align.
 *
 * Applies MPP_ALIGN(val, 128), forces the 128 bit on, then adds 64.
 * Assuming MPP_ALIGN rounds up to a multiple of 128 (defined outside this
 * view - confirm), the result is an odd multiple of 128 plus 64.
 */
RK_U32 mpp_align_128_odd_plus_64(RK_U32 val)
{
    RK_U32 odd_128 = MPP_ALIGN(val, 128) | 128;

    return odd_128 + 64;
}