feat[vdpu383]: align hor stride to an odd multiple of 128 bytes plus 64 bytes

for better performance

Signed-off-by: Chandler Chen <chandler.chen@rock-chips.com>
Change-Id: I312c6b22f0c189b0674b0a667c20f68ac83315d6
This commit is contained in:
Chandler Chen
2024-03-08 14:57:25 +08:00
committed by Herman Chen
parent d381031669
commit 8759039d5f
8 changed files with 23 additions and 19 deletions

View File

@@ -407,6 +407,9 @@ static RK_S32 vp9_alloc_frame(Vp9CodecContext *ctx, VP9Frame *frame)
mpp_frame_set_fbc_hdr_stride(frame->f, fbc_hdr_stride);
} else {
if (mpp_get_soc_type() == ROCKCHIP_SOC_RK3576)
mpp_slots_set_prop(s->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
else
mpp_slots_set_prop(s->slots, SLOTS_HOR_ALIGN, mpp_align_256_odd);
mpp_slots_set_prop(s->slots, SLOTS_VER_ALIGN, mpp_align_64);
if (MPP_FRAME_FMT_IS_TILE(s->cfg->base.out_fmt))

View File

@@ -75,12 +75,6 @@ static RK_U32 avs2d_ver_align(RK_U32 val)
return MPP_ALIGN(val, 16);
}
/*
 * Horizontal alignment helper for the AVS2 decoder HAL.
 *
 * Returns val after applying MPP_ALIGN with an alignment of 16.
 */
static RK_U32 avs2d_hor_align(RK_U32 val)
{
    RK_U32 aligned = MPP_ALIGN(val, 16);

    return aligned;
}
static RK_U32 avs2d_len_align(RK_U32 val)
{
return (2 * MPP_ALIGN(val, 16));
@@ -515,7 +509,7 @@ MPP_RET hal_avs2d_vdpu383_init(void *hal, MppHalCfg *cfg)
reg_ctx->sclst_offset = reg_ctx->reg_buf[0].offset_sclst;
}
mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, avs2d_hor_align);
mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
mpp_slots_set_prop(p_hal->frame_slots, SLOTS_VER_ALIGN, avs2d_ver_align);
mpp_slots_set_prop(p_hal->frame_slots, SLOTS_LEN_ALIGN, avs2d_len_align);

View File

@@ -120,11 +120,6 @@ static RK_U32 rkv_hor_align(RK_U32 val)
return MPP_ALIGN(val, 16);
}
/*
 * Horizontal alignment helper.
 *
 * Applies MPP_ALIGN with an alignment of 256 to val, then ORs in 256 so that
 * the 256 bit is always set in the returned value.
 */
static RK_U32 rkv_hor_align_256_odds(RK_U32 val)
{
    RK_U32 aligned = MPP_ALIGN(val, 256);

    aligned |= 256;

    return aligned;
}
static RK_U32 rkv_len_align(RK_U32 val)
{
return (2 * MPP_ALIGN(val, 16));
@@ -1052,7 +1047,7 @@ MPP_RET vdpu383_h264d_control(void *hal, MpiCmd cmd_type, void *param)
if (MPP_FRAME_FMT_IS_FBC(fmt)) {
vdpu383_afbc_align_calc(p_hal->frame_slots, (MppFrame)param, 16);
} else if (imgwidth > 1920 || imgheight > 1088) {
mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, rkv_hor_align_256_odds);
mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
}
} break;
case MPP_DEC_SET_OUTPUT_FORMAT: {

View File

@@ -1411,6 +1411,8 @@ static MPP_RET hal_h265d_vdpu383_control(void *hal, MpiCmd cmd_type, void *param
case MPP_DEC_SET_FRAME_INFO: {
MppFrame frame = (MppFrame)param;
MppFrameFormat fmt = mpp_frame_get_fmt(frame);
RK_U32 imgwidth = mpp_frame_get_width((MppFrame)param);
RK_U32 imgheight = mpp_frame_get_height((MppFrame)param);
if (fmt == MPP_FMT_YUV422SP) {
mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align_422);
@@ -1419,6 +1421,8 @@ static MPP_RET hal_h265d_vdpu383_control(void *hal, MpiCmd cmd_type, void *param
}
if (MPP_FRAME_FMT_IS_FBC(fmt)) {
vdpu383_afbc_align_calc(p_hal->slots, frame, 16);
} else if (imgwidth > 1920 || imgheight > 1088) {
mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
}
break;
}

View File

@@ -336,7 +336,7 @@ static MPP_RET hal_vp9d_vdpu383_init(void *hal, MppHalCfg *cfg)
hw_ctx->mv_base_addr = -1;
hw_ctx->pre_mv_base_addr = -1;
mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, vp9_hor_align);
mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
mpp_slots_set_prop(p_hal->slots, SLOTS_VER_ALIGN, vp9_ver_align);
if (p_hal->group == NULL) {
@@ -860,8 +860,8 @@ static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
/* error stride */
vp9_hw_regs->vp9d_paras.reg80_error_ref_hor_virstride = w / 64;
} else {
sw_y_hor_virstride = (vp9_hor_align((pic_param->width * bit_depth) >> 3) >> 4);
sw_uv_hor_virstride = (vp9_hor_align((pic_param->width * bit_depth) >> 3) >> 4);
sw_y_hor_virstride = (mpp_align_128_odd_plus_64((pic_param->width * bit_depth) >> 3) >> 4);
sw_uv_hor_virstride = (mpp_align_128_odd_plus_64((pic_param->width * bit_depth) >> 3) >> 4);
sw_y_virstride = pic_h[0] * sw_y_hor_virstride;
sw_uv_virstride = sw_y_virstride / 2;
@@ -948,7 +948,7 @@ static MPP_RET hal_vp9d_vdpu383_gen_regs(void *hal, HalTaskInfo *task)
if (fbc_en) {
y_hor_virstride = uv_hor_virstride = MPP_ALIGN(ref_frame_width_y, 64) / 64;
} else {
y_hor_virstride = uv_hor_virstride = (vp9_hor_align((ref_frame_width_y * bit_depth) >> 3) >> 4);
y_hor_virstride = uv_hor_virstride = (mpp_align_128_odd_plus_64((ref_frame_width_y * bit_depth) >> 3) >> 4);
}
y_virstride = y_hor_virstride * pic_h[0];
@@ -1289,7 +1289,7 @@ static MPP_RET hal_vp9d_vdpu383_control(void *hal, MpiCmd cmd_type, void *param)
if (MPP_FRAME_FMT_IS_FBC(fmt)) {
vdpu383_afbc_align_calc(p_hal->slots, (MppFrame)param, 0);
} else {
mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, vp9_hor_align);
mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
}
} break;
default : {

View File

@@ -2569,6 +2569,8 @@ MPP_RET vdpu383_av1d_control(void *hal, MpiCmd cmd_type, void *param)
}
if (MPP_FRAME_FMT_IS_FBC(fmt)) {
vdpu383_afbc_align_calc(p_hal->slots, (MppFrame)param, 16);
} else if (imgwidth > 1920 || imgheight > 1088) {
mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64);
}
break;
}

View File

@@ -224,6 +224,7 @@ RK_U32 mpp_align_16(RK_U32 val);
RK_U32 mpp_align_64(RK_U32 val);
RK_U32 mpp_align_128(RK_U32 val);
RK_U32 mpp_align_256_odd(RK_U32 val);
RK_U32 mpp_align_128_odd_plus_64(RK_U32 val);
#ifdef __cplusplus
}

View File

@@ -126,3 +126,8 @@ RK_U32 mpp_align_256_odd(RK_U32 val)
{
return MPP_ALIGN(val, 256) | 256;
}
/*
 * Alignment helper used as a horizontal stride callback.
 *
 * Steps, in order:
 *   1. apply MPP_ALIGN with an alignment of 128 to val;
 *   2. OR in 128 so the 128 bit is always set;
 *   3. add 64 to the result.
 */
RK_U32 mpp_align_128_odd_plus_64(RK_U32 val)
{
    RK_U32 stride = MPP_ALIGN(val, 128);

    stride |= 128;
    stride += 64;

    return stride;
}