From ae4d4378e8aee8a15c434a4822d27e8e1f0826bd Mon Sep 17 00:00:00 2001 From: Yanjun Liao Date: Thu, 27 Mar 2025 18:06:41 +0800 Subject: [PATCH] fix[vepu_511]: Speed grade configuration of 0.67 Modified default speed preset configuration to 0.67 PPC, delivering ~40 FPS for 4K resolution at 500MHz Change-Id: If4b11f1d45871a377403afb8dd98f727124ce499 Signed-off-by: Yanjun Liao --- mpp/hal/rkenc/h264e/hal_h264e_vepu511.c | 72 ++++++++++++++----------- mpp/hal/rkenc/h265e/hal_h265e_vepu511.c | 52 +++++++++++------- 2 files changed, 76 insertions(+), 48 deletions(-) diff --git a/mpp/hal/rkenc/h264e/hal_h264e_vepu511.c b/mpp/hal/rkenc/h264e/hal_h264e_vepu511.c index 112883e3..d383c097 100644 --- a/mpp/hal/rkenc/h264e/hal_h264e_vepu511.c +++ b/mpp/hal/rkenc/h264e/hal_h264e_vepu511.c @@ -1103,23 +1103,35 @@ static void setup_vepu511_rdo_pred(HalH264eVepu511Ctx *ctx) hal_h264e_dbg_func("enter\n"); + /* + * H264 Mode Mask of Mode Decision. + * More prediction modes lead to better compression performance but increase computational cycles. + * + * The default speed preset targets 0.67 PPC (~40 FPS for 4K resolution at 500MHz): + * - Set i4/i16 partition RDO numbers to 1 for P-frames and all other CU RDO numbers to 2. + * - Set cime_fuse = 0 to enable dual-window search for higher compression performance. + * - Set fme_lvl_mrg = 1 to enable joint search of FME depth1 and depth2, + * which improves real-time performance but reduces the compression ratio. + * - Set cime_srch_lftw/rgtw/uph/dwnh = 12/12/15/15; this expands the CIME search range, which degrades real-time performance. + * - Set rime_prelvl_en = 0 to disable RIME pre-level and improve real-time performance. 
+ */ if (slice->slice_type == H264_I_SLICE) { regs->reg_rc_roi.klut_ofst.chrm_klut_ofst = 6; - reg_frm->rdo_mark_mode.iframe_i4_rdo_num = 1; - reg_frm->rdo_mark_mode.i8_rdo_num = 1; + reg_frm->rdo_mark_mode.iframe_i4_rdo_num = 2; + reg_frm->rdo_mark_mode.i8_rdo_num = 2; reg_frm->rdo_mark_mode.iframe_i16_rdo_num = 2; - reg_frm->rdo_mark_mode.rdo_mark_mode = 0; + reg_frm->rdo_mark_mode.rdo_mark_mode = 0; } else { regs->reg_rc_roi.klut_ofst.chrm_klut_ofst = is_ipc_scene ? 9 : 6; - reg_frm->rdo_mark_mode.p16_interp_num = 2; - reg_frm->rdo_mark_mode.p16t8_rdo_num = 2; - reg_frm->rdo_mark_mode.p16t4_rmd_num = 2; - reg_frm->rdo_mark_mode.rdo_mark_mode = 0; - reg_frm->rdo_mark_mode.p8_interp_num = 3; - reg_frm->rdo_mark_mode.p8t8_rdo_num = 2; - reg_frm->rdo_mark_mode.p8t4_rmd_num = 2; - regs->reg_frm.rdo_mark_mode.i8_rdo_num = 1; - regs->reg_frm.rdo_mark_mode.iframe_i4_rdo_num = 1; + reg_frm->rdo_mark_mode.p16_interp_num = 2; + reg_frm->rdo_mark_mode.p16t8_rdo_num = 2; + reg_frm->rdo_mark_mode.p16t4_rmd_num = 2; + reg_frm->rdo_mark_mode.rdo_mark_mode = 0; + reg_frm->rdo_mark_mode.p8_interp_num = 2; + reg_frm->rdo_mark_mode.p8t8_rdo_num = 2; + reg_frm->rdo_mark_mode.p8t4_rmd_num = 2; + regs->reg_frm.rdo_mark_mode.i8_rdo_num = 2; + regs->reg_frm.rdo_mark_mode.iframe_i4_rdo_num = 1; regs->reg_frm.rdo_mark_mode.iframe_i16_rdo_num = 1; } @@ -1602,7 +1614,7 @@ static void setup_vepu511_me(HalH264eVepu511Ctx *ctx) reg_frm->common.me_cfg.rme_srch_h = 3; reg_frm->common.me_cfg.rme_srch_v = 3; - reg_frm->common.me_cfg.srgn_max_num = 54; + reg_frm->common.me_cfg.srgn_max_num = 72; reg_frm->common.me_cfg.cime_dist_thre = 1024; reg_frm->common.me_cfg.rme_dis = 0; reg_frm->common.me_cfg.fme_dis = 0; @@ -1610,26 +1622,26 @@ static void setup_vepu511_me(HalH264eVepu511Ctx *ctx) reg_frm->common.me_cach.cime_zero_thre = 64; /* CIME: 0x1760 - 0x176C */ - reg_param->me_sqi_comb.cime_pmv_num = 1; - reg_param->me_sqi_comb.cime_fuse = 1; - reg_param->me_sqi_comb.move_lambda = 0; - 
reg_param->me_sqi_comb.rime_lvl_mrg = 1; - reg_param->me_sqi_comb.rime_prelvl_en = 0; - reg_param->me_sqi_comb.rime_prersu_en = 0; - reg_param->me_sqi_comb.fme_lvl_mrg = 0; - reg_param->cime_mvd_th_comb.cime_mvd_th0 = 16; - reg_param->cime_mvd_th_comb.cime_mvd_th1 = 48; - reg_param->cime_mvd_th_comb.cime_mvd_th2 = 80; + reg_param->me_sqi_comb.cime_pmv_num = 1; + reg_param->me_sqi_comb.cime_fuse = 0; + reg_param->me_sqi_comb.move_lambda = 0; + reg_param->me_sqi_comb.rime_lvl_mrg = 1; + reg_param->me_sqi_comb.rime_prelvl_en = 0; + reg_param->me_sqi_comb.rime_prersu_en = 0; + reg_param->me_sqi_comb.fme_lvl_mrg = 1; + reg_param->cime_mvd_th_comb.cime_mvd_th0 = 16; + reg_param->cime_mvd_th_comb.cime_mvd_th1 = 48; + reg_param->cime_mvd_th_comb.cime_mvd_th2 = 80; reg_param->cime_madp_th_comb.cime_madp_th = 16; - reg_param->cime_multi_comb.cime_multi0 = 8; - reg_param->cime_multi_comb.cime_multi1 = 12; - reg_param->cime_multi_comb.cime_multi2 = 16; - reg_param->cime_multi_comb.cime_multi3 = 20; + reg_param->cime_multi_comb.cime_multi0 = 8; + reg_param->cime_multi_comb.cime_multi1 = 12; + reg_param->cime_multi_comb.cime_multi2 = 16; + reg_param->cime_multi_comb.cime_multi3 = 20; /* RFME: 0x1770 - 0x1778 */ - reg_param->rime_mvd_th_comb.rime_mvd_th0 = 1; - reg_param->rime_mvd_th_comb.rime_mvd_th1 = 2; - reg_param->rime_mvd_th_comb.fme_madp_th = 0; + reg_param->rime_mvd_th_comb.rime_mvd_th0 = 1; + reg_param->rime_mvd_th_comb.rime_mvd_th1 = 2; + reg_param->rime_mvd_th_comb.fme_madp_th = 0; reg_param->rime_madp_th_comb.rime_madp_th0 = 8; reg_param->rime_madp_th_comb.rime_madp_th1 = 16; reg_param->rime_multi_comb.rime_multi0 = 4; diff --git a/mpp/hal/rkenc/h265e/hal_h265e_vepu511.c b/mpp/hal/rkenc/h265e/hal_h265e_vepu511.c index 5b61fca1..5e2ffb89 100644 --- a/mpp/hal/rkenc/h265e/hal_h265e_vepu511.c +++ b/mpp/hal/rkenc/h265e/hal_h265e_vepu511.c @@ -894,7 +894,27 @@ static void vepu511_h265_set_prep(void *hal, HalEncTask *task, H265eV511RegSet * 
reg_frm->common.enc_pic.rec_fbc_dis = 0; reg_frm->rdo_cfg.chrm_spcl = 0; - reg_frm->rdo_cfg.cu_inter_e = 0x5b; + + /* + * H265 Max Inter/Intra CU prediction Mode. + * More prediction modes lead to better compression performance but increase computational cycles. + * + * The default speed preset targets 0.67 PPC (~40 FPS for 4K resolution at 500MHz): + * - Set Inter prediction 32/16/8 CUs to 1/3/2 and Intra 32/16/8/4 CUs to 1, + * maximizing the number of modes while keeping the prediction hierarchy unchanged. + * - Set cime_fuse = 1 to disable dual-window search for higher real-time performance. + * - Set fme_lvl_mrg = 1 to enable joint search of FME depth1 and depth2, + * which improves real-time performance but reduces the compression ratio. + * - Set cime_srch_lftw/rgtw/uph/dwnh = 12/12/15/15; this expands the CIME search range, which degrades real-time performance. + * - Set rime_prelvl_en = 0 to disable RIME pre-level and improve real-time performance. + * - Set fmdc_adju_split32 = 0 to enable CU32 block prediction. + * Setting fmdc_adju_split32 = 1 restricts prediction to CU16/8 only, improving real-time performance. 
+ */ + reg_frm->rdo_cfg.cu_inter_e = 0x5a; + reg_frm->rdo_intra_mode.intra_pu4_mode_num = 1; + reg_frm->rdo_intra_mode.intra_pu8_mode_num = 1; + reg_frm->rdo_intra_mode.intra_pu16_mode_num = 1; + reg_frm->rdo_intra_mode.intra_pu32_mode_num = 1; if (syn->pp.num_long_term_ref_pics_sps) { reg_frm->rdo_cfg.ltm_col = 0; @@ -919,12 +939,6 @@ static void vepu511_h265_set_prep(void *hal, HalEncTask *task, H265eV511RegSet * reg_frm->synt_nal.nal_unit_type = i_nal_type; } - - reg_frm->rdo_intra_mode.intra_pu4_mode_num = 1; - reg_frm->rdo_intra_mode.intra_pu8_mode_num = 2; - reg_frm->rdo_intra_mode.intra_pu16_mode_num = 2; - reg_frm->rdo_intra_mode.intra_pu32_mode_num = 2; - } static void vepu511_h265_set_split(H265eV511RegSet *regs, MppEncCfgSet *enc_cfg) @@ -1008,19 +1022,20 @@ static void vepu511_h265_set_me_regs(H265eV511HalContext *ctx, H265eSyntax_new * reg_frm->common.me_cach.fme_prefsu_en = 0; /* CIME: 0x1760 - 0x176C */ - s->me_sqi_comb.cime_pmv_num = 1; - s->me_sqi_comb.cime_fuse = 1; - s->me_sqi_comb.move_lambda = 2; + s->me_sqi_comb.cime_pmv_num = 1; + s->me_sqi_comb.cime_fuse = 1; + s->me_sqi_comb.move_lambda = 2; s->me_sqi_comb.rime_lvl_mrg = 0; - s->me_sqi_comb.rime_prelvl_en = 3; + s->me_sqi_comb.rime_prelvl_en = 0; s->me_sqi_comb.rime_prersu_en = 0; - s->me_sqi_comb.fme_lvl_mrg = 0; - s->cime_mvd_th_comb.cime_mvd_th0 = 8; - s->cime_mvd_th_comb.cime_mvd_th1 = 20; - s->cime_mvd_th_comb.cime_mvd_th2 = 32; - s->cime_madp_th_comb.cime_madp_th = 16; + s->me_sqi_comb.fme_lvl_mrg = 1; + + s->cime_mvd_th_comb.cime_mvd_th0 = 8; + s->cime_mvd_th_comb.cime_mvd_th1 = 20; + s->cime_mvd_th_comb.cime_mvd_th2 = 32; + s->cime_madp_th_comb.cime_madp_th = 16; s->cime_madp_th_comb.ratio_consi_cfg = 8; - s->cime_madp_th_comb.ratio_bmv_dist = 8; + s->cime_madp_th_comb.ratio_bmv_dist = 8; s->cime_multi_comb.cime_multi0 = 8; s->cime_multi_comb.cime_multi1 = 12; s->cime_multi_comb.cime_multi2 = 16; @@ -1345,6 +1360,7 @@ static void vepu511_h265_set_rc_regs(H265eV511HalContext *ctx, 
H265eV511RegSet * reg_rc->roi_qthd2.qpmax_area6 = h265->qpmax_map[6] > 0 ? h265->qpmax_map[6] : rc_cfg->quality_max; reg_rc->roi_qthd2.qpmin_area7 = h265->qpmin_map[7] > 0 ? h265->qpmin_map[7] : rc_cfg->quality_min; reg_rc->roi_qthd3.qpmax_area7 = h265->qpmax_map[7] > 0 ? h265->qpmax_map[7] : rc_cfg->quality_max; + reg_rc->roi_cfg.fmdc_adj1_hevc.fmdc_adju_split32 = 0; } static void vepu511_h265_set_quant_regs(H265eV511HalContext *ctx, H265eV511RegSet *regs) @@ -1931,7 +1947,7 @@ static void vepu511_h265_set_slice_regs(H265eSyntax_new *syn, H265eVepu511Frame regs->synt_sli1.dblk_fltr_ovrd_flg = syn->sp.dblk_fltr_ovrd_flg; regs->synt_sli1.sli_cb_qp_ofst = syn->pp.pps_slice_chroma_qp_offsets_present_flag ? syn->sp.sli_cb_qp_ofst : syn->pp.pps_cb_qp_offset; - regs->synt_sli1.max_mrg_cnd = syn->sp.max_mrg_cnd; + regs->synt_sli1.max_mrg_cnd = 1; regs->synt_sli1.col_ref_idx = syn->sp.col_ref_idx; regs->synt_sli1.col_frm_l0_flg = syn->sp.col_frm_l0_flg;