From a4bd0d2a69ca46c9945c37e602ab76b244814936 Mon Sep 17 00:00:00 2001 From: "xiaoxu.chen" Date: Wed, 25 Sep 2024 11:36:24 +0800 Subject: [PATCH] refactor[h265]: unify calculation tile width Change-Id: Ib5200e332cc5be47f79561570c0342e7690e4587 Signed-off-by: xiaoxu.chen --- mpp/codec/enc/h265/h265e_ps.c | 17 ++++++++++++- mpp/codec/enc/h265/h265e_syntax.c | 8 +++---- mpp/hal/rkenc/h265e/hal_h265e_vepu541.c | 32 ++++++++++--------------- mpp/hal/rkenc/h265e/hal_h265e_vepu580.c | 29 ++++++++++------------ 4 files changed, 45 insertions(+), 41 deletions(-) diff --git a/mpp/codec/enc/h265/h265e_ps.c b/mpp/codec/enc/h265/h265e_ps.c index 88261d9f..aaec0073 100644 --- a/mpp/codec/enc/h265/h265e_ps.c +++ b/mpp/codec/enc/h265/h265e_ps.c @@ -458,6 +458,10 @@ MPP_RET h265e_set_pps(H265eCtx *ctx, H265ePps *pps, H265eSps *sps) pps->m_loopFilterAcrossTilesEnabledFlag = !codec->lpf_acs_tile_disable; { RockchipSocType soc_type = mpp_get_soc_type(); + RK_S32 index; + RK_S32 mb_w = (sps->m_picWidthInLumaSamples + sps->m_maxCUSize - 1) / sps->m_maxCUSize; + RK_S32 mb_h = (sps->m_picHeightInLumaSamples + sps->m_maxCUSize - 1) / sps->m_maxCUSize; + RK_S32 tile_width; /* check tile support on rk3566 and rk3568 */ if (soc_type == ROCKCHIP_SOC_RK3566 || soc_type == ROCKCHIP_SOC_RK3568) { @@ -477,7 +481,18 @@ MPP_RET h265e_set_pps(H265eCtx *ctx, H265ePps *pps, H265eSps *sps) if (pps->m_nNumTileColumnsMinus1) { pps->m_tiles_enabled_flag = 1; pps->m_bTileUniformSpacing = 1; - pps->m_loopFilterAcrossTilesEnabledFlag = !codec->lpf_acs_tile_disable;; + pps->m_loopFilterAcrossTilesEnabledFlag = !codec->lpf_acs_tile_disable; + + /* calc width per tile */ + for (index = 0; index < pps->m_nNumTileColumnsMinus1; index++) { + tile_width = (index + 1) * mb_w / (pps->m_nNumTileColumnsMinus1 + 1) - + index * mb_w / (pps->m_nNumTileColumnsMinus1 + 1); + pps->m_nTileColumnWidthArray[index] = tile_width; + pps->m_nTileRowHeightArray[index] = mb_h; + } + tile_width = mb_w - index * mb_w / (pps->m_nNumTileColumnsMinus1 + 1); + pps->m_nTileColumnWidthArray[index] = tile_width; + pps->m_nTileRowHeightArray[index] = mb_h; } } diff --git a/mpp/codec/enc/h265/h265e_syntax.c b/mpp/codec/enc/h265/h265e_syntax.c index f4acfc34..09ff6ac9 100644 --- a/mpp/codec/enc/h265/h265e_syntax.c +++ b/mpp/codec/enc/h265/h265e_syntax.c @@ -120,11 +120,11 @@ static void fill_picture_parameters(const H265eCtx *h, pp->num_tile_columns_minus1 = pps->m_nNumTileColumnsMinus1; pp->num_tile_rows_minus1 = pps->m_nNumTileRowsMinus1; - for (i = 0; i < pp->num_tile_columns_minus1; i++) - pp->column_width_minus1[i] = pps->m_nTileColumnWidthArray[i]; + for (i = 0; i <= pp->num_tile_columns_minus1; i++) + pp->column_width_minus1[i] = pps->m_nTileColumnWidthArray[i] - 1; - for (i = 0; i < pp->num_tile_rows_minus1; i++) - pp->row_height_minus1[i] = pps->m_nTileRowHeightArray[i]; + for (i = 0; i <= pp->num_tile_rows_minus1; i++) + pp->row_height_minus1[i] = pps->m_nTileRowHeightArray[i] - 1; } } diff --git a/mpp/hal/rkenc/h265e/hal_h265e_vepu541.c b/mpp/hal/rkenc/h265e/hal_h265e_vepu541.c index a78a87b9..cc27c03c 100644 --- a/mpp/hal/rkenc/h265e/hal_h265e_vepu541.c +++ b/mpp/hal/rkenc/h265e/hal_h265e_vepu541.c @@ -1331,7 +1331,8 @@ static void vepu541_h265_set_me_regs(H265eV541HalContext *ctx, H265eSyntax_new * regs->me_ram.cach_l2_tag = 0x3; } -static void vepu540_h265_set_me_ram(H265eSyntax_new *syn, H265eV541RegSet *regs, RK_U32 index) +static void vepu540_h265_set_me_ram(H265eSyntax_new *syn, H265eV541RegSet *regs, + RK_U32 index, RK_S32 tile_start_x) { RK_U32 cime_w = 11, cime_h = 7; RK_U32 pic_cime_temp = 0; @@ -1340,16 +1341,10 @@ static void vepu540_h265_set_me_ram(H265eSyntax_new *syn, H265eV541RegSet *regs, regs->me_ram.cime_linebuf_w = pic_cime_temp / 64; } else { RK_S32 pic_wd64 = MPP_ALIGN(syn->pp.pic_width, 64) >> 6; - RK_S32 tile_ctu_stax = index * pic_wd64 / (syn->pp.num_tile_columns_minus1 + 1); - RK_S32 tile_ctu_endx = 0; + RK_S32 tile_ctu_stax = tile_start_x; + RK_S32 tile_ctu_endx = tile_start_x + syn->pp.column_width_minus1[index]; RK_S32 cime_srch_w = regs->me_rnge.cime_srch_h; - if (index == syn->pp.num_tile_columns_minus1) { - tile_ctu_endx = ((regs->enc_rsl.pic_wd8_m1 + 1) * 8 + 63) / 64 - 1; - } else { - tile_ctu_endx = (index + 1) * pic_wd64 / (syn->pp.num_tile_columns_minus1 + 1) - 1; - } - if (tile_ctu_stax < (cime_srch_w + 3) / 4) { if (tile_ctu_endx + 1 + (cime_srch_w + 3) / 4 > pic_wd64) pic_cime_temp = pic_wd64 * 64; @@ -1668,21 +1663,18 @@ MPP_RET hal_h265e_v541_gen_regs(void *hal, HalEncTask *task) hal_h265e_leave(); return MPP_OK; } -void hal_h265e_v540_set_uniform_tile(H265eV541RegSet *regs, H265eSyntax_new *syn, RK_U32 index) +void hal_h265e_v540_set_uniform_tile(H265eV541RegSet *regs, H265eSyntax_new *syn, + RK_U32 index, RK_S32 tile_start_x) { if (syn->pp.tiles_enabled_flag) { - RK_S32 mb_w = MPP_ALIGN(syn->pp.pic_width, 64) / 64; RK_S32 mb_h = MPP_ALIGN(syn->pp.pic_height, 64) / 64; - RK_S32 tile_width = (index + 1) * mb_w / (syn->pp.num_tile_columns_minus1 + 1) - - index * mb_w / (syn->pp.num_tile_columns_minus1 + 1); - if (index == syn->pp.num_tile_columns_minus1) { - tile_width = mb_w - index * mb_w / (syn->pp.num_tile_columns_minus1 + 1); - } + RK_S32 tile_width = syn->pp.column_width_minus1[index] + 1; + regs->tile_cfg.tile_width_m1 = tile_width - 1; regs->tile_cfg.tile_height_m1 = mb_h - 1; regs->rc_cfg.rc_ctu_num = tile_width; regs->tile_cfg.tile_en = syn->pp.tiles_enabled_flag; - regs->tile_pos.tile_x = (index * mb_w / (syn->pp.num_tile_columns_minus1 + 1)); + regs->tile_pos.tile_x = tile_start_x; regs->tile_pos.tile_y = 0; if (index > 0) { RK_U32 tmp = regs->lpfr_addr_hevc; @@ -1707,6 +1699,7 @@ MPP_RET hal_h265e_v540_start(void *hal, HalEncTask *enc_task) hal_h265e_enter(); RK_U32 stream_len = 0; VepuFmtCfg *fmt = (VepuFmtCfg *)ctx->input_fmt; + RK_S32 tile_start_x = 0; if (enc_task->flags.err) { hal_h265e_err("enc_task->flags.err %08x, return e arly", @@ -1721,13 +1714,13 @@ MPP_RET hal_h265e_v540_start(void *hal, HalEncTask *enc_task) MppDevRegWrCfg cfg; MppDevRegRdCfg cfg1; - vepu540_h265_set_me_ram(syn, hw_regs, k); + vepu540_h265_set_me_ram(syn, hw_regs, k, tile_start_x); /* set input info */ vepu541_h265_set_l2_regs(ctx, (H265eV54xL2RegSet*)ctx->l2_regs); vepu541_h265_set_patch_info(ctx->dev, syn, (Vepu541Fmt)fmt->format, enc_task); if (title_num > 1) - hal_h265e_v540_set_uniform_tile(hw_regs, syn, k); + hal_h265e_v540_set_uniform_tile(hw_regs, syn, k, tile_start_x); if (k > 0) { MppDevRegOffsetCfg cfg_fd; RK_U32 offset = mpp_packet_get_length(enc_task->packet); @@ -1807,6 +1800,7 @@ MPP_RET hal_h265e_v540_start(void *hal, HalEncTask *enc_task) } } + tile_start_x += (syn->pp.column_width_minus1[k] + 1); } hal_h265e_dbg_detail("vpu client is sending %d regs", length); diff --git a/mpp/hal/rkenc/h265e/hal_h265e_vepu580.c b/mpp/hal/rkenc/h265e/hal_h265e_vepu580.c index 22a52a30..935edb58 100644 --- a/mpp/hal/rkenc/h265e/hal_h265e_vepu580.c +++ b/mpp/hal/rkenc/h265e/hal_h265e_vepu580.c @@ -254,7 +254,8 @@ static RK_U8 lvl16_intra_cst_wgt[8] = {17, 17, 17, 18, 17, 18, 18}; #include "hal_h265e_vepu580_tune.c" -static void vepu580_h265_set_me_ram(H265eSyntax_new *syn, hevc_vepu580_base *regs, RK_U32 index) +static void vepu580_h265_set_me_ram(H265eSyntax_new *syn, hevc_vepu580_base *regs, + RK_U32 index, RK_S32 tile_start_x) { RK_S32 frm_sta = 0, frm_end = 0, pic_w = 0; RK_S32 srch_w = regs->reg0220_me_rnge.cme_srch_h * 4; @@ -277,14 +278,8 @@ static void vepu580_h265_set_me_ram(H265eSyntax_new *syn, hevc_vepu580_base *reg } frm_end = mpp_clip(frm_end, 0, pic_wd64 - 1); } else { - RK_S32 tile_ctu_stax = index * pic_wd64 / (syn->pp.num_tile_columns_minus1 + 1); - RK_S32 tile_ctu_endx = 0; - - if (index == syn->pp.num_tile_columns_minus1) { - tile_ctu_endx = ((regs->reg0196_enc_rsl.pic_wd8_m1 + 1) * 8 + 63) / 64 - 1; - } else { - tile_ctu_endx = (index + 1) * pic_wd64 / (syn->pp.num_tile_columns_minus1 + 1) - 1; - } + RK_S32 tile_ctu_stax = tile_start_x; + RK_S32 tile_ctu_endx = tile_start_x + syn->pp.column_width_minus1[index]; if (x_gmv - srch_w < 0) { frm_sta = tile_ctu_stax + (x_gmv - srch_w - 15) / 16; @@ -2787,13 +2782,12 @@ MPP_RET hal_h265e_v580_gen_regs(void *hal, HalEncTask *task) return MPP_OK; } -void hal_h265e_v580_set_uniform_tile(hevc_vepu580_base *regs, H265eSyntax_new *syn, RK_U32 index) +void hal_h265e_v580_set_uniform_tile(hevc_vepu580_base *regs, H265eSyntax_new *syn, + RK_U32 index, RK_S32 tile_start_x) { if (syn->pp.tiles_enabled_flag) { - RK_S32 mb_w = MPP_ALIGN(syn->pp.pic_width, 64) / 64; RK_S32 mb_h = MPP_ALIGN(syn->pp.pic_height, 64) / 64; - RK_S32 tile_width = (index + 1) * mb_w / (syn->pp.num_tile_columns_minus1 + 1) - - index * mb_w / (syn->pp.num_tile_columns_minus1 + 1); + RK_S32 tile_width = syn->pp.column_width_minus1[index] + 1; if (!regs->reg0192_enc_pic.cur_frm_ref && !(regs->reg0238_synt_pps.lpf_fltr_acrs_til && @@ -2819,7 +2813,6 @@ void hal_h265e_v580_set_uniform_tile(hevc_vepu580_base *regs, H265eSyntax_new *s regs->reg0193_dual_core.dchs_ofst = 2; if (index == syn->pp.num_tile_columns_minus1) { - tile_width = mb_w - index * mb_w / (syn->pp.num_tile_columns_minus1 + 1); regs->reg0193_dual_core.dchs_txid = 0; regs->reg0193_dual_core.dchs_txe = 0; } @@ -2827,7 +2820,7 @@ void hal_h265e_v580_set_uniform_tile(hevc_vepu580_base *regs, H265eSyntax_new *s regs->reg0252_tile_cfg.tile_h_m1 = mb_h - 1; regs->reg212_rc_cfg.rc_ctu_num = tile_width; regs->reg0252_tile_cfg.tile_en = syn->pp.tiles_enabled_flag; - regs->reg0253_tile_pos.tile_x = (index * mb_w / (syn->pp.num_tile_columns_minus1 + 1)); + regs->reg0253_tile_pos.tile_x = tile_start_x; regs->reg0253_tile_pos.tile_y = 0; hal_h265e_dbg_detail("tile_x %d, rc_ctu_num %d, tile_width_m1 %d", @@ -2846,6 +2839,7 @@ MPP_RET hal_h265e_v580_start(void *hal, HalEncTask *enc_task) Vepu580H265eFrmCfg *frm = ctx->frm; RK_U32 k = 0; MPP_RET ret = MPP_OK; + RK_S32 tile_start_x = 0; hal_h265e_enter(); @@ -2881,12 +2875,12 @@ MPP_RET hal_h265e_v580_start(void *hal, HalEncTask *enc_task) if (k) memcpy(hw_regs, frm->regs_set[0], sizeof(*hw_regs)); - vepu580_h265_set_me_ram(syn, reg_base, k); + vepu580_h265_set_me_ram(syn, reg_base, k, tile_start_x); /* set input info */ vepu580_h265_set_patch_info(frm->reg_cfg, syn, (Vepu541Fmt)fmt->format, enc_task); if (tile_num > 1) - hal_h265e_v580_set_uniform_tile(reg_base, syn, k); + hal_h265e_v580_set_uniform_tile(reg_base, syn, k, tile_start_x); if (k) { RK_U32 offset = 0; @@ -2955,6 +2949,7 @@ MPP_RET hal_h265e_v580_start(void *hal, HalEncTask *enc_task) } else mpp_dev_ioctl(ctx->dev, MPP_DEV_DELIMIT, NULL); } + tile_start_x += (syn->pp.column_width_minus1[k] + 1); } ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);