refactor[h265]: unify tile width calculation

Change-Id: Ib5200e332cc5be47f79561570c0342e7690e4587
Signed-off-by: xiaoxu.chen <xiaoxu.chen@rock-chips.com>
This commit is contained in:
xiaoxu.chen
2024-09-25 11:36:24 +08:00
parent c16dfcea7d
commit a4bd0d2a69
4 changed files with 45 additions and 41 deletions

View File

@@ -458,6 +458,10 @@ MPP_RET h265e_set_pps(H265eCtx *ctx, H265ePps *pps, H265eSps *sps)
pps->m_loopFilterAcrossTilesEnabledFlag = !codec->lpf_acs_tile_disable;
{
RockchipSocType soc_type = mpp_get_soc_type();
RK_S32 index;
RK_S32 mb_w = (sps->m_picWidthInLumaSamples + sps->m_maxCUSize - 1) / sps->m_maxCUSize;
RK_S32 mb_h = (sps->m_picHeightInLumaSamples + sps->m_maxCUSize - 1) / sps->m_maxCUSize;
RK_S32 tile_width;
/* check tile support on rk3566 and rk3568 */
if (soc_type == ROCKCHIP_SOC_RK3566 || soc_type == ROCKCHIP_SOC_RK3568) {
@@ -477,7 +481,18 @@ MPP_RET h265e_set_pps(H265eCtx *ctx, H265ePps *pps, H265eSps *sps)
if (pps->m_nNumTileColumnsMinus1) {
pps->m_tiles_enabled_flag = 1;
pps->m_bTileUniformSpacing = 1;
pps->m_loopFilterAcrossTilesEnabledFlag = !codec->lpf_acs_tile_disable;;
pps->m_loopFilterAcrossTilesEnabledFlag = !codec->lpf_acs_tile_disable;
/* calc width per tile */
for (index = 0; index < pps->m_nNumTileColumnsMinus1; index++) {
tile_width = (index + 1) * mb_w / (pps->m_nNumTileColumnsMinus1 + 1) -
index * mb_w / (pps->m_nNumTileColumnsMinus1 + 1);
pps->m_nTileColumnWidthArray[index] = tile_width;
pps->m_nTileRowHeightArray[index] = mb_h;
}
tile_width = mb_w - index * mb_w / (pps->m_nNumTileColumnsMinus1 + 1);
pps->m_nTileColumnWidthArray[index] = tile_width;
pps->m_nTileRowHeightArray[index] = mb_h;
}
}

View File

@@ -120,11 +120,11 @@ static void fill_picture_parameters(const H265eCtx *h,
pp->num_tile_columns_minus1 = pps->m_nNumTileColumnsMinus1;
pp->num_tile_rows_minus1 = pps->m_nNumTileRowsMinus1;
for (i = 0; i < pp->num_tile_columns_minus1; i++)
pp->column_width_minus1[i] = pps->m_nTileColumnWidthArray[i];
for (i = 0; i <= pp->num_tile_columns_minus1; i++)
pp->column_width_minus1[i] = pps->m_nTileColumnWidthArray[i] - 1;
for (i = 0; i < pp->num_tile_rows_minus1; i++)
pp->row_height_minus1[i] = pps->m_nTileRowHeightArray[i];
for (i = 0; i <= pp->num_tile_rows_minus1; i++)
pp->row_height_minus1[i] = pps->m_nTileRowHeightArray[i] - 1;
}
}

View File

@@ -1331,7 +1331,8 @@ static void vepu541_h265_set_me_regs(H265eV541HalContext *ctx, H265eSyntax_new *
regs->me_ram.cach_l2_tag = 0x3;
}
static void vepu540_h265_set_me_ram(H265eSyntax_new *syn, H265eV541RegSet *regs, RK_U32 index)
static void vepu540_h265_set_me_ram(H265eSyntax_new *syn, H265eV541RegSet *regs,
RK_U32 index, RK_S32 tile_start_x)
{
RK_U32 cime_w = 11, cime_h = 7;
RK_U32 pic_cime_temp = 0;
@@ -1340,16 +1341,10 @@ static void vepu540_h265_set_me_ram(H265eSyntax_new *syn, H265eV541RegSet *regs,
regs->me_ram.cime_linebuf_w = pic_cime_temp / 64;
} else {
RK_S32 pic_wd64 = MPP_ALIGN(syn->pp.pic_width, 64) >> 6;
RK_S32 tile_ctu_stax = index * pic_wd64 / (syn->pp.num_tile_columns_minus1 + 1);
RK_S32 tile_ctu_endx = 0;
RK_S32 tile_ctu_stax = tile_start_x;
RK_S32 tile_ctu_endx = tile_start_x + syn->pp.column_width_minus1[index];
RK_S32 cime_srch_w = regs->me_rnge.cime_srch_h;
if (index == syn->pp.num_tile_columns_minus1) {
tile_ctu_endx = ((regs->enc_rsl.pic_wd8_m1 + 1) * 8 + 63) / 64 - 1;
} else {
tile_ctu_endx = (index + 1) * pic_wd64 / (syn->pp.num_tile_columns_minus1 + 1) - 1;
}
if (tile_ctu_stax < (cime_srch_w + 3) / 4) {
if (tile_ctu_endx + 1 + (cime_srch_w + 3) / 4 > pic_wd64)
pic_cime_temp = pic_wd64 * 64;
@@ -1668,21 +1663,18 @@ MPP_RET hal_h265e_v541_gen_regs(void *hal, HalEncTask *task)
hal_h265e_leave();
return MPP_OK;
}
void hal_h265e_v540_set_uniform_tile(H265eV541RegSet *regs, H265eSyntax_new *syn, RK_U32 index)
void hal_h265e_v540_set_uniform_tile(H265eV541RegSet *regs, H265eSyntax_new *syn,
RK_U32 index, RK_S32 tile_start_x)
{
if (syn->pp.tiles_enabled_flag) {
RK_S32 mb_w = MPP_ALIGN(syn->pp.pic_width, 64) / 64;
RK_S32 mb_h = MPP_ALIGN(syn->pp.pic_height, 64) / 64;
RK_S32 tile_width = (index + 1) * mb_w / (syn->pp.num_tile_columns_minus1 + 1) -
index * mb_w / (syn->pp.num_tile_columns_minus1 + 1);
if (index == syn->pp.num_tile_columns_minus1) {
tile_width = mb_w - index * mb_w / (syn->pp.num_tile_columns_minus1 + 1);
}
RK_S32 tile_width = syn->pp.column_width_minus1[index] + 1;
regs->tile_cfg.tile_width_m1 = tile_width - 1;
regs->tile_cfg.tile_height_m1 = mb_h - 1;
regs->rc_cfg.rc_ctu_num = tile_width;
regs->tile_cfg.tile_en = syn->pp.tiles_enabled_flag;
regs->tile_pos.tile_x = (index * mb_w / (syn->pp.num_tile_columns_minus1 + 1));
regs->tile_pos.tile_x = tile_start_x;
regs->tile_pos.tile_y = 0;
if (index > 0) {
RK_U32 tmp = regs->lpfr_addr_hevc;
@@ -1707,6 +1699,7 @@ MPP_RET hal_h265e_v540_start(void *hal, HalEncTask *enc_task)
hal_h265e_enter();
RK_U32 stream_len = 0;
VepuFmtCfg *fmt = (VepuFmtCfg *)ctx->input_fmt;
RK_S32 tile_start_x = 0;
if (enc_task->flags.err) {
hal_h265e_err("enc_task->flags.err %08x, return e arly",
@@ -1721,13 +1714,13 @@ MPP_RET hal_h265e_v540_start(void *hal, HalEncTask *enc_task)
MppDevRegWrCfg cfg;
MppDevRegRdCfg cfg1;
vepu540_h265_set_me_ram(syn, hw_regs, k);
vepu540_h265_set_me_ram(syn, hw_regs, k, tile_start_x);
/* set input info */
vepu541_h265_set_l2_regs(ctx, (H265eV54xL2RegSet*)ctx->l2_regs);
vepu541_h265_set_patch_info(ctx->dev, syn, (Vepu541Fmt)fmt->format, enc_task);
if (title_num > 1)
hal_h265e_v540_set_uniform_tile(hw_regs, syn, k);
hal_h265e_v540_set_uniform_tile(hw_regs, syn, k, tile_start_x);
if (k > 0) {
MppDevRegOffsetCfg cfg_fd;
RK_U32 offset = mpp_packet_get_length(enc_task->packet);
@@ -1807,6 +1800,7 @@ MPP_RET hal_h265e_v540_start(void *hal, HalEncTask *enc_task)
}
}
tile_start_x += (syn->pp.column_width_minus1[k] + 1);
}
hal_h265e_dbg_detail("vpu client is sending %d regs", length);

View File

@@ -254,7 +254,8 @@ static RK_U8 lvl16_intra_cst_wgt[8] = {17, 17, 17, 18, 17, 18, 18};
#include "hal_h265e_vepu580_tune.c"
static void vepu580_h265_set_me_ram(H265eSyntax_new *syn, hevc_vepu580_base *regs, RK_U32 index)
static void vepu580_h265_set_me_ram(H265eSyntax_new *syn, hevc_vepu580_base *regs,
RK_U32 index, RK_S32 tile_start_x)
{
RK_S32 frm_sta = 0, frm_end = 0, pic_w = 0;
RK_S32 srch_w = regs->reg0220_me_rnge.cme_srch_h * 4;
@@ -277,14 +278,8 @@ static void vepu580_h265_set_me_ram(H265eSyntax_new *syn, hevc_vepu580_base *reg
}
frm_end = mpp_clip(frm_end, 0, pic_wd64 - 1);
} else {
RK_S32 tile_ctu_stax = index * pic_wd64 / (syn->pp.num_tile_columns_minus1 + 1);
RK_S32 tile_ctu_endx = 0;
if (index == syn->pp.num_tile_columns_minus1) {
tile_ctu_endx = ((regs->reg0196_enc_rsl.pic_wd8_m1 + 1) * 8 + 63) / 64 - 1;
} else {
tile_ctu_endx = (index + 1) * pic_wd64 / (syn->pp.num_tile_columns_minus1 + 1) - 1;
}
RK_S32 tile_ctu_stax = tile_start_x;
RK_S32 tile_ctu_endx = tile_start_x + syn->pp.column_width_minus1[index];
if (x_gmv - srch_w < 0) {
frm_sta = tile_ctu_stax + (x_gmv - srch_w - 15) / 16;
@@ -2787,13 +2782,12 @@ MPP_RET hal_h265e_v580_gen_regs(void *hal, HalEncTask *task)
return MPP_OK;
}
void hal_h265e_v580_set_uniform_tile(hevc_vepu580_base *regs, H265eSyntax_new *syn, RK_U32 index)
void hal_h265e_v580_set_uniform_tile(hevc_vepu580_base *regs, H265eSyntax_new *syn,
RK_U32 index, RK_S32 tile_start_x)
{
if (syn->pp.tiles_enabled_flag) {
RK_S32 mb_w = MPP_ALIGN(syn->pp.pic_width, 64) / 64;
RK_S32 mb_h = MPP_ALIGN(syn->pp.pic_height, 64) / 64;
RK_S32 tile_width = (index + 1) * mb_w / (syn->pp.num_tile_columns_minus1 + 1) -
index * mb_w / (syn->pp.num_tile_columns_minus1 + 1);
RK_S32 tile_width = syn->pp.column_width_minus1[index] + 1;
if (!regs->reg0192_enc_pic.cur_frm_ref &&
!(regs->reg0238_synt_pps.lpf_fltr_acrs_til &&
@@ -2819,7 +2813,6 @@ void hal_h265e_v580_set_uniform_tile(hevc_vepu580_base *regs, H265eSyntax_new *s
regs->reg0193_dual_core.dchs_ofst = 2;
if (index == syn->pp.num_tile_columns_minus1) {
tile_width = mb_w - index * mb_w / (syn->pp.num_tile_columns_minus1 + 1);
regs->reg0193_dual_core.dchs_txid = 0;
regs->reg0193_dual_core.dchs_txe = 0;
}
@@ -2827,7 +2820,7 @@ void hal_h265e_v580_set_uniform_tile(hevc_vepu580_base *regs, H265eSyntax_new *s
regs->reg0252_tile_cfg.tile_h_m1 = mb_h - 1;
regs->reg212_rc_cfg.rc_ctu_num = tile_width;
regs->reg0252_tile_cfg.tile_en = syn->pp.tiles_enabled_flag;
regs->reg0253_tile_pos.tile_x = (index * mb_w / (syn->pp.num_tile_columns_minus1 + 1));
regs->reg0253_tile_pos.tile_x = tile_start_x;
regs->reg0253_tile_pos.tile_y = 0;
hal_h265e_dbg_detail("tile_x %d, rc_ctu_num %d, tile_width_m1 %d",
@@ -2846,6 +2839,7 @@ MPP_RET hal_h265e_v580_start(void *hal, HalEncTask *enc_task)
Vepu580H265eFrmCfg *frm = ctx->frm;
RK_U32 k = 0;
MPP_RET ret = MPP_OK;
RK_S32 tile_start_x = 0;
hal_h265e_enter();
@@ -2881,12 +2875,12 @@ MPP_RET hal_h265e_v580_start(void *hal, HalEncTask *enc_task)
if (k)
memcpy(hw_regs, frm->regs_set[0], sizeof(*hw_regs));
vepu580_h265_set_me_ram(syn, reg_base, k);
vepu580_h265_set_me_ram(syn, reg_base, k, tile_start_x);
/* set input info */
vepu580_h265_set_patch_info(frm->reg_cfg, syn, (Vepu541Fmt)fmt->format, enc_task);
if (tile_num > 1)
hal_h265e_v580_set_uniform_tile(reg_base, syn, k);
hal_h265e_v580_set_uniform_tile(reg_base, syn, k, tile_start_x);
if (k) {
RK_U32 offset = 0;
@@ -2955,6 +2949,7 @@ MPP_RET hal_h265e_v580_start(void *hal, HalEncTask *enc_task)
} else
mpp_dev_ioctl(ctx->dev, MPP_DEV_DELIMIT, NULL);
}
tile_start_x += (syn->pp.column_width_minus1[k] + 1);
}
ret = mpp_dev_ioctl(ctx->dev, MPP_DEV_CMD_SEND, NULL);