feat[vepu580]: Optimize hal processing for smart encoding

Signed-off-by: Tingjin Huang <timkingh.huang@rock-chips.com>
Change-Id: I27ec19fcfa6c71bb1d084fea7d40853fed3e4603
This commit is contained in:
Tingjin Huang
2024-08-13 16:13:37 +08:00
committed by Herman Chen
parent e03714e193
commit df0b9e3a0d
4 changed files with 279 additions and 20 deletions

View File

@@ -99,6 +99,12 @@ typedef struct HalH264eVepu580Ctx_t {
/* finetune */
void *tune;
MppBuffer qpmap_base_cfg_buf;
MppBuffer qpmap_qp_cfg_buf;
RK_U8* md_flag_buf;
RK_S32 qpmap_base_cfg_size;
RK_S32 qpmap_qp_cfg_size;
RK_S32 md_flag_size;
/* two-pass deflicker */
MppBuffer buf_pass1;
@@ -261,6 +267,20 @@ static MPP_RET hal_h264e_vepu580_deinit(void *hal)
p->tune = NULL;
}
if (p->qpmap_base_cfg_buf) {
mpp_buffer_put(p->qpmap_base_cfg_buf);
p->qpmap_base_cfg_buf = NULL;
}
if (p->qpmap_qp_cfg_buf) {
mpp_buffer_put(p->qpmap_qp_cfg_buf);
p->qpmap_qp_cfg_buf = NULL;
}
if (p->md_flag_buf) {
MPP_FREE(p->md_flag_buf);
}
hal_h264e_dbg_func("leave %p\n", p);
return MPP_OK;
@@ -1318,6 +1338,10 @@ static void setup_vepu580_rc_base(HalVepu580RegSet *regs, HalH264eVepu580Ctx *ct
regs->reg_rc_klut.madi_thd.madi_thd2 = 15;
}
if (cfg->rc.rc_mode == MPP_ENC_RC_MODE_SMTRC) {
regs->reg_base.rc_qp.rc_qp_range = 0;
}
hal_h264e_dbg_func("leave\n");
}
@@ -2129,7 +2153,6 @@ static MPP_RET hal_h264e_vepu580_gen_regs(void *hal, HalEncTask *task)
regs->reg_base.meiw_addr = task->md_info ? mpp_buffer_get_fd(task->md_info) : 0;
regs->reg_base.enc_pic.mei_stor = task->md_info ? 1 : 0;
regs->reg_base.pic_ofst.pic_ofst_y = mpp_frame_get_offset_y(task->frame);
regs->reg_base.pic_ofst.pic_ofst_x = mpp_frame_get_offset_x(task->frame);
@@ -2139,6 +2162,13 @@ static MPP_RET hal_h264e_vepu580_gen_regs(void *hal, HalEncTask *task)
if (frm_status->is_i_refresh)
setup_vepu580_intra_refresh(regs, ctx, frm_status->seq_idx % cfg->rc.gop);
if (cfg->tune.qpmap_en && (!rc_task->info.complex_scene) &&
cfg->rc.rc_mode == MPP_ENC_RC_MODE_SMTRC &&
cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC) {
if (MPP_OK != setup_vepu580_qpmap_buf(ctx))
mpp_err("qpmap malloc buffer failed!\n");
}
vepu580_set_osd(&ctx->osd_cfg);
setup_vepu580_l2(regs, slice, &cfg->hw);
setup_vepu580_ext_line_buf(regs, ctx);
@@ -2464,6 +2494,36 @@ static MPP_RET hal_h264e_vepu580_ret_task(void * hal, HalEncTask * task)
task->hal_ret.data = &ctx->hal_rc_cfg;
task->hal_ret.number = 1;
//RK_U32 madi_th_cnt0 = ctx->regs_set->reg_st.madi_b16num0;
RK_U32 madi_th_cnt1 = ctx->regs_set->reg_st.madi_b16num1;
RK_U32 madi_th_cnt2 = ctx->regs_set->reg_st.madi_b16num2;
RK_U32 madi_th_cnt3 = ctx->regs_set->reg_st.madi_b16num3;
//RK_U32 madp_th_cnt0 = ctx->regs_set->reg_st.md_sad_b16num0;
RK_U32 madp_th_cnt1 = ctx->regs_set->reg_st.md_sad_b16num1;
RK_U32 madp_th_cnt2 = ctx->regs_set->reg_st.md_sad_b16num2;
RK_U32 madp_th_cnt3 = ctx->regs_set->reg_st.md_sad_b16num3;
RK_U32 md_cnt = (24 * madp_th_cnt3 + 22 * madp_th_cnt2 + 17 * madp_th_cnt1) >> 2;
RK_U32 madi_cnt = (6 * madi_th_cnt3 + 5 * madi_th_cnt2 + 4 * madi_th_cnt1) >> 2;
rc_info->motion_level = 0;
if (md_cnt * 100 > 15 * mbs)
rc_info->motion_level = 2;
else if (md_cnt * 100 > 5 * mbs)
rc_info->motion_level = 1;
else
rc_info->motion_level = 0;
rc_info->complex_level = 0;
if (madi_cnt * 100 > 30 * mbs)
rc_info->complex_level = 2;
else if (madi_cnt * 100 > 13 * mbs)
rc_info->complex_level = 1;
else
rc_info->complex_level = 0;
hal_h264e_dbg_rc("motion_level %u, complex_level %u\n", rc_info->motion_level, rc_info->complex_level);
vepu580_h264e_tune_stat_update(ctx->tune, task);
mpp_dev_multi_offset_reset(ctx->offsets);

View File

@@ -133,7 +133,9 @@ static void vepu580_h264e_tune_reg_patch(void *p)
/* modify register here */
if (slice->slice_type != H264_I_SLICE) {
RK_U32 *src = tune->curr_scene_motion_flag ? &h264e_klut_weight[0] : &h264e_klut_weight[4];
RK_U32 *src = (tune->curr_scene_motion_flag ||
ctx->cfg->rc.rc_mode == MPP_ENC_RC_MODE_SMTRC) ?
&h264e_klut_weight[0] : &h264e_klut_weight[4];
memcpy(&regs->reg_rc_klut.klut_wgt0, src, CHROMA_KLUT_TAB_SIZE);
}
@@ -333,3 +335,52 @@ static void vepu580_h264e_tune_stat_update(void *p, HalEncTask *task)
tune->pre_madi[1] = tune->pre_madi[0];
tune->pre_madp[1] = tune->pre_madp[0];
}
/*
 * Ensure the qpmap working buffers of the H.264 HAL exist and are sized for
 * the current frame geometry.
 *
 * Sizes are derived from ctx->cfg->prep.width / height:
 *   - width is aligned to 64 and height to 16, both counted in 16x16 MBs
 *   - qpmap_base_cfg_buf : 8 bytes per MB
 *   - qpmap_qp_cfg_buf   : 2 bytes per MB
 *   - md_flag_buf        : 1 byte  per MB
 *
 * The computed sizes are always stored into ctx. A buffer that was allocated
 * for a different size (e.g. before a resolution change) is released first so
 * that the size fields in ctx never describe more memory than is actually
 * allocated.
 *
 * Returns MPP_OK on success or when qpmap is disabled, MPP_ERR_VALUE when any
 * allocation fails. Already allocated buffers are kept on failure and are
 * released by the HAL deinit path.
 */
static MPP_RET setup_vepu580_qpmap_buf(HalH264eVepu580Ctx *ctx)
{
    MPP_RET ret = MPP_OK;
    RK_S32 mb_w = MPP_ALIGN(ctx->cfg->prep.width, 64) / 16;
    RK_S32 mb_h = MPP_ALIGN(ctx->cfg->prep.height, 16) / 16;
    RK_S32 mb_cnt = mb_w * mb_h;
    RK_S32 qpmap_base_cfg_size = mb_cnt * 8;
    RK_S32 qpmap_qp_cfg_size = mb_cnt * 2;
    RK_S32 md_flag_size = mb_cnt;

    /*
     * Drop buffers sized for another geometry before the size fields are
     * overwritten; the old code kept them and only updated the sizes.
     */
    if (ctx->qpmap_base_cfg_buf && ctx->qpmap_base_cfg_size != qpmap_base_cfg_size) {
        mpp_buffer_put(ctx->qpmap_base_cfg_buf);
        ctx->qpmap_base_cfg_buf = NULL;
    }

    if (ctx->qpmap_qp_cfg_buf && ctx->qpmap_qp_cfg_size != qpmap_qp_cfg_size) {
        mpp_buffer_put(ctx->qpmap_qp_cfg_buf);
        ctx->qpmap_qp_cfg_buf = NULL;
    }

    if (ctx->md_flag_buf && ctx->md_flag_size != md_flag_size) {
        MPP_FREE(ctx->md_flag_buf);
        ctx->md_flag_buf = NULL;
    }

    ctx->qpmap_base_cfg_size = qpmap_base_cfg_size;
    ctx->qpmap_qp_cfg_size = qpmap_qp_cfg_size;
    ctx->md_flag_size = md_flag_size;

    if (!ctx->cfg->tune.qpmap_en) {
        mpp_log("qpmap_en is closed!\n");
        goto __RET;
    }

    if (NULL == ctx->qpmap_base_cfg_buf) {
        mpp_buffer_get(NULL, &ctx->qpmap_base_cfg_buf, qpmap_base_cfg_size);
        if (!ctx->qpmap_base_cfg_buf) {
            mpp_err("qpmap_base_cfg_buf malloc fail, qpmap invalid\n");
            ret = MPP_ERR_VALUE;
            goto __RET;
        }
    }

    if (NULL == ctx->qpmap_qp_cfg_buf) {
        mpp_buffer_get(NULL, &ctx->qpmap_qp_cfg_buf, qpmap_qp_cfg_size);
        if (!ctx->qpmap_qp_cfg_buf) {
            mpp_err("qpmap_qp_cfg_buf malloc fail, qpmap invalid\n");
            ret = MPP_ERR_VALUE;
            goto __RET;
        }
    }

    if (NULL == ctx->md_flag_buf) {
        /* NOTE(review): content is not initialized here — confirm users write before reading */
        ctx->md_flag_buf = mpp_malloc(RK_U8, md_flag_size);
        if (!ctx->md_flag_buf) {
            mpp_err("md_flag_buf malloc fail, qpmap invalid\n");
            ret = MPP_ERR_VALUE;
            goto __RET;
        }
    }

__RET:
    hal_h264e_dbg_func("leave, ret %d\n", ret);
    return ret;
}

View File

@@ -81,13 +81,43 @@ typedef struct vepu580_h265_fbk_t {
RK_U32 st_ctu_num;
} Vepu580H265Fbk;
typedef struct Vepu580RoiH265BsCfg_t {
typedef struct Vepu580RoiHevcBsCfg_t {
RK_U8 amv_en : 1;
RK_U8 qp_adj : 1;
RK_U8 force_split : 1;
RK_U8 force_intra : 2;
RK_U8 force_inter : 2;
} Vepu580RoiH265BsCfg;
} Vepu580RoiHevcBsCfg;
/*
 * Motion-detection info record: one 8-bit field followed by sixteen 16-bit
 * values.
 * NOTE(review): presumably mirrors a record produced by the hardware, so the
 * field order and widths should not be changed — confirm against the spec.
 */
typedef struct Vepu580MdInfo_t {
/* presumably a "valid" flag for this record — TODO confirm */
RK_U8 vld;
/* presumably 16 SAD (sum of absolute differences) values — TODO confirm the block mapping */
RK_U16 sad[16];
} Vepu580MdInfo;
/*
 * QP configuration entry for the ROI / qpmap path.
 * The bit-field widths sum to 16 bits (4 + 4 + 7 + 1).
 * NOTE(review): presumably consumed by hardware, so the field order and widths
 * must stay as they are — confirm against the register spec.
 */
typedef struct Vepu580RoiHevcQpCfg_t {
/* Reserved bits, not described further in this file. */
RK_U16 reserved : 4;
/*
 * Qp area index
 * The chosen qp area index.
 */
RK_U16 qp_area_idx : 4;
/*
 * Qp_adj
 * In absolute qp mode qp_adj is the final qp used by the encoder.
 * In relative qp mode qp_adj is an adjustment to the final qp.
 * Declared signed so that relative adjustments can be negative.
 */
RK_S16 qp_adj : 7;
/*
 * Qp_adj_mode
 * Qp adjustment mode
 * 1 - absolute qp mode:
 * the 16x16 MB qp is set to the qp_adj value
 * 0 - relative qp mode:
 * the 16x16 MB qp is adjusted by the qp_adj value
 */
RK_U16 qp_adj_mode : 1;
} Vepu580RoiHevcQpCfg;
typedef struct Vepu580H265eFrmCfg_t {
RK_S32 frame_count;
@@ -184,6 +214,12 @@ typedef struct H265eV580HalContext_t {
/* finetune */
void *tune;
MppBuffer qpmap_base_cfg_buf;
MppBuffer qpmap_qp_cfg_buf;
RK_U8* md_flag_buf;
RK_S32 qpmap_base_cfg_size;
RK_S32 qpmap_qp_cfg_size;
RK_S32 md_flag_size;
} H265eV580HalContext;
static RK_U32 aq_thd_default[16] = {
@@ -202,11 +238,12 @@ static RK_U32 h265e_mode_bias[16] = {
static RK_S32 aq_qp_dealt_default[16] = {
-8, -7, -6, -5,
-4, -2, -1, -1,
0, 2, 3, 4,
-4, -3, -2, -1,
0, 1, 2, 3,
5, 7, 8, 9,
};
/*
 * Four-entry threshold tables with identical values.
 * NOTE(review): presumably the intra cost thresholds for the 32x32 and 16x16
 * levels — confirm against the register description.
 */
static RK_U16 lvl32_intra_cst_thd[4] = {2, 6, 16, 36};
static RK_U16 lvl16_intra_cst_thd[4] = {2, 6, 16, 36};
@@ -1337,7 +1374,6 @@ static void vepu580_h265_global_cfg_set(H265eV580HalContext *ctx, H265eV580RegSe
rc_regs->madi_thd.madi_thd0 = 25;
rc_regs->madi_thd.madi_thd1 = 35;
rc_regs->madi_thd.madi_thd2 = 45;
reg_wgt->reg1484_qnt_bias_comb.qnt_bias_i = 171;
reg_wgt->reg1484_qnt_bias_comb.qnt_bias_p = 85;
@@ -1446,6 +1482,21 @@ MPP_RET hal_h265e_v580_deinit(void *hal)
vepu580_h265e_tune_deinit(ctx->tune);
ctx->tune = NULL;
}
if (ctx->qpmap_base_cfg_buf) {
mpp_buffer_put(ctx->qpmap_base_cfg_buf);
ctx->qpmap_base_cfg_buf = NULL;
}
if (ctx->qpmap_qp_cfg_buf) {
mpp_buffer_put(ctx->qpmap_qp_cfg_buf);
ctx->qpmap_qp_cfg_buf = NULL;
}
if (ctx->md_flag_buf) {
MPP_FREE(ctx->md_flag_buf);
}
hal_h265e_leave();
return MPP_OK;
}
@@ -1900,6 +1951,10 @@ static MPP_RET vepu580_h265_set_rc_regs(H265eV580HalContext *ctx, H265eV580RegSe
reg_rc->rc_adj1.qp_adj6 = 0;
reg_rc->rc_adj1.qp_adj7 = 0;
reg_rc->rc_adj1.qp_adj8 = 0;
if (rc->rc_mode == MPP_ENC_RC_MODE_SMTRC) {
reg_base->reg213_rc_qp.rc_qp_range = 0;
}
}
reg_rc->roi_qthd0.qpmin_area0 = h265->qpmin_map[0] > 0 ? h265->qpmin_map[0] : rc_cfg->quality_min;
@@ -2517,7 +2572,7 @@ static MPP_RET vepu580_h265e_use_pass1_patch(H265eV580RegSet *regs, H265eV580Hal
return MPP_OK;
}
static void setup_vepu580_split(H265eV580RegSet *regs, MppEncCfgSet *enc_cfg, RK_U32 title_en)
static void vepu580_setup_split(H265eV580RegSet *regs, MppEncCfgSet *enc_cfg, RK_U32 title_en)
{
MppEncSliceSplit *cfg = &enc_cfg->split;
@@ -2596,6 +2651,7 @@ MPP_RET hal_h265e_v580_gen_regs(void *hal, HalEncTask *task)
hevc_vepu580_control_cfg *reg_ctl = &regs->reg_ctl;
hevc_vepu580_base *reg_base = &regs->reg_base;
hevc_vepu580_rc_klut *reg_klut = &regs->reg_rc_klut;
MppEncCfgSet *cfg = ctx->cfg;
hal_h265e_enter();
pic_width_align8 = (syn->pp.pic_width + 7) & (~7);
@@ -2713,11 +2769,19 @@ MPP_RET hal_h265e_v580_gen_regs(void *hal, HalEncTask *task)
if (frm->is_i_refresh)
setup_intra_refresh(ctx, frm->seq_idx % ctx->cfg->rc.gop);
if (cfg->tune.qpmap_en && (!rc_task->info.complex_scene) &&
cfg->rc.rc_mode == MPP_ENC_RC_MODE_SMTRC &&
cfg->tune.scene_mode == MPP_ENC_SCENE_MODE_IPC) {
if (MPP_OK != vepu580_setup_qpmap_buf(ctx))
mpp_err("qpmap malloc buffer failed!\n");
}
/*paramet cfg*/
vepu580_h265_global_cfg_set(ctx, regs);
vepu580_h265e_tune_reg_patch(ctx->tune);
setup_vepu580_split(regs, ctx->cfg, syn->pp.tiles_enabled_flag);
vepu580_setup_split(regs, cfg, syn->pp.tiles_enabled_flag);
hal_h265e_leave();
return MPP_OK;
@@ -3299,8 +3363,8 @@ MPP_RET hal_h265e_v580_get_task(void *hal, HalEncTask *task)
MPP_RET hal_h265e_v580_ret_task(void *hal, HalEncTask *task)
{
H265eV580HalContext *ctx = (H265eV580HalContext *)hal;
HalEncTask *enc_task = task;
EncRcTaskInfo *rc_info = &task->rc_task->info;
HalEncTask *enc_task = task;
RK_S32 task_idx = task->flags.reg_idx;
Vepu580H265eFrmCfg *frm = ctx->frms[task_idx];
Vepu580H265Fbk *fb = &frm->feedback;
@@ -3360,7 +3424,7 @@ MPP_RET hal_h265e_v580_ret_task(void *hal, HalEncTask *task)
enc_task->hw_length = fb->out_strm_size;
enc_task->length += fb->out_strm_size;
vepu580_h265e_tune_stat_update(ctx->tune);
vepu580_h265e_tune_stat_update(ctx->tune, rc_info);
h265e_dpb_hal_end(ctx->dpb, frm->hal_curr_idx);
h265e_dpb_hal_end(ctx->dpb, frm->hal_refr_idx);

View File

@@ -15,10 +15,14 @@
*/
#include "vepu580_tune.h"
#include "hal_h265e_vepu580_reg.h"
/* Debug flag bit for content-related logging in the H.265 tune code. */
#define HAL_H265E_DBG_CONTENT (0x00200000)
/*
 * NOTE(review): this macro is named hal_h264e_* and forwards to
 * hal_h264e_dbg_f with HAL_H264E_DBG_CONTENT, while the flag defined just
 * above is HAL_H265E_DBG_CONTENT. Looks like a leftover from the H.264 tune
 * file — confirm whether it is used and which helper it should call.
 */
#define hal_h264e_dbg_content(fmt, ...) hal_h264e_dbg_f(HAL_H264E_DBG_CONTENT, fmt, ## __VA_ARGS__)
/*
 * Per-CTU byte counts. vepu580_setup_qpmap_buf sizes the qpmap base-cfg and
 * qp-cfg buffers with these same factors (64 and 192 bytes per 64x64 CTU).
 */
#define CTU_BASE_CFG_BYTE 64
#define CTU_QP_CFG_BYTE 192
/*
* Please follow the configuration below:
*
@@ -254,8 +258,10 @@ static void vepu580_h265e_tune_reg_patch(void *p)
return;
}
memcpy(&reg_wgt->lvl32_intra_CST_WGT0, lvl32_preintra_cst_wgt[scene_motion_flag], sizeof(lvl32_preintra_cst_wgt[scene_motion_flag]));
memcpy(&reg_wgt->lvl16_intra_CST_WGT0, lvl16_preintra_cst_wgt[scene_motion_flag], sizeof(lvl16_preintra_cst_wgt[scene_motion_flag]));
memcpy(&reg_wgt->lvl32_intra_CST_WGT0, lvl32_preintra_cst_wgt[scene_motion_flag],
sizeof(lvl32_preintra_cst_wgt[scene_motion_flag]));
memcpy(&reg_wgt->lvl16_intra_CST_WGT0, lvl16_preintra_cst_wgt[scene_motion_flag],
sizeof(lvl16_preintra_cst_wgt[scene_motion_flag]));
p_rdo_atf_skip = &reg_rdo->rdo_b64_skip_atf;
p_rdo_atf_skip->rdo_b_cime_thd0.cu_rdo_cime_thd0 = 1;
@@ -390,9 +396,9 @@ static void vepu580_h265e_tune_reg_patch(void *p)
reg_rdo->preintra_b16_cst_wgt.pre_intra16_cst_wgt00 = pre_intra_b16_cost[scene_motion_flag][0];
reg_rdo->preintra_b16_cst_wgt.pre_intra16_cst_wgt01 = pre_intra_b16_cost[scene_motion_flag][1];
rc_regs->md_sad_thd.md_sad_thd0 = 4;
rc_regs->md_sad_thd.md_sad_thd1 = 9;
rc_regs->md_sad_thd.md_sad_thd2 = 15;
rc_regs->md_sad_thd.md_sad_thd0 = 7;
rc_regs->md_sad_thd.md_sad_thd1 = 15;
rc_regs->md_sad_thd.md_sad_thd2 = 25;
rc_regs->madi_thd.madi_thd0 = 4;
rc_regs->madi_thd.madi_thd1 = 9;
rc_regs->madi_thd.madi_thd2 = 15;
@@ -464,9 +470,10 @@ static void vepu580_h265e_tune_reg_patch(void *p)
reg_wgt->fme_sqi_thd0.cime_sad_pu32_th = fme_sqi_cime_sad_pu32_th[scene_motion_flag];
reg_wgt->fme_sqi_thd1.cime_sad_pu64_th = fme_sqi_cime_sad_pu64_th[scene_motion_flag];
rc_regs->klut_ofst.chrm_klut_ofst = chrm_klut_ofst[scene_motion_flag];
}
static void vepu580_h265e_tune_stat_update(void *p)
static void vepu580_h265e_tune_stat_update(void *p, EncRcTaskInfo *rc_info)
{
HalH265eVepu580Tune *tune = (HalH265eVepu580Tune *)p;
H265eV580HalContext *ctx = NULL;
@@ -503,9 +510,16 @@ static void vepu580_h265e_tune_stat_update(void *p)
RK_S32 nScore = 0;
RK_S32 nScoreT = ((MD_WIN_LEN - 2) * 6 + 2 * 8 + 2 * 11 + 2 * 13) / 2;
RK_S32 madp_cnt_statistics[5];
RK_U32 md_cnt = (24 * fb->st_md_sad_b16num3 + 22 * fb->st_md_sad_b16num2 + 17 *
fb->st_md_sad_b16num1) >> 2;
RK_U32 madi_cnt = (6 * fb->st_madi_b16num3 + 5 * fb->st_madi_b16num2 + 4 *
fb->st_madi_b16num1) >> 2;
RK_U32 mbs = ((ctx->cfg->prep.width + 15) / 16) * ((ctx->cfg->prep.height + 15) / 16);
for (i = 0; i < 5; i++) {
madp_cnt_statistics[i] = fb->st_md_sad_b16num0 * madp_num_map[i][0] + fb->st_md_sad_b16num1 * madp_num_map[i][1]
+ fb->st_md_sad_b16num2 * madp_num_map[i][2] + fb->st_md_sad_b16num3 * madp_num_map[i][3];
madp_cnt_statistics[i] = fb->st_md_sad_b16num0 * madp_num_map[i][0] +
fb->st_md_sad_b16num1 * madp_num_map[i][1] +
fb->st_md_sad_b16num2 * madp_num_map[i][2] +
fb->st_md_sad_b16num3 * madp_num_map[i][3];
}
tune->pre_madi[0] = fb->st_madi;
@@ -558,7 +572,8 @@ static void vepu580_h265e_tune_stat_update(void *p)
tune->curr_scene_motion_flag = 0;
if (tune->md_flag_matrix[0] && tune->md_flag_matrix[1] && tune->md_flag_matrix[2]) {
tune->curr_scene_motion_flag = 1;
} else if ((tune->md_flag_matrix[0] && tune->md_flag_matrix[1]) || (tune->md_flag_matrix[1] && tune->md_flag_matrix[2] && tune->md_flag_matrix[3])) {
} else if ((tune->md_flag_matrix[0] && tune->md_flag_matrix[1]) ||
(tune->md_flag_matrix[1] && tune->md_flag_matrix[2] && tune->md_flag_matrix[3])) {
tune->curr_scene_motion_flag = md_flag;
}
@@ -580,4 +595,73 @@ static void vepu580_h265e_tune_stat_update(void *p)
tune->pre_madi[1] = tune->pre_madi[0];
tune->pre_madp[1] = tune->pre_madp[0];
rc_info->motion_level = 0;
if (md_cnt * 100 > 15 * mbs)
rc_info->motion_level = 2;
else if (md_cnt * 100 > 5 * mbs)
rc_info->motion_level = 1;
else
rc_info->motion_level = 0;
rc_info->complex_level = 0;
if (madi_cnt * 100 > 30 * mbs)
rc_info->complex_level = 2;
else if (madi_cnt * 100 > 13 * mbs)
rc_info->complex_level = 1;
else
rc_info->complex_level = 0;
hal_h265e_dbg_detail("motion_level = %u, complex_level = %u\n", rc_info->motion_level,
rc_info->complex_level);
}
/*
 * Ensure the qpmap working buffers of the H.265 HAL exist and are sized for
 * the current frame geometry.
 *
 * Sizes are derived from ctx->cfg->prep.width / height, both aligned to 64 and
 * counted in 64x64 CTUs:
 *   - qpmap_base_cfg_buf : CTU_BASE_CFG_BYTE (64) bytes per CTU
 *   - qpmap_qp_cfg_buf   : CTU_QP_CFG_BYTE (192) bytes per CTU
 *   - md_flag_buf        : 16 bytes per CTU
 *
 * The computed sizes are always stored into ctx. A buffer that was allocated
 * for a different size (e.g. before a resolution change) is released first so
 * that the size fields in ctx never describe more memory than is actually
 * allocated.
 *
 * Returns MPP_OK on success or when qpmap is disabled, MPP_ERR_VALUE when any
 * allocation fails. Already allocated buffers are kept on failure and are
 * released by the HAL deinit path.
 */
static MPP_RET vepu580_setup_qpmap_buf(H265eV580HalContext *ctx)
{
    MPP_RET ret = MPP_OK;
    RK_S32 w = ctx->cfg->prep.width;
    RK_S32 h = ctx->cfg->prep.height;
    RK_S32 ctu_w = MPP_ALIGN(w, 64) / 64;
    RK_S32 ctu_h = MPP_ALIGN(h, 64) / 64;
    RK_S32 ctu_cnt = ctu_w * ctu_h;
    RK_S32 qpmap_base_cfg_size = ctu_cnt * CTU_BASE_CFG_BYTE;
    RK_S32 qpmap_qp_cfg_size = ctu_cnt * CTU_QP_CFG_BYTE;
    /* presumably one flag byte per 16x16 block, 16 blocks per CTU — TODO confirm */
    RK_S32 md_flag_size = ctu_cnt * 16;

    /*
     * Drop buffers sized for another geometry before the size fields are
     * overwritten; the old code kept them and only updated the sizes.
     */
    if (ctx->qpmap_base_cfg_buf && ctx->qpmap_base_cfg_size != qpmap_base_cfg_size) {
        mpp_buffer_put(ctx->qpmap_base_cfg_buf);
        ctx->qpmap_base_cfg_buf = NULL;
    }

    if (ctx->qpmap_qp_cfg_buf && ctx->qpmap_qp_cfg_size != qpmap_qp_cfg_size) {
        mpp_buffer_put(ctx->qpmap_qp_cfg_buf);
        ctx->qpmap_qp_cfg_buf = NULL;
    }

    if (ctx->md_flag_buf && ctx->md_flag_size != md_flag_size) {
        MPP_FREE(ctx->md_flag_buf);
        ctx->md_flag_buf = NULL;
    }

    ctx->qpmap_base_cfg_size = qpmap_base_cfg_size;
    ctx->qpmap_qp_cfg_size = qpmap_qp_cfg_size;
    ctx->md_flag_size = md_flag_size;

    if (!ctx->cfg->tune.qpmap_en) {
        mpp_log("qpmap_en is closed!\n");
        goto __RET;
    }

    if (NULL == ctx->qpmap_base_cfg_buf) {
        mpp_buffer_get(NULL, &ctx->qpmap_base_cfg_buf, qpmap_base_cfg_size);
        if (!ctx->qpmap_base_cfg_buf) {
            mpp_err("qpmap_base_cfg_buf malloc fail, qpmap invalid\n");
            ret = MPP_ERR_VALUE;
            goto __RET;
        }
    }

    if (NULL == ctx->qpmap_qp_cfg_buf) {
        mpp_buffer_get(NULL, &ctx->qpmap_qp_cfg_buf, qpmap_qp_cfg_size);
        if (!ctx->qpmap_qp_cfg_buf) {
            mpp_err("qpmap_qp_cfg_buf malloc fail, qpmap invalid\n");
            ret = MPP_ERR_VALUE;
            goto __RET;
        }
    }

    if (NULL == ctx->md_flag_buf) {
        /* NOTE(review): content is not initialized here — confirm users write before reading */
        ctx->md_flag_buf = mpp_malloc(RK_U8, md_flag_size);
        if (!ctx->md_flag_buf) {
            mpp_err("md_flag_buf malloc fail, qpmap invalid\n");
            ret = MPP_ERR_VALUE;
            goto __RET;
        }
    }

__RET:
    hal_h265e_dbg_func("leave, ret %d\n", ret);
    return ret;
}