diff --git a/mpp/codec/enc/h264/src/h264e_api.c b/mpp/codec/enc/h264/src/h264e_api.c index 646040e2..228daa63 100644 --- a/mpp/codec/enc/h264/src/h264e_api.c +++ b/mpp/codec/enc/h264/src/h264e_api.c @@ -255,7 +255,8 @@ MPP_RET h264e_config(void *ctx, RK_S32 cmd, void *param) ret = MPP_ERR_VALUE; } } - + p->rc->mb_per_frame = (((p->set->prep.width + 15) & (~15)) >> 4) * + (((p->set->prep.height + 15) & (~15)) >> 4); if (!ret) { mpp_log_f("MPP_ENC_SET_RC_CFG bps %d [%d : %d]\n", rc->bps_target, rc->bps_min, rc->bps_max); diff --git a/mpp/codec/inc/mpp_rc.h b/mpp/codec/inc/mpp_rc.h index 657651fe..63d0669b 100644 --- a/mpp/codec/inc/mpp_rc.h +++ b/mpp/codec/inc/mpp_rc.h @@ -77,6 +77,26 @@ typedef struct linear_model_s { RK_S32 weight_mode; /* different weight ratio*/ } MppLinReg; +/* Virtual buffer */ +typedef struct MppVirtualBuffer_s { + RK_S32 bufferSize; /* size of the virtual buffer */ + RK_S32 bitRate; /* input bit rate per second */ + RK_S32 bitPerPic; /* average number of bits per picture */ + RK_S32 picTimeInc; /* timeInc since last coded picture */ + RK_S32 timeScale; /* input frame rate numerator */ + RK_S32 unitsInTic; /* input frame rate denominator */ + RK_S32 virtualBitCnt; /* virtual (channel) bit count */ + RK_S32 realBitCnt; /* real bit count */ + RK_S32 bufferOccupancy; /* number of bits in the buffer */ + RK_S32 skipFrameTarget; /* how many frames should be skipped in a row */ + RK_S32 skippedFrames; /* how many frames have been skipped in a row */ + RK_S32 nonZeroTarget; + RK_S32 bucketFullness; /* Leaky Bucket fullness */ + RK_S32 gopRem; + RK_S32 windowRem; +} MppVirtualBuffer; + + typedef enum ENC_FRAME_TYPE_E { INTER_P_FRAME = 0, INTER_B_FRAME = 1, @@ -142,6 +162,7 @@ typedef struct MppRateControl_s { RK_S32 bits_per_pic; RK_S32 bits_per_intra; RK_S32 bits_per_inter; + RK_S32 mb_per_frame; /* bitrate window which tries to match target */ RK_S32 window_len; @@ -179,6 +200,11 @@ typedef struct MppRateControl_s { MppPIDCtx pid_intra; MppPIDCtx pid_inter; MppPIDCtx pid_fps; + /* + * Vbv buffer control + */ + MppVirtualBuffer vb; + RK_S32 hrd; /* * output target bits on current status @@ -186,6 +212,7 @@ typedef struct MppRateControl_s { * non-zero - have rate control */ RK_S32 bits_target; + RK_S32 pre_gop_left_bit; float max_rate; float min_rate; diff --git a/mpp/codec/mpp_rc.cpp b/mpp/codec/mpp_rc.cpp index 0748e6c5..fef845d4 100644 --- a/mpp/codec/mpp_rc.cpp +++ b/mpp/codec/mpp_rc.cpp @@ -29,6 +29,8 @@ #define MPP_RC_DBG_RC (0x00000020) #define MPP_RC_DBG_CFG (0x00000100) #define MPP_RC_DBG_RECORD (0x00001000) +#define MPP_RC_DBG_VBV (0x00002000) + #define mpp_rc_dbg(flag, fmt, ...) _mpp_dbg(mpp_rc_debug, flag, fmt, ## __VA_ARGS__) #define mpp_rc_dbg_f(flag, fmt, ...) _mpp_dbg_f(mpp_rc_debug, flag, fmt, ## __VA_ARGS__) @@ -37,6 +39,8 @@ #define mpp_rc_dbg_bps(fmt, ...) mpp_rc_dbg(MPP_RC_DBG_BPS, fmt, ## __VA_ARGS__) #define mpp_rc_dbg_rc(fmt, ...) mpp_rc_dbg(MPP_RC_DBG_RC, fmt, ## __VA_ARGS__) #define mpp_rc_dbg_cfg(fmt, ...) mpp_rc_dbg(MPP_RC_DBG_CFG, fmt, ## __VA_ARGS__) +#define mpp_rc_dbg_vbv(fmt, ...) mpp_rc_dbg(MPP_RC_DBG_VBV, fmt, ## __VA_ARGS__) + #define SIGN(a) ((a) < (0) ? (-1) : (1)) #define DIV(a, b) (((a) + (SIGN(a) * (b)) / 2) / (b)) @@ -204,6 +208,132 @@ MPP_RET mpp_rc_init(MppRateControl **ctx) return ret; } + +RK_S32 mpp_rc_vbv_check(MppVirtualBuffer *vb, RK_S32 timeInc, RK_S32 hrd) +{ + RK_S32 drift, target, bitPerPic = vb->bitPerPic; + if (hrd) { +#if RC_CBR_HRD + /* In CBR mode, bucket _must not_ underflow. Insert filler when + * needed. */ + vb->bucketFullness -= bitPerPic; +#else + if (vb->bucketFullness >= bitPerPic) { + vb->bucketFullness -= bitPerPic; + } else { + vb->realBitCnt += (bitPerPic - vb->bucketFullness); + vb->bucketFullness = 0; + } +#endif + } + + /* Saturate realBitCnt, this is to prevent overflows caused by much greater + bitrate setting than is really possible to reach */ + if (vb->realBitCnt > 0x1FFFFFFF) + vb->realBitCnt = 0x1FFFFFFF; + if (vb->realBitCnt < -0x1FFFFFFF) + vb->realBitCnt = -0x1FFFFFFF; + + vb->picTimeInc += timeInc; + vb->virtualBitCnt += axb_div_c(vb->bitRate, timeInc, vb->timeScale); + target = vb->virtualBitCnt - vb->realBitCnt; + + /* Saturate target, prevents rc going totally out of control. + This situation should never happen. */ + if (target > 0x1FFFFFFF) + target = 0x1FFFFFFF; + if (target < -0x1FFFFFFF) + target = -0x1FFFFFFF; + + /* picTimeInc must be in range of [0, timeScale) */ + while (vb->picTimeInc >= vb->timeScale) { + vb->picTimeInc -= vb->timeScale; + vb->virtualBitCnt -= vb->bitRate; + vb->realBitCnt -= vb->bitRate; + } + drift = axb_div_c(vb->bitRate, vb->picTimeInc, vb->timeScale); + drift -= vb->virtualBitCnt; + vb->virtualBitCnt += drift; + + mpp_rc_dbg_vbv("virtualBitCnt:\t\t%6i realBitCnt: %i ", + vb->virtualBitCnt, vb->realBitCnt); + mpp_rc_dbg_vbv("target: %i timeInc: %i\n", target, timeInc); + return target; +} + +RK_S32 mpp_rc_vbv_update(MppRateControl *ctx, int bitCnt) +{ + MppVirtualBuffer *vb = &ctx->vb; + RK_S32 stat; + if (ctx->hrd && (bitCnt > (vb->bufferSize - vb->bucketFullness))) { + mpp_rc_dbg_vbv("Be: %7i ", vb->bucketFullness); + mpp_rc_dbg_vbv("fillerBits %5i ", 0); + mpp_rc_dbg_vbv("bitCnt %d spaceLeft %d ", + bitCnt, (vb->bufferSize - vb->bucketFullness)); + mpp_rc_dbg_vbv("bufSize %d bucketFullness %d bitPerPic %d\n", + vb->bufferSize, vb->bucketFullness, vb->bitPerPic); + mpp_rc_dbg_vbv("HRD overflow, frame discard\n"); + return MPP_ERR_BUFFER_FULL; + } else { + vb->bucketFullness += bitCnt; + vb->realBitCnt += bitCnt; + } + if (!ctx->hrd) { + return 0; + } +#if RC_CBR_HRD + /* Bits needed to prevent bucket underflow */ + tmp = vb->bitPerPic - vb->bucketFullness; + + if (tmp > 0) { + tmp = (tmp + 7) / 8; + vb->bucketFullness += tmp * 8; + vb->realBitCnt += tmp * 8; + } else { + tmp = 0; + } +#endif + + /* Update Buffering Info */ + stat = vb->bufferSize - vb->bucketFullness; + + return stat; +} + +static void mpp_rc_vbv_init(MppRateControl *ctx, MppEncRcCfg *cfg) +{ + RK_S32 tmp = 3 * 8 * ctx->mb_per_frame * 256 / 2; + RK_S32 bps = ctx->bps_target; + RK_S32 cpbSize = -1; + RK_S32 i = 0; + /* bits per second */ + tmp = axb_div_c(tmp, cfg->fps_out_num, cfg->fps_out_denorm); + if (bps > (tmp / 2)) + bps = tmp / 2; + + cpbSize = bps; + + /* Limit minimum CPB size based on average bits per frame */ + tmp = axb_div_c(bps, cfg->fps_out_denorm, cfg->fps_out_num); + cpbSize = MPP_MAX(cpbSize, tmp); + + /* cpbSize must be rounded so it is exactly the size written in stream */ + tmp = cpbSize; + while (4095 < (tmp >> (4 + i++))); + cpbSize = (tmp >> (4 + i)) << (4 + i); + ctx->vb.bufferSize = cpbSize; + ctx->vb.bitRate = ctx->bps_target; + ctx->vb.timeScale = cfg->fps_in_num; + ctx->vb.unitsInTic = cfg->fps_in_denorm; + ctx->vb.windowRem = ctx->window_len; + ctx->vb.bitPerPic = ctx->bits_per_pic; + if (ctx->hrd) { + ctx->vb.bucketFullness = axb_div_c(ctx->vb.bufferSize, 60, 100); + ctx->vb.bucketFullness = ctx->vb.bufferSize - ctx->vb.bucketFullness; + ctx->vb.bucketFullness += ctx->vb.bitPerPic; + } +} + MPP_RET mpp_rc_deinit(MppRateControl *ctx) { if (NULL == ctx) { @@ -372,6 +502,7 @@ MPP_RET mpp_rc_update_user_cfg(MppRateControl *ctx, MppEncRcCfg *cfg, RK_S32 for ctx->bits_per_intra = ctx->bits_per_pic * 3; ctx->bits_per_inter -= ctx->bits_per_intra / (ctx->fps_out - 1); } + mpp_rc_vbv_init(ctx, cfg); } if (ctx->acc_total_count == gop) @@ -402,7 +533,7 @@ MPP_RET mpp_rc_bits_allocation(MppRateControl *ctx, RcSyntax *rc_syn) mpp_log_f("invalid ctx %p rc_syn %p\n", ctx, rc_syn); return MPP_ERR_NULL_PTR; } - + mpp_rc_vbv_check(&ctx->vb, 1, 1); /* step 1: calc target frame bits */ switch (ctx->gop_mode) { case MPP_GOP_ALL_INTER : { @@ -419,12 +550,18 @@ MPP_RET mpp_rc_bits_allocation(MppRateControl *ctx, RcSyntax *rc_syn) float intra_percent = 0.0; RK_S32 diff_bit = mpp_pid_calc(&ctx->pid_fps); /* only affected by last gop */ + ctx->pre_gop_left_bit = ctx->pid_fps.i - diff_bit; + if ( abs(ctx->pre_gop_left_bit) / (ctx->gop - 1) > (ctx->bits_per_pic / 5)) { + RK_S32 val = 1; + if (ctx->pre_gop_left_bit < 0) { + val = -1; + } + ctx->pre_gop_left_bit = val * ctx->bits_per_pic * (ctx->gop - 1) / 5; + } mpp_pid_reset(&ctx->pid_fps); - if (ctx->acc_intra_count) { intra_percent = mpp_data_avg(ctx->intra_percent, 1, 1, 1) / 100.0; ctx->last_intra_percent = intra_percent; - ctx->bits_target = (ctx->fps_out * ctx->bits_per_pic + diff_bit) * intra_percent; } else { @@ -443,7 +580,7 @@ MPP_RET mpp_rc_bits_allocation(MppRateControl *ctx, RcSyntax *rc_syn) ctx->bits_per_inter = (ctx->bps_target * (ctx->gop * 1.0 / ctx->fps_out) - bits_prev_intra + diff_bit * - (1 - ctx->last_intra_percent)) / + (1 - ctx->last_intra_percent) + ctx->pre_gop_left_bit) / (ctx->gop - 1); mpp_rc_dbg_rc("RC: rc ctx %p bits pic %d win %d intra %d inter %d\n", @@ -456,7 +593,10 @@ MPP_RET mpp_rc_bits_allocation(MppRateControl *ctx, RcSyntax *rc_syn) } else { RK_S32 diff_bit = mpp_pid_calc(&ctx->pid_inter); ctx->bits_target = ctx->bits_per_inter - diff_bit; - + if (ctx->bits_target > ctx->bits_per_pic * 2) { + ctx->bits_target = 2 * ctx->bits_per_pic; + ctx->pid_inter.i = ctx->pid_inter.i / 2; + } mpp_rc_dbg_rc("RC: rc ctx %p inter pid diff %d target %d\n", ctx, diff_bit, ctx->bits_target); @@ -551,7 +691,6 @@ MPP_RET mpp_rc_update_hw_result(MppRateControl *ctx, RcHalResult *result) } RK_S32 bits = result->bits; - const char *type_str; RK_S32 bits_target; if (result->type == INTRA_FRAME) { @@ -560,7 +699,6 @@ MPP_RET mpp_rc_update_hw_result(MppRateControl *ctx, RcHalResult *result) mpp_data_update(ctx->intra, bits); mpp_data_update(ctx->gop_bits, bits); mpp_pid_update(&ctx->pid_intra, bits - ctx->bits_target); - type_str = "intra"; bits_target = ctx->bits_per_intra; } else { @@ -569,7 +707,6 @@ MPP_RET mpp_rc_update_hw_result(MppRateControl *ctx, RcHalResult *result) mpp_data_update(ctx->inter, bits); mpp_data_update(ctx->gop_bits, bits); mpp_pid_update(&ctx->pid_inter, bits - ctx->bits_target); - type_str = "inter"; bits_target = ctx->bits_per_inter; } @@ -604,7 +741,7 @@ MPP_RET mpp_rc_update_hw_result(MppRateControl *ctx, RcHalResult *result) ctx->last_fps_bits = 0; ctx->time_in_second++; } - + mpp_rc_vbv_update(ctx, bits); ctx->pre_frmtype = ctx->cur_frmtype; return MPP_OK; diff --git a/mpp/common/h264e_syntax.h b/mpp/common/h264e_syntax.h index c475562f..26f9c931 100644 --- a/mpp/common/h264e_syntax.h +++ b/mpp/common/h264e_syntax.h @@ -20,6 +20,18 @@ typedef enum H264eHwType_t { H264E_VPU } H264eHwType; +#define CTRL_LEVELS 7 /* DO NOT CHANGE THIS */ +#define CHECK_POINTS_MAX 10 /* DO NOT CHANGE THIS */ +#define RC_TABLE_LENGTH 10 /* DO NOT CHANGE THIS */ + +typedef struct VepuQpCtrl_s { + RK_S32 wordCntPrev[CHECK_POINTS_MAX]; /* Real bit count */ + RK_S32 checkPointDistance; + RK_S32 checkPoints; + RK_S32 nonZeroCnt; + RK_S32 frameBitCnt; +} VepuQpCtrl; + /* * Overall configuration required by hardware * Currently support vepu and rkvenc @@ -50,17 +62,19 @@ typedef struct H264eHwCfg_t { RK_U32 input_cr_addr; RK_U32 output_strm_limit_size; RK_U32 output_strm_addr; - /* * For vpu * 0 - inter * 1 - intra - * 2 - mvc-inter - * - * For rkvenc - * RKVENC_FRAME_TYPE_* - */ + * 2 - mvc-inter*/ + + VepuQpCtrl qpCtrl; + /* + * For rkvenc + * RKVENC_FRAME_TYPE_* + */ RK_S32 frame_type; + RK_S32 pre_frame_type; RK_S32 cabac_init_idc; RK_S32 frame_num; @@ -78,6 +92,7 @@ typedef struct H264eHwCfg_t { RK_S32 qp_max; RK_S32 mad_qp_delta; RK_S32 mad_threshold; + RK_S32 pre_bit_diff; /* * VEPU MB rate control parameter @@ -87,7 +102,7 @@ typedef struct H264eHwCfg_t { */ RK_S32 slice_size_mb_rows; RK_S32 cp_distance_mbs; - RK_S32 cp_target[10]; + RK_S32 cp_target[CHECK_POINTS_MAX]; RK_S32 target_error[9]; //for rkv there are 9 levels RK_S32 delta_qp[9]; diff --git a/mpp/hal/common/h264/hal_h264e_com.c b/mpp/hal/common/h264/hal_h264e_com.c index 39dcb5c9..5c577422 100644 --- a/mpp/hal/common/h264/hal_h264e_com.c +++ b/mpp/hal/common/h264/hal_h264e_com.c @@ -621,7 +621,7 @@ MPP_RET h264e_set_sps(H264eHalContext *ctx, H264eSps *sps) sps->vui.b_timing_info_present = i_timebase_num > 0 && i_timebase_den > 0; - if ( sps->vui.b_timing_info_present ) { + if (sps->vui.b_timing_info_present ) { sps->vui.i_num_units_in_tick = i_timebase_num; sps->vui.i_time_scale = i_timebase_den * 2; sps->vui.b_fixed_frame_rate = !b_vfr_input; diff --git a/mpp/hal/common/h264/hal_h264e_com.h b/mpp/hal/common/h264/hal_h264e_com.h index fbbf0962..cfa5fdd1 100644 --- a/mpp/hal/common/h264/hal_h264e_com.h +++ b/mpp/hal/common/h264/hal_h264e_com.h @@ -44,15 +44,6 @@ extern RK_U32 h264e_hal_log_mode; #define H264E_SEI_CHG_SPSPPS 0x00000001 #define H264E_SEI_CHG_RC 0x00000010 -#define H264E_HAL_MASK_2b (RK_U32)0x00000003 -#define H264E_HAL_MASK_3b (RK_U32)0x00000007 -#define H264E_HAL_MASK_4b (RK_U32)0x0000000F -#define H264E_HAL_MASK_5b (RK_U32)0x0000001F -#define H264E_HAL_MASK_6b (RK_U32)0x0000003F -#define H264E_HAL_MASK_11b (RK_U32)0x000007FF -#define H264E_HAL_MASK_14b (RK_U32)0x00003FFF -#define H264E_HAL_MASK_16b (RK_U32)0x0000FFFF - #define h264e_hal_err(fmt, ...) \ do {\ mpp_err_f(fmt, ## __VA_ARGS__);\ @@ -77,15 +68,15 @@ extern RK_U32 h264e_hal_log_mode; mpp_log("line(%d), func(%s), leave", __LINE__, __FUNCTION__);\ } while (0) -#define H264E_HAL_MIN(a,b) ( (a)<(b) ? (a) : (b) ) -#define H264E_HAL_MAX(a,b) ( (a)>(b) ? (a) : (b) ) -#define H264E_HAL_MIN3(a,b,c) H264E_HAL_MIN((a),H264E_HAL_MIN((b),(c))) -#define H264E_HAL_MAX3(a,b,c) H264E_HAL_MAX((a),H264E_HAL_MAX((b),(c))) -#define H264E_HAL_MIN4(a,b,c,d) H264E_HAL_MIN((a),H264E_HAL_MIN3((b),(c),(d))) -#define H264E_HAL_MAX4(a,b,c,d) H264E_HAL_MAX((a),H264E_HAL_MAX3((b),(c),(d))) +#define H264E_HAL_MIN(a,b) ( (a)<(b) ? (a) : (b) ) +#define H264E_HAL_MAX(a,b) ( (a)>(b) ? (a) : (b) ) +#define H264E_HAL_MIN3(a,b,c) H264E_HAL_MIN((a),H264E_HAL_MIN((b),(c))) +#define H264E_HAL_MAX3(a,b,c) H264E_HAL_MAX((a),H264E_HAL_MAX((b),(c))) +#define H264E_HAL_MIN4(a,b,c,d) H264E_HAL_MIN((a),H264E_HAL_MIN3((b),(c),(d))) +#define H264E_HAL_MAX4(a,b,c,d) H264E_HAL_MAX((a),H264E_HAL_MAX3((b),(c),(d))) #define H264E_HAL_CLIP3(v, min, max) ((v) < (min) ? (min) : ((v) > (max) ? (max) : (v))) -#define H264E_HAL_FCLOSE(fp) do{ if(fp) fclose(fp); fp = NULL; } while (0) +#define H264E_HAL_FCLOSE(fp) do{ if(fp) fclose(fp); fp = NULL; } while (0) #define H264E_HAL_SET_REG(reg, addr, val) \ do { \ @@ -95,20 +86,20 @@ extern RK_U32 h264e_hal_log_mode; } while (0) -#define H264E_HAL_VALIDATE_GT(val, name, limit) \ - do { \ - if ((val)<=(limit)) { \ - mpp_err("%s(%d) should > %d", name, val, limit); \ - return MPP_NOK; \ - } \ +#define H264E_HAL_VALIDATE_GT(val, name, limit) \ + do { \ + if ((val)<=(limit)) { \ + mpp_err("%s(%d) should > %d", name, val, limit);\ + return MPP_NOK; \ + } \ } while (0) -#define H264E_HAL_VALIDATE_NEQ(val, name, limit) \ - do { \ - if((val)==(limit)) { \ +#define H264E_HAL_VALIDATE_NEQ(val, name, limit) \ + do { \ + if((val)==(limit)) { \ mpp_err("%s(%d) should not = %d", name, val, limit); \ - return MPP_NOK; \ - } \ + return MPP_NOK; \ + } \ } while (0) #define H264E_HAL_SPRINT(s, len, ...) \ @@ -120,62 +111,62 @@ extern RK_U32 h264e_hal_log_mode; } \ } while (0) -#define H264E_UUID_LENGTH 16 +#define H264E_UUID_LENGTH 16 -#define H264E_REF_MAX 16 +#define H264E_REF_MAX 16 -#define H264E_SPSPPS_BUF_SIZE 512 //sps + pps -#define H264E_SEI_BUF_SIZE 1024 //unit in byte, may not be large enough in the future -#define H264E_EXTRA_INFO_BUF_SIZE (H264E_SPSPPS_BUF_SIZE + H264E_SEI_BUF_SIZE) +#define H264E_SPSPPS_BUF_SIZE 512 //sps + pps +#define H264E_SEI_BUF_SIZE 1024 //unit in byte, may not be large enough in the future +#define H264E_EXTRA_INFO_BUF_SIZE (H264E_SPSPPS_BUF_SIZE + H264E_SEI_BUF_SIZE) -#define H264E_NUM_REFS 1 -#define H264E_LONGTERM_REF_EN 0 -#define H264E_CQM_FLAT 0 -#define H264E_CQM_JVT 1 -#define H264E_CQM_CUSTOM 2 -#define H264E_B_PYRAMID_NONE 0 -#define H264E_B_PYRAMID_STRICT 1 -#define H264E_B_PYRAMID_NORMAL 2 +#define H264E_NUM_REFS 1 +#define H264E_LONGTERM_REF_EN 0 +#define H264E_CQM_FLAT 0 +#define H264E_CQM_JVT 1 +#define H264E_CQM_CUSTOM 2 +#define H264E_B_PYRAMID_NONE 0 +#define H264E_B_PYRAMID_STRICT 1 +#define H264E_B_PYRAMID_NORMAL 2 -#define H264E_CSP2_MASK 0x00ff /* */ -#define H264E_CSP2_NONE 0x0000 /* Invalid mode */ -#define H264E_CSP2_I420 0x0001 /* yuv 4:2:0 planar */ -#define H264E_CSP2_YV12 0x0002 /* yvu 4:2:0 planar */ -#define H264E_CSP2_NV12 0x0003 /* yuv 4:2:0, with one y plane and one packed u+v */ -#define H264E_CSP2_I422 0x0004 /* yuv 4:2:2 planar */ -#define H264E_CSP2_YV16 0x0005 /* yvu 4:2:2 planar */ -#define H264E_CSP2_NV16 0x0006 /* yuv 4:2:2, with one y plane and one packed u+v */ -#define H264E_CSP2_V210 0x0007 /* 10-bit yuv 4:2:2 packed in 32 */ -#define H264E_CSP2_I444 0x0008 /* yuv 4:4:4 planar */ -#define H264E_CSP2_YV24 0x0009 /* yvu 4:4:4 planar */ -#define H264E_CSP2_BGR 0x000a /* packed bgr 24bits */ -#define H264E_CSP2_BGRA 0x000b /* packed bgr 32bits */ -#define H264E_CSP2_RGB 0x000c /* packed rgb 24bits */ -#define H264E_CSP2_MAX 0x000d /* end of list */ -#define H264E_CSP2_VFLIP 0x1000 /* the csp is vertically flipped */ -#define H264E_CSP2_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */ +#define H264E_CSP2_MASK 0x00ff /* */ +#define H264E_CSP2_NONE 0x0000 /* Invalid mode */ +#define H264E_CSP2_I420 0x0001 /* yuv 4:2:0 planar */ +#define H264E_CSP2_YV12 0x0002 /* yvu 4:2:0 planar */ +#define H264E_CSP2_NV12 0x0003 /* yuv 4:2:0, with one y plane and one packed u+v */ +#define H264E_CSP2_I422 0x0004 /* yuv 4:2:2 planar */ +#define H264E_CSP2_YV16 0x0005 /* yvu 4:2:2 planar */ +#define H264E_CSP2_NV16 0x0006 /* yuv 4:2:2, with one y plane and one packed u+v */ +#define H264E_CSP2_V210 0x0007 /* 10-bit yuv 4:2:2 packed in 32 */ +#define H264E_CSP2_I444 0x0008 /* yuv 4:4:4 planar */ +#define H264E_CSP2_YV24 0x0009 /* yvu 4:4:4 planar */ +#define H264E_CSP2_BGR 0x000a /* packed bgr 24bits */ +#define H264E_CSP2_BGRA 0x000b /* packed bgr 32bits */ +#define H264E_CSP2_RGB 0x000c /* packed rgb 24bits */ +#define H264E_CSP2_MAX 0x000d /* end of list */ +#define H264E_CSP2_VFLIP 0x1000 /* the csp is vertically flipped */ +#define H264E_CSP2_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */ -#define H264E_MB_RC_ONLY_QUALITY 0 -#define H264E_MB_RC_MORE_QUALITY 1 -#define H264E_MB_RC_BALANCE 2 -#define H264E_MB_RC_MORE_BITRATE 3 -#define H264E_MB_RC_ONLY_BITRATE 4 -#define H264E_MB_RC_WIDE_RANGE 5 -#define H264E_MB_RC_ONLY_AQ 6 -#define H264E_MB_RC_M_NUM 7 +#define H264E_MB_RC_ONLY_QUALITY 0 +#define H264E_MB_RC_MORE_QUALITY 1 +#define H264E_MB_RC_BALANCE 2 +#define H264E_MB_RC_MORE_BITRATE 3 +#define H264E_MB_RC_ONLY_BITRATE 4 +#define H264E_MB_RC_WIDE_RANGE 5 +#define H264E_MB_RC_ONLY_AQ 6 +#define H264E_MB_RC_M_NUM 7 typedef enum H264eRkvCsp_e { - H264E_RKV_CSP_BGRA8888, // 0 - H264E_RKV_CSP_BGR888, // 1 - H264E_RKV_CSP_BGR565, // 2 - H264E_RKV_CSP_NONE, // 3 - H264E_RKV_CSP_YUV422SP, // 4 - H264E_RKV_CSP_YUV422P, // 5 - H264E_RKV_CSP_YUV420SP, // 6 - H264E_RKV_CSP_YUV420P, // 7 - H264E_RKV_CSP_YUYV422, // 8 - H264E_RKV_CSP_UYVY422, // 9 - H264E_RKV_CSP_BUTT, // 10 + H264E_RKV_CSP_BGRA8888, // 0 + H264E_RKV_CSP_BGR888, // 1 + H264E_RKV_CSP_BGR565, // 2 + H264E_RKV_CSP_NONE, // 3 + H264E_RKV_CSP_YUV422SP, // 4 + H264E_RKV_CSP_YUV422P, // 5 + H264E_RKV_CSP_YUV420SP, // 6 + H264E_RKV_CSP_YUV420P, // 7 + H264E_RKV_CSP_YUYV422, // 8 + H264E_RKV_CSP_UYVY422, // 9 + H264E_RKV_CSP_BUTT, // 10 } H264eRkvCsp; /* transplant from vpu_api.h:EncInputPictureType */ @@ -444,6 +435,7 @@ typedef struct H264eHalContext_t { H264eHwCfg hw_cfg; MppLinReg *inter_qs; MppLinReg *intra_qs; + MppLinReg *mad; MppData *qp_p; MppData *sse_p; H264eMbRcCtx mb_rc; diff --git a/mpp/hal/vpu/h264e/CMakeLists.txt b/mpp/hal/vpu/h264e/CMakeLists.txt index 90897005..8eaf64a6 100644 --- a/mpp/hal/vpu/h264e/CMakeLists.txt +++ b/mpp/hal/vpu/h264e/CMakeLists.txt @@ -9,9 +9,10 @@ set(HAL_H264E_HDR # hal h264 encoder sourse set(HAL_H264E_SRC - hal_h264e_header.c hal_h264e_vepu2.c hal_h264e_vepu1.c + hal_h264e_header.c + hal_h264e_rc.c ) add_library(hal_h264e_vpu STATIC diff --git a/mpp/hal/vpu/h264e/hal_h264e_header.c b/mpp/hal/vpu/h264e/hal_h264e_header.c index 7ad51aaa..189c5a89 100644 --- a/mpp/hal/vpu/h264e/hal_h264e_header.c +++ b/mpp/hal/vpu/h264e/hal_h264e_header.c @@ -201,8 +201,8 @@ hal_h264e_vpu_stream_put_bits_with_detect(H264eVpuStream * buffer, mpp_assert(value < (1 << number)); mpp_assert(number < 25); } - bits = number + buffer->buffered_bits; + byte_buffer = byte_buffer | ((RK_U32) value << (32 - bits)); while (bits > 7) { @@ -402,27 +402,41 @@ static MPP_RET hal_h264e_vpu_write_sps(H264eVpuStream *stream, hal_h264e_vpu_stream_put_bits_with_detect(stream, sps->vui.b_signal_type_present, 1, "video_signal_type_present_flag"); + if (sps->vui.b_signal_type_present) { + hal_h264e_vpu_stream_put_bits_with_detect(stream, sps->vui.i_vidformat, 3, "video_format"); + hal_h264e_vpu_stream_put_bits_with_detect(stream, sps->vui.b_fullrange, 1, "video_full_range_flag"); + hal_h264e_vpu_stream_put_bits_with_detect(stream, sps->vui.b_color_description_present, 1, + "colour_description_present_flag"); + if (sps->vui.b_color_description_present) { + hal_h264e_vpu_stream_put_bits_with_detect(stream, sps->vui.i_colorprim, 8, "colour_primaries"); + hal_h264e_vpu_stream_put_bits_with_detect(stream, sps->vui.i_transfer, 8, "transfer_characteristics"); + hal_h264e_vpu_stream_put_bits_with_detect(stream, sps->vui.i_colmatrix, 8, "matrix_coefficients"); + } + } hal_h264e_vpu_stream_put_bits_with_detect(stream, sps->vui.b_chroma_loc_info_present, 1, "chroma_loc_info_present_flag"); + hal_h264e_vpu_stream_put_bits_with_detect(stream, sps->vui.b_timing_info_present, 1, "timing_info_present_flag"); - hal_h264e_vpu_stream_put_bits_with_detect(stream, - sps->vui.i_num_units_in_tick >> 16, 16, - "num_units_in_tick msb"); - hal_h264e_vpu_stream_put_bits_with_detect(stream, - sps->vui.i_num_units_in_tick & 0xffff, 16, - "num_units_in_tick lsb"); - hal_h264e_vpu_stream_put_bits_with_detect(stream, - sps->vui.i_time_scale >> 16, 16, - "time_scale msb"); - hal_h264e_vpu_stream_put_bits_with_detect(stream, - sps->vui.i_time_scale & 0xffff, 16, - "time_scale lsb"); - hal_h264e_vpu_stream_put_bits_with_detect(stream, - sps->vui.b_fixed_frame_rate, 1, - "fixed_frame_rate_flag"); + if (sps->vui.b_timing_info_present) { + hal_h264e_vpu_stream_put_bits_with_detect(stream, + sps->vui.i_num_units_in_tick >> 16, 16, + "num_units_in_tick msb"); + hal_h264e_vpu_stream_put_bits_with_detect(stream, + sps->vui.i_num_units_in_tick & 0xffff, 16, + "num_units_in_tick lsb"); + hal_h264e_vpu_stream_put_bits_with_detect(stream, + sps->vui.i_time_scale >> 16, 16, + "time_scale msb"); + hal_h264e_vpu_stream_put_bits_with_detect(stream, + sps->vui.i_time_scale & 0xffff, 16, + "time_scale lsb"); + hal_h264e_vpu_stream_put_bits_with_detect(stream, + sps->vui.b_fixed_frame_rate, 1, + "fixed_frame_rate_flag"); + } hal_h264e_vpu_stream_put_bits_with_detect(stream, sps->vui.b_nal_hrd_parameters_present, 1, "nal_hrd_parameters_present_flag"); @@ -584,39 +598,6 @@ static MPP_RET h264e_vpu_sei_encode(H264eHalContext *ctx) return MPP_OK; } -static RK_S32 find_best_qp(MppLinReg *ctx, MppEncH264Cfg *codec, - RK_S32 qp_start, RK_S32 bits) -{ - RK_S32 qp = qp_start; - RK_S32 qp_best = qp_start; - RK_S32 qp_min = codec->qp_min; - RK_S32 qp_max = codec->qp_max; - RK_S32 diff_best = INT_MAX; - - if (ctx->a == 0 && ctx->b == 0) - return qp_best; - - if (bits <= 0) { - qp_best = mpp_clip(qp_best + codec->qp_max_step, qp_min, qp_max); - } else { - do { - RK_S32 est_bits = mpp_quadreg_calc(ctx, h264_q_step[qp]); - RK_S32 diff = est_bits - bits; - if (MPP_ABS(diff) < MPP_ABS(diff_best)) { - diff_best = MPP_ABS(diff); - qp_best = qp; - if (diff > 0) - qp++; - else - qp--; - } else - break; - } while (qp <= qp_max && qp >= qp_min); - } - - return qp_best; -} - MPP_RET h264e_vpu_init_extra_info(void *extra_info) { static const RK_U8 h264e_sei_uuid[H264E_UUID_LENGTH] = { @@ -784,232 +765,6 @@ MPP_RET h264e_vpu_allocate_buffers(H264eHalContext *ctx) return MPP_OK; } -MPP_RET h264e_vpu_update_hw_cfg(H264eHalContext *ctx, HalEncTask *task, - H264eHwCfg *hw_cfg) -{ - RK_S32 i; - MppEncCfgSet *cfg = ctx->cfg; - MppEncH264Cfg *codec = &cfg->codec.h264; - MppEncPrepCfg *prep = &cfg->prep; - MppEncRcCfg *rc = &cfg->rc; - RcSyntax *rc_syn = (RcSyntax *)task->syntax.data; - - /* preprocess setup */ - if (prep->change) { - RK_U32 change = prep->change; - - if (change & MPP_ENC_PREP_CFG_CHANGE_INPUT) { - hw_cfg->width = prep->width; - hw_cfg->height = prep->height; - - hw_cfg->hor_stride = prep->hor_stride; - hw_cfg->ver_stride = prep->ver_stride; - } - - if (change & MPP_ENC_PREP_CFG_CHANGE_FORMAT) { - hw_cfg->input_format = prep->format; - h264e_vpu_set_format(hw_cfg, prep); - switch (prep->color) { - case MPP_FRAME_SPC_RGB : { - /* BT.601 */ - /* Y = 0.2989 R + 0.5866 G + 0.1145 B - * Cb = 0.5647 (B - Y) + 128 - * Cr = 0.7132 (R - Y) + 128 - */ - hw_cfg->color_conversion_coeff_a = 19589; - hw_cfg->color_conversion_coeff_b = 38443; - hw_cfg->color_conversion_coeff_c = 7504; - hw_cfg->color_conversion_coeff_e = 37008; - hw_cfg->color_conversion_coeff_f = 46740; - } break; - case MPP_FRAME_SPC_BT709 : { - /* BT.709 */ - /* Y = 0.2126 R + 0.7152 G + 0.0722 B - * Cb = 0.5389 (B - Y) + 128 - * Cr = 0.6350 (R - Y) + 128 - */ - hw_cfg->color_conversion_coeff_a = 13933; - hw_cfg->color_conversion_coeff_b = 46871; - hw_cfg->color_conversion_coeff_c = 4732; - hw_cfg->color_conversion_coeff_e = 35317; - hw_cfg->color_conversion_coeff_f = 41615; - } break; - default : { - hw_cfg->color_conversion_coeff_a = 19589; - hw_cfg->color_conversion_coeff_b = 38443; - hw_cfg->color_conversion_coeff_c = 7504; - hw_cfg->color_conversion_coeff_e = 37008; - hw_cfg->color_conversion_coeff_f = 46740; - } break; - } - } - - prep->change = 0; - } - - if (codec->change) { - // TODO: setup sps / pps here - hw_cfg->idr_pic_id = !ctx->idr_pic_id; - hw_cfg->filter_disable = codec->deblock_disable; - hw_cfg->slice_alpha_offset = codec->deblock_offset_alpha; - hw_cfg->slice_beta_offset = codec->deblock_offset_beta; - hw_cfg->inter4x4_disabled = (codec->profile >= 31) ? (1) : (0); - hw_cfg->cabac_init_idc = codec->cabac_init_idc; - hw_cfg->qp = codec->qp_init; - - hw_cfg->qp_prev = hw_cfg->qp; - - codec->change = 0; - } - - if (hw_cfg->qp <= 0) { - RK_S32 qp_tbl[2][13] = { - { - 26, 36, 48, 63, 85, 110, 152, 208, 313, 427, 936, - 1472, 0x7fffffff - }, - {42, 39, 36, 33, 30, 27, 24, 21, 18, 15, 12, 9, 6} - }; - RK_S32 pels = ctx->cfg->prep.width * ctx->cfg->prep.height; - RK_S32 bits_per_pic = axb_div_c(rc->bps_target, - rc->fps_out_denorm, - rc->fps_out_num); - - if (pels) { - RK_S32 upscale = 8000; - if (bits_per_pic > 1000000) - hw_cfg->qp = codec->qp_min; - else { - RK_S32 j = -1; - - pels >>= 8; - bits_per_pic >>= 5; - - bits_per_pic *= pels + 250; - bits_per_pic /= 350 + (3 * pels) / 4; - bits_per_pic = axb_div_c(bits_per_pic, upscale, pels << 6); - - while (qp_tbl[0][++j] < bits_per_pic); - - hw_cfg->qp = qp_tbl[1][j]; - hw_cfg->qp_prev = hw_cfg->qp; - } - } - } - - if (NULL == ctx->intra_qs) - mpp_linreg_init(&ctx->intra_qs, MPP_MIN(rc->gop, 10), 2); - if (NULL == ctx->inter_qs) - mpp_linreg_init(&ctx->inter_qs, MPP_MIN(rc->gop, 10), 2); - - mpp_assert(ctx->intra_qs); - mpp_assert(ctx->inter_qs); - - /* frame type and rate control setup */ - { - RK_S32 prev_coding_type = hw_cfg->frame_type; - - if (rc_syn->type == INTRA_FRAME) { - hw_cfg->frame_type = H264E_VPU_FRAME_I; - hw_cfg->frame_num = 0; - - hw_cfg->qp = find_best_qp(ctx->intra_qs, codec, hw_cfg->qp_prev, - rc_syn->bit_target); - - /* - * Previous frame is inter then intra frame can not - * have a big qp step between these two frames - */ - if (prev_coding_type == 0) - hw_cfg->qp = mpp_clip(hw_cfg->qp, hw_cfg->qp_prev - 4, - hw_cfg->qp_prev + 4); - } else { - hw_cfg->frame_type = H264E_VPU_FRAME_P; - - hw_cfg->qp = find_best_qp(ctx->inter_qs, codec, hw_cfg->qp_prev, - rc_syn->bit_target); - - if (prev_coding_type == 1) - hw_cfg->qp = mpp_clip(hw_cfg->qp, hw_cfg->qp_prev - 4, - hw_cfg->qp_prev + 4); - } - } - - hw_cfg->qp = mpp_clip(hw_cfg->qp, - hw_cfg->qp_prev - codec->qp_max_step, - hw_cfg->qp_prev + codec->qp_max_step); - - hw_cfg->qp_prev = hw_cfg->qp; - - hw_cfg->mad_qp_delta = 0; - hw_cfg->mad_threshold = 6; - hw_cfg->keyframe_max_interval = rc->gop; - hw_cfg->qp_min = codec->qp_min; - hw_cfg->qp_max = codec->qp_max; - - /* disable mb rate control first */ - hw_cfg->cp_distance_mbs = 0; - for (i = 0; i < 10; i++) - hw_cfg->cp_target[i] = 0; - - for (i = 0; i < 7; i++) - hw_cfg->target_error[i] = 0; - - for (i = 0; i < 7; i++) - hw_cfg->delta_qp[i] = 0; - - /* slice mode setup */ - hw_cfg->slice_size_mb_rows = 0; //(prep->height + 15) >> 4; - - /* input and preprocess config, the offset is at [31:10] */ - hw_cfg->input_luma_addr = mpp_buffer_get_fd(task->input); - - switch (prep->format) { - case MPP_FMT_YUV420SP: { - RK_U32 offset_uv = hw_cfg->hor_stride * hw_cfg->ver_stride; - - mpp_assert(prep->hor_stride == MPP_ALIGN(prep->width, 8)); - mpp_assert(prep->ver_stride == MPP_ALIGN(prep->height, 8)); - - hw_cfg->input_cb_addr = hw_cfg->input_luma_addr + (offset_uv << 10); - hw_cfg->input_cr_addr = 0; - break; - } - case MPP_FMT_YUV420P: { - RK_U32 offset_y = hw_cfg->hor_stride * hw_cfg->ver_stride; - - mpp_assert(prep->hor_stride == MPP_ALIGN(prep->width, 8)); - mpp_assert(prep->ver_stride == MPP_ALIGN(prep->height, 8)); - - hw_cfg->input_cb_addr = hw_cfg->input_luma_addr + (offset_y << 10); - hw_cfg->input_cr_addr = hw_cfg->input_cb_addr + (offset_y << 8); - break; - } - case MPP_FMT_YUV422_YUYV: - case MPP_FMT_YUV422_UYVY: - case MPP_FMT_RGB565: - case MPP_FMT_BGR444: - case MPP_FMT_BGR888: - case MPP_FMT_RGB888: - case MPP_FMT_ARGB8888: - case MPP_FMT_ABGR8888: - case MPP_FMT_BGR101010: - hw_cfg->input_cb_addr = 0; - hw_cfg->input_cr_addr = 0; - break; - default: { - mpp_err_f("invalid input format %d", prep->format); - return MPP_ERR_VALUE; - } - } - hw_cfg->output_strm_addr = mpp_buffer_get_fd(task->output); - hw_cfg->output_strm_limit_size = mpp_buffer_get_size(task->output); - - /* context update */ - ctx->idr_pic_id = !ctx->idr_pic_id; - return MPP_OK; -} - MPP_RET h264e_vpu_update_buffers(H264eHalContext *ctx, H264eHwCfg *hw_cfg) { MPP_RET ret = MPP_OK; @@ -1063,4 +818,3 @@ MPP_RET h264e_vpu_update_buffers(H264eHalContext *ctx, H264eHwCfg *hw_cfg) h264e_hal_leave(); return MPP_OK; } - diff --git a/mpp/hal/vpu/h264e/hal_h264e_header.h b/mpp/hal/vpu/h264e/hal_h264e_header.h index ba63f9d8..10a7ad47 100644 --- a/mpp/hal/vpu/h264e/hal_h264e_header.h +++ b/mpp/hal/vpu/h264e/hal_h264e_header.h @@ -29,9 +29,6 @@ MPP_RET h264e_vpu_set_extra_info(H264eHalContext *ctx); MPP_RET h264e_vpu_free_buffers(H264eHalContext *ctx); MPP_RET h264e_vpu_allocate_buffers(H264eHalContext *ctx); -MPP_RET h264e_vpu_update_hw_cfg(H264eHalContext *ctx, HalEncTask *task, - H264eHwCfg *hw_cfg); -MPP_RET h264e_vpu_update_buffers(H264eHalContext *ctx, H264eHwCfg *hw_cfg); - RK_S32 exp_golomb_signed(RK_S32 val); + #endif diff --git a/mpp/hal/vpu/h264e/hal_h264e_rc.c b/mpp/hal/vpu/h264e/hal_h264e_rc.c new file mode 100644 index 00000000..1f544065 --- /dev/null +++ b/mpp/hal/vpu/h264e/hal_h264e_rc.c @@ -0,0 +1,444 @@ +/* + * Copyright 2015 - 2017 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include "rk_mpi.h" +#include "mpp_mem.h" +#include "mpp_common.h" + +#include "hal_h264e_com.h" +#include "hal_h264e_vepu.h" +#include "hal_h264e_rc.h" +#include "hal_h264e_vpu_tbl.h" + +const RK_S32 h264_q_step[] = { + 3, 3, 3, 4, 4, 5, 5, 6, 7, 7, + 8, 9, 10, 11, 13, 14, 16, 18, 20, 23, + 25, 28, 32, 36, 40, 45, 51, 57, 64, 72, + 80, 90, 101, 114, 128, 144, 160, 180, 203, 228, + 256, 288, 320, 360, 405, 456, 513, 577, 640, 720, + 810, 896 +}; + +static RK_S32 find_best_qp(MppLinReg *ctx, MppEncH264Cfg *codec, + RK_S32 qp_start, RK_S32 bits) +{ + RK_S32 qp = qp_start; + RK_S32 qp_best = qp_start; + RK_S32 qp_min = codec->qp_min; + RK_S32 qp_max = codec->qp_max; + RK_S32 diff_best = INT_MAX; + + if (ctx->a == 0 && ctx->b == 0) + return qp_best; + + if (bits <= 0) { + qp_best = mpp_clip(qp_best + codec->qp_max_step, qp_min, qp_max); + } else { + do { + RK_S32 est_bits = mpp_quadreg_calc(ctx, h264_q_step[qp]); + RK_S32 diff = est_bits - bits; + if (MPP_ABS(diff) < MPP_ABS(diff_best)) { + diff_best = MPP_ABS(diff); + qp_best = qp; + if (diff > 0) + qp++; + else + qp--; + } else + break; + } while (qp <= qp_max && qp >= qp_min); + } + + return qp_best; +} + +#define WORD_CNT_MAX 65535 + +MPP_RET h264e_vpu_mb_rc_cfg(H264eHalContext *ctx, RcSyntax *rc_syn, H264eHwCfg *hw_cfg) +{ + const RK_S32 sscale = 256; + VepuQpCtrl *qc = &hw_cfg->qpCtrl; + RK_S32 scaler, srcPrm; + RK_S32 i; + RK_S32 tmp, nonZeroTarget; + MppEncCfgSet *cfg = ctx->cfg; + MppEncH264Cfg *codec = &cfg->codec.h264; + int intraQpDelta = 3; + MppEncRcCfg *rc = &cfg->rc; + RK_S32 mbPerPic = (hw_cfg->width + 15) / 16 * (hw_cfg->height + 15) / 16; + RK_S32 coeffCntMax = mbPerPic * 24 * 16; + RK_S32 bits_per_pic = axb_div_c(rc->bps_target, + rc->fps_out_denorm, + rc->fps_out_num); + + if (hw_cfg->qp <= 0) { + RK_S32 qp_tbl[2][13] = { + { + 26, 36, 48, 63, 85, 110, 152, 208, 313, 427, 936, + 1472, 0x7fffffff + }, + {42, 39, 36, 33, 30, 27, 24, 21, 18, 15, 12, 9, 6} + }; + RK_S32 pels = ctx->cfg->prep.width * ctx->cfg->prep.height; + if (pels) { + RK_S32 upscale = 8000; + if (bits_per_pic > 1000000) + hw_cfg->qp = codec->qp_min; + else { + RK_S32 j = -1; + + pels >>= 8; + bits_per_pic >>= 5; + + bits_per_pic *= pels + 250; + bits_per_pic /= 350 + (3 * pels) / 4; + bits_per_pic = axb_div_c(bits_per_pic, upscale, pels << 6); + + while (qp_tbl[0][++j] < bits_per_pic); + + hw_cfg->qp = qp_tbl[1][j]; + hw_cfg->qp_prev = hw_cfg->qp; + } + } + //first frame init + } + if (ctx->frame_cnt == 0) { + RK_S32 mbRows = ctx->cfg->prep.height / 16; + hw_cfg->mad_qp_delta = 2; + hw_cfg->mad_threshold = 256 * 6; + hw_cfg->qpCtrl.checkPoints = MPP_MIN(mbRows - 1, CHECK_POINTS_MAX); + if (rc->rc_mode == MPP_ENC_RC_MODE_CBR) { + hw_cfg->qpCtrl.checkPointDistance = + mbPerPic / (hw_cfg->qpCtrl.checkPoints + 1); + } else { + hw_cfg->qpCtrl.checkPointDistance = 0; + } + } + /* frame type and rate control setup */ + { + hw_cfg->pre_frame_type = hw_cfg->frame_type; + if (rc_syn->type == INTRA_FRAME) { + hw_cfg->frame_type = H264E_VPU_FRAME_I; + hw_cfg->frame_num = 0; + if (ctx->frame_cnt > 0) { + hw_cfg->qp = mpp_data_avg(ctx->qp_p, -1, 1, 1); + hw_cfg->qp += intraQpDelta; + } + /* + * Previous frame is inter then intra frame can not + * have a big qp step between these two frames + */ + if (hw_cfg->pre_frame_type == H264E_VPU_FRAME_P) + hw_cfg->qp = mpp_clip(hw_cfg->qp, hw_cfg->qp_prev - 4, + hw_cfg->qp_prev + 4); + else + hw_cfg->qp = find_best_qp(ctx->intra_qs, codec, hw_cfg->qp_prev, + rc_syn->bit_target); + } else { + hw_cfg->frame_type = H264E_VPU_FRAME_P; + + hw_cfg->qp = find_best_qp(ctx->inter_qs, codec, hw_cfg->qp_prev, + rc_syn->bit_target); + + if (hw_cfg->pre_frame_type == H264E_VPU_FRAME_I) + hw_cfg->qp = mpp_clip(hw_cfg->qp, hw_cfg->qp_prev - 4, + hw_cfg->qp_prev + 4); + else { + if (hw_cfg->pre_bit_diff < 0 && hw_cfg->qp >= hw_cfg->qp_prev) { + hw_cfg->pre_bit_diff = abs(hw_cfg->pre_bit_diff); + if (hw_cfg->pre_bit_diff <= rc_syn->bit_target * 1 / 5) { + hw_cfg->qp = hw_cfg->qp_prev; + } else if (hw_cfg->pre_bit_diff > rc_syn->bit_target * 1 / 5) + hw_cfg->qp = hw_cfg->qp_prev - 1; + else if (hw_cfg->pre_bit_diff > rc_syn->bit_target * 2 / 3) + hw_cfg->qp = hw_cfg->qp_prev - 2; + else + hw_cfg->qp = hw_cfg->qp_prev - 3; + } + } + } + } + hw_cfg->qp = mpp_clip(hw_cfg->qp, + hw_cfg->qp_prev - codec->qp_max_step, + hw_cfg->qp_prev + codec->qp_max_step); + + hw_cfg->qp = mpp_clip(hw_cfg->qp, codec->qp_min, codec->qp_max); + + + if (qc->nonZeroCnt == 0) { + qc->nonZeroCnt = 1; + } + + srcPrm = axb_div_c(qc->frameBitCnt, 256, qc->nonZeroCnt); + /* Disable Mb Rc for Intra Slices, because coeffTarget will be wrong */ + if (hw_cfg->frame_type == INTRA_FRAME || srcPrm == 0) { + return 0; + } + + /* Required zero cnt */ + nonZeroTarget = axb_div_c(rc_syn->bit_target, 256, srcPrm); + nonZeroTarget = MPP_MIN(coeffCntMax, MPP_MAX(0, nonZeroTarget)); + nonZeroTarget = MPP_MIN(0x7FFFFFFFU / 1024U, (RK_U32)nonZeroTarget); + + if (nonZeroTarget > 0) { + scaler = axb_div_c(nonZeroTarget, sscale, (RK_S32) mbPerPic); + } else { + return 0; + } + + if ((hw_cfg->frame_type != hw_cfg->pre_frame_type) || (qc->nonZeroCnt == 0)) { + + for (i = 0; i < qc->checkPoints; i++) { + tmp = (scaler * (qc->checkPointDistance * (i + 1) + 1)) / sscale; + tmp = MPP_MIN(WORD_CNT_MAX, tmp / 32 + 1); + if (tmp < 0) tmp = WORD_CNT_MAX; /* Detect overflow */ + hw_cfg->cp_target[i] = tmp; /* div32 for regs */ + } + tmp = axb_div_c(bits_per_pic, 256, srcPrm); + } else { + for (i = 0; i < qc->checkPoints; i++) { + tmp = (RK_S32) (qc->wordCntPrev[i] * scaler) / sscale; + tmp = MPP_MIN(WORD_CNT_MAX, tmp / 32 + 1); + if (tmp < 0) tmp = WORD_CNT_MAX; /* Detect overflow */ + hw_cfg->cp_target[i] = tmp; /* div32 for regs */ + } + tmp = axb_div_c(bits_per_pic, 256, srcPrm); + } + + hw_cfg->target_error[0] = -tmp * 3; + hw_cfg->delta_qp[0] = -3; + hw_cfg->target_error[1] = -tmp * 2; + hw_cfg->delta_qp[1] = -2; + hw_cfg->target_error[2] = -tmp * 1; + hw_cfg->delta_qp[2] = -1; + hw_cfg->target_error[3] = tmp * 1; + hw_cfg->delta_qp[3] = 0; + hw_cfg->target_error[4] = tmp * 2; + hw_cfg->delta_qp[4] = 1; + hw_cfg->target_error[5] = tmp * 3; + hw_cfg->delta_qp[5] = 2; + hw_cfg->target_error[6] = tmp * 4; + hw_cfg->delta_qp[6] = 3; + + for (i = 0; i < CTRL_LEVELS; i++) { + tmp = hw_cfg->cp_target[i]; + tmp = mpp_clip(tmp / 4, -32768, 32767); + hw_cfg->cp_target[i] = tmp; + } + hw_cfg->cp_distance_mbs = hw_cfg->qpCtrl.checkPointDistance; + return 0; +} + +MPP_RET h264e_vpu_mad_threshold(H264eHwCfg *hw_cfg, MppLinReg *mad, RK_U32 madCount) +{ + RK_S32 mbPerPic = (hw_cfg->width + 15) / 16 * (hw_cfg->height + 15) / 16; + RK_U32 targetCount = 30 * mbPerPic / 100; + RK_S32 threshold = hw_cfg->mad_threshold; + RK_S32 lowLimit, highLimit; + + mpp_save_regdata(mad, hw_cfg->mad_threshold, madCount); + mpp_linreg_update(mad); +// mpp_log("hw_cfg->mad_threshold = %d",hw_cfg->mad_threshold); + /* Calculate new threshold for next frame using either linear regression + * model or adjustment based on current setting */ + if (mad->a) + threshold = mad->a * targetCount / 32 + mad->b; + else if (madCount < targetCount) + threshold = MPP_MAX(hw_cfg->mad_threshold * 5 / 4, hw_cfg->mad_threshold + 256); + else + threshold = MPP_MIN(hw_cfg->mad_threshold * 3 / 4, hw_cfg->mad_threshold - 256); + + /* For small count, ensure that we increase the threshold minimum 1 step */ + if (madCount < targetCount / 2) + threshold = MPP_MAX(threshold, hw_cfg->mad_threshold + 256); + + /* If previous frame had zero count, ensure that we increase threshold */ + if (!madCount) + threshold = MPP_MAX(threshold, hw_cfg->mad_threshold + 256 * 4); + + /* Limit how much the threshold can change between two frames */ + lowLimit = hw_cfg->mad_threshold / 2; + highLimit = MPP_MAX(hw_cfg->mad_threshold * 2, 256 * 4); + hw_cfg->mad_threshold = MPP_MIN(highLimit, MPP_MAX(lowLimit, threshold)); + + /* threshold_div256 has 6-bits range [0,63] */ + hw_cfg->mad_threshold = ((hw_cfg->mad_threshold + 128) / 256) * 256; + hw_cfg->mad_threshold = MPP_MAX(0, MPP_MIN(63 * 256, hw_cfg->mad_threshold)); + return 0; +} + +MPP_RET h264e_vpu_update_hw_cfg(H264eHalContext *ctx, HalEncTask *task, + H264eHwCfg *hw_cfg) +{ + MppEncCfgSet *cfg = ctx->cfg; + MppEncH264Cfg *codec = &cfg->codec.h264; + MppEncPrepCfg *prep = &cfg->prep; + MppEncRcCfg *rc = &cfg->rc; + RcSyntax *rc_syn = (RcSyntax *)task->syntax.data; + + /* preprocess setup */ + if (prep->change) { + RK_U32 change = prep->change; + + if (change & MPP_ENC_PREP_CFG_CHANGE_INPUT) { + hw_cfg->width = prep->width; + hw_cfg->height = prep->height; + + hw_cfg->hor_stride = prep->hor_stride; + hw_cfg->ver_stride = prep->ver_stride; + } + + if (change & MPP_ENC_PREP_CFG_CHANGE_FORMAT) { + hw_cfg->input_format = prep->format; + h264e_vpu_set_format(hw_cfg, prep); + switch (prep->color) { + case MPP_FRAME_SPC_RGB : { + /* BT.601 */ + /* Y = 0.2989 R + 0.5866 G + 0.1145 B + * Cb = 0.5647 (B - Y) + 128 + * Cr = 0.7132 (R - Y) + 128 + */ + hw_cfg->color_conversion_coeff_a = 19589; + hw_cfg->color_conversion_coeff_b = 38443; + hw_cfg->color_conversion_coeff_c = 7504; + hw_cfg->color_conversion_coeff_e = 37008; + hw_cfg->color_conversion_coeff_f = 46740; + } break; + case MPP_FRAME_SPC_BT709 : { + /* BT.709 */ + /* Y = 0.2126 R + 0.7152 G + 0.0722 B + * Cb = 0.5389 (B - Y) + 128 + * Cr = 0.6350 (R - Y) + 128 + */ + hw_cfg->color_conversion_coeff_a = 13933; + hw_cfg->color_conversion_coeff_b = 46871; + hw_cfg->color_conversion_coeff_c = 4732; + hw_cfg->color_conversion_coeff_e = 35317; + hw_cfg->color_conversion_coeff_f = 41615; + } break; + default : { + hw_cfg->color_conversion_coeff_a = 19589; + hw_cfg->color_conversion_coeff_b = 38443; + hw_cfg->color_conversion_coeff_c = 7504; + hw_cfg->color_conversion_coeff_e = 37008; + hw_cfg->color_conversion_coeff_f = 46740; + } break; + } + } + + prep->change = 0; + } + + if (codec->change) { + // TODO: setup sps / pps here + hw_cfg->idr_pic_id = !ctx->idr_pic_id; + hw_cfg->filter_disable = codec->deblock_disable; + hw_cfg->slice_alpha_offset = codec->deblock_offset_alpha; + hw_cfg->slice_beta_offset = codec->deblock_offset_beta; + hw_cfg->inter4x4_disabled = (codec->profile >= 31) ? (1) : (0); + hw_cfg->cabac_init_idc = codec->cabac_init_idc; + hw_cfg->qp = codec->qp_init; + + hw_cfg->qp_prev = hw_cfg->qp; + + codec->change = 0; + } + if (NULL == ctx->intra_qs) + mpp_linreg_init(&ctx->intra_qs, MPP_MIN(rc->gop, 10), 2); + if (NULL == ctx->inter_qs) + mpp_linreg_init(&ctx->inter_qs, MPP_MIN(rc->gop, 10), 2); + + if (NULL == ctx->mad) + mpp_linreg_init(&ctx->mad, 5, 1); + + if (NULL == ctx->qp_p) + mpp_data_init(&ctx->qp_p, MPP_MIN(rc->gop, 10)); + + mpp_assert(ctx->intra_qs); + mpp_assert(ctx->inter_qs); + if (rc_syn->type == INTRA_FRAME) { + hw_cfg->frame_type = H264E_VPU_FRAME_I; + hw_cfg->frame_num = 0; + } else { + hw_cfg->frame_type = H264E_VPU_FRAME_P; + } + + hw_cfg->keyframe_max_interval = rc->gop; + hw_cfg->qp_min = codec->qp_min; + hw_cfg->qp_max = codec->qp_max; + + if (rc->rc_mode == MPP_ENC_RC_MODE_VBR && + rc->quality == MPP_ENC_RC_QUALITY_CQP) { + hw_cfg->qp = codec->qp_init; + } else { + /* enable mb rate control*/ + h264e_vpu_mb_rc_cfg(ctx, rc_syn, hw_cfg); + } + /* slice mode setup */ + hw_cfg->slice_size_mb_rows = 0; //(prep->height + 15) >> 4; + + /* input and preprocess config, the offset is at [31:10] */ + hw_cfg->input_luma_addr = mpp_buffer_get_fd(task->input); + + switch (prep->format) { + case MPP_FMT_YUV420SP: { + RK_U32 offset_uv = hw_cfg->hor_stride * hw_cfg->ver_stride; + + mpp_assert(prep->hor_stride == MPP_ALIGN(prep->width, 8)); + mpp_assert(prep->ver_stride == MPP_ALIGN(prep->height, 8)); + + hw_cfg->input_cb_addr = hw_cfg->input_luma_addr + (offset_uv << 10); + hw_cfg->input_cr_addr = 0; + break; + } + case MPP_FMT_YUV420P: { + RK_U32 offset_y = hw_cfg->hor_stride * hw_cfg->ver_stride; + + mpp_assert(prep->hor_stride == MPP_ALIGN(prep->width, 8)); + mpp_assert(prep->ver_stride == MPP_ALIGN(prep->height, 8)); + + hw_cfg->input_cb_addr = hw_cfg->input_luma_addr + (offset_y << 10); + hw_cfg->input_cr_addr = hw_cfg->input_cb_addr + (offset_y << 8); + break; + } + case MPP_FMT_YUV422_YUYV: + case MPP_FMT_YUV422_UYVY: + case MPP_FMT_RGB565: + case MPP_FMT_BGR444: + case MPP_FMT_BGR888: + case MPP_FMT_RGB888: + case MPP_FMT_ARGB8888: + case MPP_FMT_ABGR8888: + case MPP_FMT_BGR101010: + hw_cfg->input_cb_addr = 0; + hw_cfg->input_cr_addr = 0; + break; + default: { + mpp_err_f("invalid input format %d", prep->format); + return MPP_ERR_VALUE; + } + } + hw_cfg->output_strm_addr = mpp_buffer_get_fd(task->output); + hw_cfg->output_strm_limit_size = mpp_buffer_get_size(task->output); + + /* context update */ + ctx->idr_pic_id = !ctx->idr_pic_id; + return MPP_OK; +} diff --git a/mpp/hal/vpu/h264e/hal_h264e_rc.h b/mpp/hal/vpu/h264e/hal_h264e_rc.h new file mode 100644 index 00000000..56fbcde8 --- /dev/null +++ b/mpp/hal/vpu/h264e/hal_h264e_rc.h @@ -0,0 +1,33 @@ +/* + * Copyright 2015 - 2017 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _HAL_H264E_RC_H_ +#define _HAL_H264E_RC_H_ + +#include +#include + +#include "rk_mpi.h" +#include "hal_h264e_com.h" + +extern const RK_S32 h264_q_step[]; + +MPP_RET h264e_vpu_update_hw_cfg(H264eHalContext *ctx, HalEncTask *task, + H264eHwCfg *hw_cfg); +MPP_RET h264e_vpu_update_buffers(H264eHalContext *ctx, H264eHwCfg *hw_cfg); +MPP_RET h264e_vpu_mad_threshold(H264eHwCfg *hw_cfg, MppLinReg *mad, RK_U32 madCount); + +#endif diff --git a/mpp/hal/vpu/h264e/hal_h264e_vepu1.c b/mpp/hal/vpu/h264e/hal_h264e_vepu1.c index 37d66a69..474913eb 100644 --- a/mpp/hal/vpu/h264e/hal_h264e_vepu1.c +++ b/mpp/hal/vpu/h264e/hal_h264e_vepu1.c @@ -29,7 +29,9 @@ #include "hal_h264e_com.h" #include "hal_h264e_header.h" #include "hal_h264e_vpu_tbl.h" + #include "hal_h264e_vepu.h" +#include "hal_h264e_rc.h" #include "hal_h264e_vepu1.h" #include "hal_h264e_vepu1_reg_tbl.h" @@ -115,6 +117,16 @@ MPP_RET hal_h264e_vepu1_deinit(void *hal) ctx->intra_qs = NULL; } + if (ctx->mad) { + mpp_linreg_deinit(ctx->mad); + ctx->mad = NULL; + } + + if (ctx->qp_p) { + mpp_data_deinit(ctx->qp_p); + ctx->qp_p = NULL; + } + #ifdef RKPLATFORM ret = mpp_device_deinit(ctx->dev_ctx); if (ret) { @@ -443,8 +455,7 @@ MPP_RET hal_h264e_vepu1_start(void *hal, HalTaskInfo *task) return MPP_OK; } -static MPP_RET -hal_h264e_vepu1_set_feedback(h264e_feedback *fb, h264e_vepu1_reg_set *reg) +static MPP_RET hal_h264e_vepu1_set_feedback(h264e_feedback *fb, h264e_vepu1_reg_set *reg) { RK_S32 i = 0; RK_U32 cpt_prev = 0, overflow = 0; @@ -455,7 +466,7 @@ hal_h264e_vepu1_set_feedback(h264e_feedback *fb, h264e_vepu1_reg_set *reg) fb->mad_count = VEPU_REG_MB_CNT_SET(reg_val[VEPU_REG_MB_CTRL / 4]); fb->rlc_count = VEPU_REG_RLC_SUM_OUT(reg_val[VEPU_REG_RLC_CTRL / 4]); fb->out_strm_size = reg_val[VEPU_REG_STR_BUF_LIMIT / 4] / 8; - for (i = 0; i < 10; i++) { + for (i = 0; i < CHECK_POINTS_MAX; i++) { RK_U32 cpt = VEPU_REG_CHECKPOINT_RESULT(reg_val[cpt_idx]); if (cpt < cpt_prev) overflow += (1 << 21); @@ -465,14 +476,50 @@ hal_h264e_vepu1_set_feedback(h264e_feedback *fb, h264e_vepu1_reg_set *reg) return MPP_OK; } +static MPP_RET hal_h264e_vpu1_resend(H264eHalContext *ctx, RK_U32 *reg_out, RK_S32 dealt_qp) +{ + + RK_U32 *p_regs = (RK_U32 *)ctx->regs; + H264eHwCfg *hw_cfg = &ctx->hw_cfg; + RK_U32 val = 0; + RK_S32 hw_ret = 0; + hw_cfg->qp += dealt_qp; + hw_cfg->qp = mpp_clip(hw_cfg->qp, hw_cfg->qp_min, hw_cfg->qp_max); + val = VEPU_REG_H264_LUMA_INIT_QP(hw_cfg->qp) + | VEPU_REG_H264_QP_MAX(hw_cfg->qp_max) + | VEPU_REG_H264_QP_MIN(hw_cfg->qp_min) + | VEPU_REG_H264_CHKPT_DISTANCE(hw_cfg->cp_distance_mbs); + + H264E_HAL_SET_REG(p_regs, VEPU_REG_QP_VAL, val); +#ifdef RKPLATFORM + hw_ret = mpp_device_send_reg(ctx->dev_ctx, p_regs, VEPU_H264E_VEPU1_NUM_REGS); + if (hw_ret) + mpp_err("mpp_device_send_reg failed ret %d", hw_ret); + else + h264e_hal_dbg(H264E_DBG_DETAIL, "mpp_device_send_reg success!"); + + hw_ret = mpp_device_wait_reg(ctx->dev_ctx, (RK_U32 *)reg_out, VEPU_H264E_VEPU1_NUM_REGS); +#endif + if (hw_ret != MPP_OK) { + h264e_hal_err("hardware returns error:%d", hw_ret); + return MPP_ERR_VPUHW; + } + return MPP_OK; +} MPP_RET hal_h264e_vepu1_wait(void *hal, HalTaskInfo *task) { H264eHalContext *ctx = (H264eHalContext *)hal; - h264e_vepu1_reg_set *reg_out = (h264e_vepu1_reg_set *)ctx->regs; + h264e_vepu1_reg_set reg_out_tmp; + h264e_vepu1_reg_set *reg_out = ®_out_tmp; IOInterruptCB int_cb = ctx->int_cb; h264e_feedback *fb = &ctx->feedback; MppEncPrepCfg *prep = &ctx->set->prep; + RcSyntax *rc_syn = (RcSyntax *)task->enc.syntax.data; + H264eHwCfg *hw_cfg = &ctx->hw_cfg; + RK_S32 num_mb = MPP_ALIGN(prep->width, 16) + * MPP_ALIGN(prep->height, 16) / 16 / 16; + memset(reg_out, 0, sizeof(h264e_vepu1_reg_set)); h264e_hal_enter(); @@ -495,20 +542,56 @@ MPP_RET hal_h264e_vepu1_wait(void *hal, HalTaskInfo *task) hal_h264e_vepu1_set_feedback(fb, reg_out); task->enc.length = fb->out_strm_size; + + hw_cfg->qp_prev = hw_cfg->qp; + if (rc_syn->type == INTER_P_FRAME) { + int dealt_qp = 3; + int cnt = 3; + do { + if (hw_cfg->qp < 30) { + dealt_qp = 5; + } else if (hw_cfg->qp < 42) { + dealt_qp = 3; + } else { + dealt_qp = 2; + } + if (fb->out_strm_size * 8 > (RK_U32)rc_syn->bit_target * 3) { + hal_h264e_vpu1_resend(hal, (RK_U32 *)reg_out, dealt_qp); + hal_h264e_vepu1_set_feedback(fb, reg_out); + task->enc.length = fb->out_strm_size; + hw_cfg->qp_prev = fb->qp_sum / num_mb; + if (cnt-- <= 0) { + break; + } + } else { + break; + } + } while (1); + } if (int_cb.callBack) { RcSyntax *syn = (RcSyntax *)task->enc.syntax.data; - RK_S32 num_mb = MPP_ALIGN(prep->width, 16) - * MPP_ALIGN(prep->height, 16) / 16 / 16; RK_S32 avg_qp = fb->qp_sum / num_mb; RcHalResult result; + RK_S32 i; mpp_assert(avg_qp >= 0); mpp_assert(avg_qp <= 51); + avg_qp = hw_cfg->qp; result.bits = fb->out_strm_size * 8; result.type = syn->type; fb->result = &result; + hw_cfg->qpCtrl.nonZeroCnt = fb->rlc_count; + hw_cfg->qpCtrl.frameBitCnt = result.bits; + if (syn->type == INTER_P_FRAME || syn->gop_mode == MPP_GOP_ALL_INTRA) { + mpp_data_update(ctx->qp_p, avg_qp); + } + + for (i = 0; i < CHECK_POINTS_MAX; i++) { + hw_cfg->qpCtrl.wordCntPrev[i] = fb->cp[i]; + } + mpp_save_regdata((syn->type == INTRA_FRAME) ? (ctx->intra_qs) : (ctx->inter_qs), @@ -517,6 +600,8 @@ MPP_RET hal_h264e_vepu1_wait(void *hal, HalTaskInfo *task) (ctx->intra_qs) : (ctx->inter_qs)); + h264e_vpu_mad_threshold(hw_cfg, ctx->mad, fb->mad_count); + int_cb.callBack(int_cb.opaque, fb); } diff --git a/mpp/hal/vpu/h264e/hal_h264e_vepu2.c b/mpp/hal/vpu/h264e/hal_h264e_vepu2.c index b930978e..802af778 100644 --- a/mpp/hal/vpu/h264e/hal_h264e_vepu2.c +++ b/mpp/hal/vpu/h264e/hal_h264e_vepu2.c @@ -29,7 +29,9 @@ #include "hal_h264e_com.h" #include "hal_h264e_header.h" #include "hal_h264e_vpu_tbl.h" + #include "hal_h264e_vepu.h" +#include "hal_h264e_rc.h" #include "hal_h264e_vepu2.h" #include "hal_h264e_vepu2_reg_tbl.h" @@ -111,6 +113,16 @@ MPP_RET hal_h264e_vepu2_deinit(void *hal) ctx->inter_qs = NULL; } + if (ctx->qp_p) { + mpp_data_deinit(ctx->qp_p); + ctx->qp_p = NULL; + } + + if (ctx->mad) { + mpp_linreg_deinit(ctx->mad); + ctx->mad = NULL; + } + #ifdef RKPLATFORM ret = mpp_device_deinit(ctx->dev_ctx); if (ret) { @@ -480,26 +492,61 @@ static MPP_RET h264e_vpu_set_feedback(h264e_feedback *fb, H264eVpu2RegSet *reg) fb->rlc_count = reg_val[VEPU_REG_RLC_SUM / 4] & 0x3fffff; fb->out_strm_size = reg_val[VEPU_REG_STR_BUF_LIMIT / 4] / 8; - for (i = 0; i < 10; i++) { + for (i = 0; i < CHECK_POINTS_MAX; i++) { RK_U32 cpt = VEPU_REG_CHECKPOINT_RESULT(reg_val[cpt_idx]); if (cpt < cpt_prev) overflow += (1 << 21); + cpt_prev = cpt; fb->cp[i] = cpt + overflow; cpt_idx += (i & 1); } return MPP_OK; } +static MPP_RET hal_h264e_vpu2_resend(H264eHalContext *ctx, RK_U32 *reg_out, RK_S32 dealt_qp) +{ + RK_U32 *p_regs = (RK_U32 *)ctx->regs; + H264eHwCfg *hw_cfg = &ctx->hw_cfg; + RK_U32 val = 0; + RK_S32 hw_ret = 0; + hw_cfg->qp += dealt_qp; + hw_cfg->qp = mpp_clip(hw_cfg->qp, hw_cfg->qp_min, hw_cfg->qp_max); + + val = VEPU_REG_H264_LUMA_INIT_QP(hw_cfg->qp) + | VEPU_REG_H264_QP_MAX(hw_cfg->qp_max) + | VEPU_REG_H264_QP_MIN(hw_cfg->qp_min) + | VEPU_REG_H264_CHKPT_DISTANCE(hw_cfg->cp_distance_mbs); + + H264E_HAL_SET_REG(p_regs, VEPU_REG_QP_VAL, val); +#ifdef RKPLATFORM + hw_ret = mpp_device_send_reg(ctx->dev_ctx, p_regs, VEPU2_H264E_NUM_REGS); + if (hw_ret) + mpp_err("mpp_device_send_reg failed ret %d", hw_ret); + else + h264e_hal_dbg(H264E_DBG_DETAIL, "mpp_device_send_reg success!"); + + hw_ret = mpp_device_wait_reg(ctx->dev_ctx, (RK_U32 *)reg_out, VEPU2_H264E_NUM_REGS); +#endif + if (hw_ret != MPP_OK) { + h264e_hal_err("hardware returns error:%d", hw_ret); + return MPP_ERR_VPUHW; + } + return MPP_OK; +} MPP_RET hal_h264e_vepu2_wait(void *hal, HalTaskInfo *task) { H264eHalContext *ctx = (H264eHalContext *)hal; - H264eVpu2RegSet *reg_out = (H264eVpu2RegSet *)ctx->regs; + H264eVpu2RegSet reg_out_tmp; + H264eVpu2RegSet *reg_out = ®_out_tmp; MppEncPrepCfg *prep = &ctx->set->prep; IOInterruptCB int_cb = ctx->int_cb; h264e_feedback *fb = &ctx->feedback; + RcSyntax *rc_syn = (RcSyntax *)task->enc.syntax.data; + H264eHwCfg *hw_cfg = &ctx->hw_cfg; RK_S32 num_mb = MPP_ALIGN(prep->width, 16) * MPP_ALIGN(prep->height, 16) / 16 / 16; + memset(reg_out, 0, sizeof(H264eVpu2RegSet)); h264e_hal_enter(); #ifdef RKPLATFORM @@ -522,26 +569,67 @@ MPP_RET hal_h264e_vepu2_wait(void *hal, HalTaskInfo *task) h264e_vpu_set_feedback(fb, reg_out); task->enc.length = fb->out_strm_size; + hw_cfg->qp_prev = hw_cfg->qp; + if (rc_syn->type == INTER_P_FRAME) { + int dealt_qp = 3; + int cnt = 3; + do { + if (hw_cfg->qp < 30) { + dealt_qp = 5; + } else if (hw_cfg->qp < 42) { + dealt_qp = 3; + } else { + dealt_qp = 2; + } + if (fb->out_strm_size * 8 > (RK_U32)rc_syn->bit_target * 3) { + hal_h264e_vpu2_resend(hal, (RK_U32 *)reg_out, dealt_qp); + h264e_vpu_set_feedback(fb, reg_out); + task->enc.length = fb->out_strm_size; + hw_cfg->qp_prev = fb->qp_sum / num_mb; + if (cnt-- <= 0) { + break; + } + } else { + break; + } + } while (1); + } + if (int_cb.callBack) { RcSyntax *syn = (RcSyntax *)task->enc.syntax.data; RcHalResult result; + RK_S32 i; RK_S32 avg_qp = fb->qp_sum / num_mb; mpp_assert(avg_qp >= 0); mpp_assert(avg_qp <= 51); + avg_qp = hw_cfg->qp; + result.bits = fb->out_strm_size * 8; result.type = syn->type; fb->result = &result; + hw_cfg->qpCtrl.nonZeroCnt = fb->rlc_count; + hw_cfg->qpCtrl.frameBitCnt = result.bits; + hw_cfg->pre_bit_diff = result.bits - syn->bit_target; + if (syn->type == INTER_P_FRAME || syn->gop_mode == MPP_GOP_ALL_INTRA) { + mpp_data_update(ctx->qp_p, avg_qp); + } + + for (i = 0; i < CHECK_POINTS_MAX; i++) { + hw_cfg->qpCtrl.wordCntPrev[i] = fb->cp[i]; + } mpp_save_regdata((syn->type == INTRA_FRAME) ? (ctx->intra_qs) : (ctx->inter_qs), h264_q_step[avg_qp], result.bits); + mpp_linreg_update((syn->type == INTRA_FRAME) ? (ctx->intra_qs) : (ctx->inter_qs)); + h264e_vpu_mad_threshold(hw_cfg, ctx->mad, fb->mad_count); int_cb.callBack(int_cb.opaque, fb); } diff --git a/mpp/hal/vpu/h264e/hal_h264e_vpu_tbl.h b/mpp/hal/vpu/h264e/hal_h264e_vpu_tbl.h index 24f5ae53..92e9a5ca 100644 --- a/mpp/hal/vpu/h264e/hal_h264e_vpu_tbl.h +++ b/mpp/hal/vpu/h264e/hal_h264e_vpu_tbl.h @@ -19,15 +19,6 @@ #include "rk_type.h" -static const RK_S32 h264_q_step[] = { - 3, 3, 3, 4, 4, 5, 5, 6, 7, 7, - 8, 9, 10, 11, 13, 14, 16, 18, 20, 23, - 25, 28, 32, 36, 40, 45, 51, 57, 64, 72, - 80, 90, 101, 114, 128, 144, 160, 180, 203, 228, - 256, 288, 320, 360, 405, 456, 513, 577, 640, 720, - 810, 896 -}; - /* H.264 motion estimation parameters */ static const RK_U32 h264_prev_mode_favor[52] = { 7, 7, 8, 8, 9, 9, 10, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, diff --git a/mpp/legacy/vpu_api_legacy.cpp b/mpp/legacy/vpu_api_legacy.cpp index 30809b3a..1f3e7f0c 100644 --- a/mpp/legacy/vpu_api_legacy.cpp +++ b/mpp/legacy/vpu_api_legacy.cpp @@ -191,9 +191,9 @@ static MPP_RET vpu_api_set_enc_cfg(MppCtx mpp_ctx, MppApi *mpi, } else { /* constant bitrate do not limit qp range */ codec_cfg->h264.qp_init = 0; - codec_cfg->h264.qp_max = 48; - codec_cfg->h264.qp_min = 4; - codec_cfg->h264.qp_max_step = 16; + codec_cfg->h264.qp_max = 51; + codec_cfg->h264.qp_min = 10; + codec_cfg->h264.qp_max_step = 4; } } break; case MPP_VIDEO_CodingMJPEG : { @@ -1162,7 +1162,7 @@ RK_S32 VpuApiLegacy::encoder_sendframe(VpuCodecContext *ctx, EncInputStream_t *a RK_U32 height = ctx->height; RK_U32 hor_stride = MPP_ALIGN(width, 16); RK_U32 ver_stride = MPP_ALIGN(height, 16); - RK_S32 pts = (RK_S32)aEncInStrm->timeUs; + RK_S64 pts = aEncInStrm->timeUs; RK_S32 fd = aEncInStrm->bufPhyAddr; RK_U32 size = aEncInStrm->size; @@ -1237,7 +1237,7 @@ RK_S32 VpuApiLegacy::encoder_sendframe(VpuCodecContext *ctx, EncInputStream_t *a } } - vpu_api_dbg_input("w %d h %d input fd %d size %d flag %d pts %lld\n", + vpu_api_dbg_input("w %d h %d input fd %d size %d pts %lld, flag %d \n", width, height, fd, size, aEncInStrm->timeUs, aEncInStrm->nFlags); if (aEncInStrm->nFlags) {