diff --git a/mpp/hal/vpu/h264e/hal_h264e_rc.c b/mpp/hal/vpu/h264e/hal_h264e_rc.c
index 15e49bb9..24421de7 100644
--- a/mpp/hal/vpu/h264e/hal_h264e_rc.c
+++ b/mpp/hal/vpu/h264e/hal_h264e_rc.c
@@ -408,7 +408,7 @@ MPP_RET h264e_vpu_update_hw_cfg(H264eHalContext *ctx, HalEncTask *task,
             mpp_log_f("warning: 16 aligned picture height %d and vertical stride %d do NOT matched\n",
                       aligned_h, prep->ver_stride);
 
-        mpp_assert(prep->hor_stride == MPP_ALIGN(prep->width, 16));
+        mpp_assert(prep->hor_stride == MPP_ALIGN(prep->hor_stride, 8));
     }
 
     hw_cfg->input_cb_addr = hw_cfg->input_luma_addr + (offset_uv << 10);
@@ -425,7 +425,7 @@ MPP_RET h264e_vpu_update_hw_cfg(H264eHalContext *ctx, HalEncTask *task,
             mpp_log_f("warning: 16 aligned picture height %d and vertical stride %d do NOT matched\n",
                       aligned_h, prep->ver_stride);
 
-        mpp_assert(prep->hor_stride == MPP_ALIGN(prep->width, 16));
+        mpp_assert(prep->hor_stride == MPP_ALIGN(prep->hor_stride, 8));
     }
 
     hw_cfg->input_cb_addr = hw_cfg->input_luma_addr + (offset_y << 10);
diff --git a/mpp/hal/vpu/h264e/hal_h264e_vepu1.c b/mpp/hal/vpu/h264e/hal_h264e_vepu1.c
index 46c22d2a..0925cb3a 100644
--- a/mpp/hal/vpu/h264e/hal_h264e_vepu1.c
+++ b/mpp/hal/vpu/h264e/hal_h264e_vepu1.c
@@ -239,7 +239,7 @@ MPP_RET hal_h264e_vepu1_gen_regs(void *hal, HalTaskInfo *task)
     // When offset is zero row length should be total 16 aligned width
     val = VEPU_REG_IN_IMG_CHROMA_OFFSET(0)
           | VEPU_REG_IN_IMG_LUMA_OFFSET(0)
-          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(mbs_in_row * 16)
+          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(prep->hor_stride)
           | VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r)
           | VEPU_REG_IN_IMG_CTRL_OVRFLB(overfill_b)
           | VEPU_REG_IN_IMG_CTRL_FMT(hw_cfg->input_format)
diff --git a/mpp/hal/vpu/h264e/hal_h264e_vepu1_v2.c b/mpp/hal/vpu/h264e/hal_h264e_vepu1_v2.c
index e0b846fe..b2d4bb76 100644
--- a/mpp/hal/vpu/h264e/hal_h264e_vepu1_v2.c
+++ b/mpp/hal/vpu/h264e/hal_h264e_vepu1_v2.c
@@ -373,7 +373,7 @@ static MPP_RET hal_h264e_vepu1_gen_regs_v2(void *hal, HalEncTask *task)
     // When offset is zero row length should be total 16 aligned width
     val = VEPU_REG_IN_IMG_CHROMA_OFFSET(0)
           | VEPU_REG_IN_IMG_LUMA_OFFSET(0)
-          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(mb_w * 16)
+          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(hw_prep->hor_stride)
           | VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r)
           | VEPU_REG_IN_IMG_CTRL_OVRFLB(overfill_b)
           | VEPU_REG_IN_IMG_CTRL_FMT(hw_prep->src_fmt)
diff --git a/mpp/hal/vpu/h264e/hal_h264e_vepu2.c b/mpp/hal/vpu/h264e/hal_h264e_vepu2.c
index 5c720144..4a92b52c 100644
--- a/mpp/hal/vpu/h264e/hal_h264e_vepu2.c
+++ b/mpp/hal/vpu/h264e/hal_h264e_vepu2.c
@@ -226,7 +226,8 @@ MPP_RET hal_h264e_vepu2_gen_regs(void *hal, HalTaskInfo *task)
     // When offset is zero row length should be total 16 aligned width
     val = VEPU_REG_IN_IMG_CHROMA_OFFSET(0)
           | VEPU_REG_IN_IMG_LUMA_OFFSET(0)
-          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(mb_w * 16);
+          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(prep->hor_stride);
+
     H264E_HAL_SET_REG(reg, VEPU_REG_INPUT_LUMA_INFO, val);
 
     val = VEPU_REG_CHECKPOINT_CHECK1(hw_cfg->cp_target[0])
diff --git a/mpp/hal/vpu/h264e/hal_h264e_vepu2_v2.c b/mpp/hal/vpu/h264e/hal_h264e_vepu2_v2.c
index 03419cea..ef5c3b08 100644
--- a/mpp/hal/vpu/h264e/hal_h264e_vepu2_v2.c
+++ b/mpp/hal/vpu/h264e/hal_h264e_vepu2_v2.c
@@ -348,8 +348,10 @@ static MPP_RET hal_h264e_vepu2_gen_regs_v2(void *hal, HalEncTask *task)
     RK_U32 scaler = MPP_MAX(1, 200 / (mb_w + mb_h));
     RK_U32 skip_penalty = MPP_MIN(255, h264_skip_sad_penalty[hw_mbrc->qp_init] * scaler);
+    RK_U32 overfill_r = (hw_prep->src_w & 0x0f) ? ((16 - (hw_prep->src_w & 0x0f)) / 4) : 0;
+    RK_U32 overfill_b = (hw_prep->src_h & 0x0f) ? (16 - (hw_prep->src_h & 0x0f)) : 0;
@@ -362,7 +364,8 @@ static MPP_RET hal_h264e_vepu2_gen_regs_v2(void *hal, HalEncTask *task)
     // When offset is zero row length should be total 16 aligned width
     val = VEPU_REG_IN_IMG_CHROMA_OFFSET(0)
           | VEPU_REG_IN_IMG_LUMA_OFFSET(0)
-          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(mb_w * 16);
+          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(hw_prep->hor_stride);
+
     H264E_HAL_SET_REG(reg, VEPU_REG_INPUT_LUMA_INFO, val);
 
     val = VEPU_REG_CHECKPOINT_CHECK1(hw_mbrc->cp_target[0])
diff --git a/mpp/hal/vpu/h264e/hal_h264e_vepu_v2.c b/mpp/hal/vpu/h264e/hal_h264e_vepu_v2.c
index 9d65639d..fb4abdb4 100644
--- a/mpp/hal/vpu/h264e/hal_h264e_vepu_v2.c
+++ b/mpp/hal/vpu/h264e/hal_h264e_vepu_v2.c
@@ -365,25 +365,33 @@ MPP_RET h264e_vepu_prep_setup(HalH264eVepuPrep *prep, MppEncPrepCfg *cfg)
     }
 
     /* NOTE: vepu only support 8bit encoding and stride must match with width align to 16 */
-    RK_S32 hor_stride = MPP_ALIGN(cfg->width, 16);
+    RK_S32 hor_stride = cfg->hor_stride;
     RK_S32 ver_stride = cfg->ver_stride;
+
     prep->offset_cb = 0;
     prep->offset_cr = 0;
+    prep->hor_stride = hor_stride;
 
     switch (format & MPP_FRAME_FMT_MASK) {
     case MPP_FMT_YUV420SP : {
-        if (cfg->hor_stride != MPP_ALIGN(cfg->width, 16))
-            mpp_log_f("vepu only support 16byte aligned YUV420SP horizontal stride %d vs width %d\n",
-                      cfg->hor_stride, cfg->width);
+        if (!prep->stride_workaround &&
+            hor_stride != MPP_ALIGN(hor_stride, 8)) {
+            mpp_log_f("vepu only support 8byte aligned YUV420SP horizontal stride %d vs width %d\n",
+                      hor_stride, cfg->width);
+            prep->stride_workaround = 1;
+        }
 
         prep->offset_cb = hor_stride * ver_stride;
        prep->size_y = hor_stride * MPP_ALIGN(prep->src_h, 16);
         prep->size_c = hor_stride / 2 * MPP_ALIGN(prep->src_h / 2, 8);
     } break;
     case MPP_FMT_YUV420P : {
-        if (cfg->hor_stride != MPP_ALIGN(cfg->width, 16))
+        if (!prep->stride_workaround &&
+            hor_stride != MPP_ALIGN(hor_stride, 16)) {
             mpp_log_f("vepu only support 16byte aligned YUV420P horizontal stride %d vs width %d\n",
-                      cfg->hor_stride, cfg->width);
+                      hor_stride, cfg->width);
+            prep->stride_workaround = 1;
+        }
 
         prep->offset_cb = hor_stride * ver_stride;
         prep->offset_cr = prep->offset_cb + ((hor_stride * ver_stride) / 4);
@@ -392,14 +400,17 @@ MPP_RET h264e_vepu_prep_setup(HalH264eVepuPrep *prep, MppEncPrepCfg *cfg)
     } break;
     case MPP_FMT_YUV422_YUYV :
     case MPP_FMT_YUV422_UYVY : {
-        if (cfg->hor_stride != (MPP_ALIGN(cfg->width, 16) * 2) && !prep->stride_workaround) {
-            mpp_log_f("vepu only support 16 pixel aligned YUV422 horizontal stride %d vs width %d\n",
-                      cfg->hor_stride, cfg->width);
+        if (!prep->stride_workaround &&
+            ((hor_stride != MPP_ALIGN(hor_stride, 8 * 2)) ||
+             (hor_stride < cfg->width * 2))) {
+            mpp_log_f("vepu only support 16bit pixel aligned YUV422 horizontal stride %d vs width %d\n",
+                      hor_stride, cfg->width);
             prep->stride_workaround = 1;
         }
 
-        prep->size_y = hor_stride * 2 * MPP_ALIGN(prep->src_h, 16);
+        prep->size_y = hor_stride * MPP_ALIGN(prep->src_h, 16);
         prep->size_c = 0;
+        prep->hor_stride = hor_stride / 2;
     } break;
     case MPP_FMT_RGB565 :
     case MPP_FMT_BGR565 :
@@ -407,14 +418,17 @@ MPP_RET h264e_vepu_prep_setup(HalH264eVepuPrep *prep, MppEncPrepCfg *cfg)
     case MPP_FMT_RGB555 :
     case MPP_FMT_BGR555 :
     case MPP_FMT_RGB444 :
     case MPP_FMT_BGR444 : {
-        if ((cfg->hor_stride != cfg->width * 2) && !prep->stride_workaround) {
+        if (!prep->stride_workaround &&
+            ((hor_stride != MPP_ALIGN(hor_stride, 8 * 2)) ||
+             (hor_stride < cfg->width * 2))) {
             mpp_log_f("vepu only support matched 16bit pixel horizontal stride %d vs width %d\n",
-                      cfg->hor_stride, cfg->width);
+                      hor_stride, cfg->width);
             prep->stride_workaround = 1;
         }
 
-        prep->size_y = hor_stride * 2 * MPP_ALIGN(prep->src_h, 16);
+        prep->size_y = hor_stride * MPP_ALIGN(prep->src_h, 16);
         prep->size_c = 0;
+        prep->hor_stride = hor_stride / 2;
     } break;
     case MPP_FMT_ARGB8888 :
     case MPP_FMT_ABGR8888 :
@@ -422,14 +436,17 @@ MPP_RET h264e_vepu_prep_setup(HalH264eVepuPrep *prep, MppEncPrepCfg *cfg)
     case MPP_FMT_BGRA8888 :
     case MPP_FMT_RGB101010 :
     case MPP_FMT_BGR101010 : {
-        if ((cfg->hor_stride != cfg->width * 4) && !prep->stride_workaround) {
+        if (!prep->stride_workaround &&
+            ((hor_stride != MPP_ALIGN(hor_stride, 8 * 4)) ||
+             (hor_stride < cfg->width * 4))) {
             mpp_log_f("vepu only support matched 32bit pixel horizontal stride %d vs width %d\n",
-                      cfg->hor_stride, cfg->width);
+                      hor_stride, cfg->width);
             prep->stride_workaround = 1;
         }
 
-        prep->size_y = hor_stride * 4 * MPP_ALIGN(prep->src_h, 16);
+        prep->size_y = hor_stride * MPP_ALIGN(prep->src_h, 16);
         prep->size_c = 0;
+        prep->hor_stride = hor_stride / 4;
     } break;
     default: {
         mpp_err_f("invalid format %d", format);
@@ -455,16 +472,16 @@ MPP_RET h264e_vepu_prep_get_addr(HalH264eVepuPrep *prep, MppBuffer buffer,
     (*addr)[2] = fd + (prep->offset_cr << 10);
 
     if (size < prep->size_y)
-        mpp_err("warnning: input buffer size 0x%x is smaller then required size 0x%x",
+        mpp_err("warning: input buffer size 0x%x is smaller than required size 0x%x",
                 size, prep->size_y);
 
     if (prep->size_c && (prep->offset_cb || prep->offset_cr)) {
         if (prep->offset_cb && (size < prep->offset_cb + prep->size_c))
-            mpp_err("warnning: input buffer size 0x%x is smaller then cb requirement 0x%x + 0x%x",
+            mpp_err("warning: input buffer size 0x%x is smaller than cb requirement 0x%x + 0x%x",
                     size, prep->offset_cb, prep->size_c);
 
         if (prep->offset_cr && (size < prep->offset_cr + prep->size_c))
-            mpp_err("warnning: input buffer size 0x%x is smaller then cb requirement 0x%x + 0x%x",
+            mpp_err("warning: input buffer size 0x%x is smaller than cr requirement 0x%x + 0x%x",
                     size, prep->offset_cr, prep->size_c);
     }
diff --git a/mpp/hal/vpu/h264e/hal_h264e_vepu_v2.h b/mpp/hal/vpu/h264e/hal_h264e_vepu_v2.h
index 954f5a1d..2ca5fb80 100644
--- a/mpp/hal/vpu/h264e/hal_h264e_vepu_v2.h
+++ b/mpp/hal/vpu/h264e/hal_h264e_vepu_v2.h
@@ -34,6 +34,8 @@ typedef struct HalH264eVepuInput_t {
     RK_S32              src_fmt;
     RK_S32              src_w;
     RK_S32              src_h;
+    RK_S32              hor_stride;
+
     size_t              size_y;
     size_t              size_c;
diff --git a/utils/mpi_enc_utils.c b/utils/mpi_enc_utils.c
index 75d4c109..5db008e8 100644
--- a/utils/mpi_enc_utils.c
+++ b/utils/mpi_enc_utils.c
@@ -33,8 +33,11 @@ RK_S32 mpi_enc_width_default_stride(RK_S32 width, MppFrameFormat fmt)
     RK_S32 stride = 0;
 
     switch (fmt & MPP_FRAME_FMT_MASK) {
-    case MPP_FMT_YUV420SP :
+    case MPP_FMT_YUV420SP : {
+        stride = MPP_ALIGN(width, 8);
+    } break;
     case MPP_FMT_YUV420P : {
+        /* NOTE: 420P needs to align to 16 so chroma can align to 8 */
         stride = MPP_ALIGN(width, 16);
     } break;
     case MPP_FMT_YUV422P:
@@ -49,17 +52,20 @@ RK_S32 mpi_enc_width_default_stride(RK_S32 width, MppFrameFormat fmt)
     case MPP_FMT_YUV422_YVYU :
     case MPP_FMT_YUV422_UYVY :
     case MPP_FMT_YUV422_VYUY : {
-        stride = MPP_ALIGN(width * 2, 16);
+        /* NOTE: for vepu limitation */
+        stride = MPP_ALIGN(width, 8) * 2;
     } break;
     case MPP_FMT_RGB888 :
     case MPP_FMT_BGR888 : {
-        stride = width * 3;
+        /* NOTE: for vepu limitation */
+        stride = MPP_ALIGN(width, 8) * 3;
     } break;
     case MPP_FMT_ARGB8888 :
     case MPP_FMT_ABGR8888:
     case MPP_FMT_BGRA8888:
     case MPP_FMT_RGBA8888: {
-        stride = width * 4;
+        /* NOTE: for vepu limitation */
+        stride = MPP_ALIGN(width, 8) * 4;
     } break;
     default : {
         mpp_err_f("do not support type %d\n", fmt);
diff --git a/utils/utils.c b/utils/utils.c
index 04f7954d..bbef5f23 100644
--- a/utils/utils.c
+++ b/utils/utils.c
@@ -527,7 +527,7 @@ MPP_RET fill_image(RK_U8 *buf, RK_U32 width, RK_U32 height,
                 p[x + 0] = i + y + frame_count * 3;
                 p[x + 1] = 128 + i + frame_count * 2;
                 p[x + 2] = 64 + i + frame_count * 5;
-                p[x + 3] = 0;
+                p[x + 3] = i + y / 2 + frame_count * 3;
             }
         }
     } break;
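
Reviewer notes, not part of the patch, follow below.

The core of the change is that VEPU_REG_IN_IMG_CTRL_ROW_LEN is now programmed from the caller's horizontal stride instead of the 16-aligned macroblock width. The register takes the luma row length in pixels, so h264e_vepu_prep_setup() converts the byte stride from MppEncPrepCfg back to pixels for the packed formats (divide by 2 for 16bit pixels, by 4 for 32bit pixels) and stores the result in the new prep->hor_stride field. Below is a minimal sketch of those rules, with MPP_ALIGN redefined locally so it builds outside the tree. vepu_row_len() is a hypothetical helper; unlike the patch, which only logs a warning once and keeps going, it returns -1 when a stride violates the constraint.

    #include <stdio.h>

    typedef int RK_S32;
    #define MPP_ALIGN(x, a)  (((x) + (a) - 1) & ~((a) - 1))

    /* Hypothetical helper, not in the patch: applies the stride rules that
     * h264e_vepu_prep_setup() now enforces and returns the row length in
     * pixels that ends up in VEPU_REG_IN_IMG_CTRL_ROW_LEN, or -1 when the
     * stride violates them. */
    static RK_S32 vepu_row_len(RK_S32 width, RK_S32 hor_stride, RK_S32 bpp)
    {
        switch (bpp) {
        case 1: /* YUV420SP: the byte stride only needs 8 byte alignment */
            return (hor_stride == MPP_ALIGN(hor_stride, 8)) ? hor_stride : -1;
        case 2: /* packed YUV422 / RGB565: 16 byte aligned and >= width * 2 */
            if (hor_stride != MPP_ALIGN(hor_stride, 8 * 2) || hor_stride < width * 2)
                return -1;
            return hor_stride / 2;  /* bytes back to pixels for the register */
        case 4: /* ARGB8888 family: 32 byte aligned and >= width * 4 */
            if (hor_stride != MPP_ALIGN(hor_stride, 8 * 4) || hor_stride < width * 4)
                return -1;
            return hor_stride / 4;
        default:
            return -1;
        }
    }

    int main(void)
    {
        /* 1918 pixel wide YUYV with a 4096 byte stride: 2048 pixel rows */
        printf("YUYV: %d\n", vepu_row_len(1918, 4096, 2));
        /* 1920 pixel wide ARGB with a tight 7680 byte stride: 1920 pixel rows */
        printf("ARGB: %d\n", vepu_row_len(1920, 7680, 4));
        return 0;
    }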
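
The new overfill_r / overfill_b values in hal_h264e_vepu2_v2.c describe how many padding pixels the hardware must overfill on the right and bottom edges when the source width or height is not a multiple of 16, since the encoder always processes whole 16x16 macroblocks. The right value is stored in units of 4 pixels, which is what the _D4 suffix of VEPU_REG_IN_IMG_CTRL_OVRFLR_D4 refers to. A standalone check of the arithmetic, with example dimensions that are not from the patch:

    #include <stdio.h>

    /* Same expressions the patch adds before the register setup in
     * hal_h264e_vepu2_gen_regs_v2(); plain int stands in for RK_U32. */
    int main(void)
    {
        int src_w = 1912, src_h = 1080;   /* example values only */
        int overfill_r = (src_w & 0x0f) ? ((16 - (src_w & 0x0f)) / 4) : 0;
        int overfill_b = (src_h & 0x0f) ? (16 - (src_h & 0x0f)) : 0;

        /* 1912 -> 8 padding pixels on the right, written as 8 / 4 = 2;
         * 1080 -> 8 padding lines on the bottom, written as-is */
        printf("overfill_r = %d, overfill_b = %d\n", overfill_r, overfill_b);
        return 0;
    }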
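
The size_y / size_c / offset_cb figures that h264e_vepu_prep_setup() computes are what h264e_vepu_prep_get_addr() later compares against the input buffer size before warning. A worked example for the YUV420SP branch at 1920x1080 with tight strides, values computed by hand rather than taken from the patch:

    #include <stdio.h>

    #define MPP_ALIGN(x, a)  (((x) + (a) - 1) & ~((a) - 1))

    /* Mirrors the YUV420SP branch of h264e_vepu_prep_setup(): the luma
     * plane is stride x 16-aligned height, and chroma follows at
     * hor_stride * ver_stride with half the stride and 8-aligned rows. */
    int main(void)
    {
        int hor_stride = 1920, ver_stride = 1080;
        int src_h = 1080;

        int offset_cb = hor_stride * ver_stride;
        int size_y = hor_stride * MPP_ALIGN(src_h, 16);
        int size_c = hor_stride / 2 * MPP_ALIGN(src_h / 2, 8);

        /* offset_cb = 2073600, size_y = 2088960, size_c = 522240 */
        printf("offset_cb %d size_y %d size_c %d\n", offset_cb, size_y, size_c);
        return 0;
    }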
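
Finally, the mpi_enc_utils.c change makes the demo utilities generate default strides that satisfy the relaxed VEPU checks: align the width to 8 pixels first, then multiply by the pixel size, rather than forcing every format to a 16-aligned byte stride. A quick recomputation of the new defaults for an arbitrary 100 pixel width:

    #include <stdio.h>

    #define MPP_ALIGN(x, a)  (((x) + (a) - 1) & ~((a) - 1))

    /* Recomputes the new defaults from mpi_enc_width_default_stride()
     * for one example width; 100 is an arbitrary demo value. */
    int main(void)
    {
        int width = 100;

        printf("YUV420SP : %d\n", MPP_ALIGN(width, 8));      /* 104 */
        printf("YUV420P  : %d\n", MPP_ALIGN(width, 16));     /* 112 */
        printf("YUYV     : %d\n", MPP_ALIGN(width, 8) * 2);  /* 208 */
        printf("RGB888   : %d\n", MPP_ALIGN(width, 8) * 3);  /* 312 */
        printf("ARGB8888 : %d\n", MPP_ALIGN(width, 8) * 4);  /* 416 */
        return 0;
    }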