[hal_vepu_v2]: Fix vepu hor_stride setup
vepu h264 input stride must be aligned to 8 pixels due to a vepu
hardware limitation. If it is not 8-aligned, a buffer conversion
preprocess has to be done before the buffer is sent to the encoder.

1. The hor_stride in vepu hardware is in pixel unit, not byte unit.
2. The hor_stride in vepu must be aligned to 8 pixels.
3. YUV420P must be aligned to 16 pixels so its chroma stays 8-aligned.
4. Fix the log print when hor_stride != 8-aligned width.

Change-Id: If30136a4673399c3be40a41a055e832db4c321d5
Signed-off-by: sayon.chen <sayon.chen@rock-chips.com>
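For reference, a minimal standalone sketch of the alignment rules the message describes. MPP_ALIGN is redefined locally with its usual round-up semantics, and check_vepu_stride() is a hypothetical helper, not a function from this commit:

#include <stdio.h>

#define MPP_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

/* hypothetical helper mirroring the four points above */
static int check_vepu_stride(int width, int hor_stride, int is_yuv420p)
{
    /* hor_stride is counted in pixels; YUV420P needs 16-pixel alignment
     * so the half-width chroma rows stay 8-aligned */
    int align = is_yuv420p ? 16 : 8;

    if (hor_stride != MPP_ALIGN(hor_stride, align)) {
        printf("stride %d (width %d) needs a conversion preprocess\n",
               hor_stride, width);
        return -1;
    }
    return 0;
}

int main(void)
{
    check_vepu_stride(1918, 1920, 0);  /* ok: multiple of 8 pixels */
    check_vepu_stride(1918, 1918, 0);  /* warns: not 8-pixel aligned */
    return 0;
}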
@@ -408,7 +408,7 @@ MPP_RET h264e_vpu_update_hw_cfg(H264eHalContext *ctx, HalEncTask *task,
         mpp_log_f("warning: 16 aligned picture height %d and vertical stride %d do NOT matched\n",
                   aligned_h, prep->ver_stride);
 
-        mpp_assert(prep->hor_stride == MPP_ALIGN(prep->width, 16));
+        mpp_assert(prep->hor_stride == MPP_ALIGN(prep->hor_stride, 8));
     }
 
     hw_cfg->input_cb_addr = hw_cfg->input_luma_addr + (offset_uv << 10);
@@ -425,7 +425,7 @@ MPP_RET h264e_vpu_update_hw_cfg(H264eHalContext *ctx, HalEncTask *task,
         mpp_log_f("warning: 16 aligned picture height %d and vertical stride %d do NOT matched\n",
                   aligned_h, prep->ver_stride);
 
-        mpp_assert(prep->hor_stride == MPP_ALIGN(prep->width, 16));
+        mpp_assert(prep->hor_stride == MPP_ALIGN(prep->hor_stride, 8));
     }
 
     hw_cfg->input_cb_addr = hw_cfg->input_luma_addr + (offset_y << 10);
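The two hunks above relax the assert from "stride equals the 16-aligned width" to "stride is itself a multiple of 8 pixels". A standalone illustration with arbitrary example values:

#include <assert.h>

#define MPP_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
    /* width 1918: the old assert demanded hor_stride == MPP_ALIGN(1918, 16) == 1920 */
    int strides[] = { 1920, 1928, 2048 };  /* all multiples of 8, all now accepted */
    int i;

    for (i = 0; i < 3; i++)
        assert(strides[i] == MPP_ALIGN(strides[i], 8));
    return 0;
}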
@@ -239,7 +239,7 @@ MPP_RET hal_h264e_vepu1_gen_regs(void *hal, HalTaskInfo *task)
     // When offset is zero row length should be total 16 aligned width
     val = VEPU_REG_IN_IMG_CHROMA_OFFSET(0)
           | VEPU_REG_IN_IMG_LUMA_OFFSET(0)
-          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(mbs_in_row * 16)
+          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(prep->hor_stride)
           | VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r)
           | VEPU_REG_IN_IMG_CTRL_OVRFLB(overfill_b)
           | VEPU_REG_IN_IMG_CTRL_FMT(hw_cfg->input_format)
@@ -373,7 +373,7 @@ static MPP_RET hal_h264e_vepu1_gen_regs_v2(void *hal, HalEncTask *task)
     // When offset is zero row length should be total 16 aligned width
     val = VEPU_REG_IN_IMG_CHROMA_OFFSET(0)
           | VEPU_REG_IN_IMG_LUMA_OFFSET(0)
-          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(mb_w * 16)
+          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(hw_prep->hor_stride)
           | VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r)
           | VEPU_REG_IN_IMG_CTRL_OVRFLB(overfill_b)
           | VEPU_REG_IN_IMG_CTRL_FMT(hw_prep->src_fmt)
@@ -226,7 +226,8 @@ MPP_RET hal_h264e_vepu2_gen_regs(void *hal, HalTaskInfo *task)
     // When offset is zero row length should be total 16 aligned width
     val = VEPU_REG_IN_IMG_CHROMA_OFFSET(0)
           | VEPU_REG_IN_IMG_LUMA_OFFSET(0)
-          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(mb_w * 16);
+          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(prep->hor_stride);
 
     H264E_HAL_SET_REG(reg, VEPU_REG_INPUT_LUMA_INFO, val);
 
     val = VEPU_REG_CHECKPOINT_CHECK1(hw_cfg->cp_target[0])
@@ -348,8 +348,10 @@ static MPP_RET hal_h264e_vepu2_gen_regs_v2(void *hal, HalEncTask *task)
     RK_U32 scaler = MPP_MAX(1, 200 / (mb_w + mb_h));
 
     RK_U32 skip_penalty = MPP_MIN(255, h264_skip_sad_penalty[hw_mbrc->qp_init] * scaler);
 
     RK_U32 overfill_r = (hw_prep->src_w & 0x0f) ?
                         ((16 - (hw_prep->src_w & 0x0f)) / 4) : 0;
     RK_U32 overfill_b = (hw_prep->src_h & 0x0f) ?
                         (16 - (hw_prep->src_h & 0x0f)) : 0;
 
@@ -362,7 +364,8 @@ static MPP_RET hal_h264e_vepu2_gen_regs_v2(void *hal, HalEncTask *task)
     // When offset is zero row length should be total 16 aligned width
     val = VEPU_REG_IN_IMG_CHROMA_OFFSET(0)
           | VEPU_REG_IN_IMG_LUMA_OFFSET(0)
-          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(mb_w * 16);
+          | VEPU_REG_IN_IMG_CTRL_ROW_LEN(hw_prep->hor_stride);
 
     H264E_HAL_SET_REG(reg, VEPU_REG_INPUT_LUMA_INFO, val);
 
     val = VEPU_REG_CHECKPOINT_CHECK1(hw_mbrc->cp_target[0])
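The four ROW_LEN hunks above stop deriving the row length from the macroblock count and program the caller's pixel stride instead. A sketch of why the two differ for padded buffers, assuming a hypothetical 1918x1080 source stored at a 1928-pixel stride:

#define MPP_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
    int width      = 1918;
    int hor_stride = 1928;  /* caller's stride in pixels, 8-aligned */
    int mb_w       = MPP_ALIGN(width, 16) / 16;

    int row_len_old = mb_w * 16;   /* 1920: ignores the padding */
    int row_len_new = hor_stride;  /* 1928: matches the real line pitch */

    return row_len_old != row_len_new;  /* nonzero whenever stride != 16-aligned width */
}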
@@ -365,25 +365,33 @@ MPP_RET h264e_vepu_prep_setup(HalH264eVepuPrep *prep, MppEncPrepCfg *cfg)
     }
 
     /* NOTE: vepu only support 8bit encoding and stride must match with width align to 16 */
-    RK_S32 hor_stride = MPP_ALIGN(cfg->width, 16);
+    RK_S32 hor_stride = cfg->hor_stride;
     RK_S32 ver_stride = cfg->ver_stride;
 
     prep->offset_cb = 0;
     prep->offset_cr = 0;
+    prep->hor_stride = hor_stride;
 
     switch (format & MPP_FRAME_FMT_MASK) {
     case MPP_FMT_YUV420SP : {
-        if (cfg->hor_stride != MPP_ALIGN(cfg->width, 16))
-            mpp_log_f("vepu only support 16byte aligned YUV420SP horizontal stride %d vs width %d\n",
-                      cfg->hor_stride, cfg->width);
+        if (!prep->stride_workaround &&
+            hor_stride != MPP_ALIGN(hor_stride, 8)) {
+            mpp_log_f("vepu only support 8byte aligned YUV420SP horizontal stride %d vs width %d\n",
+                      hor_stride, cfg->width);
+            prep->stride_workaround = 1;
+        }
 
         prep->offset_cb = hor_stride * ver_stride;
         prep->size_y = hor_stride * MPP_ALIGN(prep->src_h, 16);
         prep->size_c = hor_stride / 2 * MPP_ALIGN(prep->src_h / 2, 8);
     } break;
     case MPP_FMT_YUV420P : {
-        if (cfg->hor_stride != MPP_ALIGN(cfg->width, 16))
+        if (!prep->stride_workaround &&
+            hor_stride != MPP_ALIGN(hor_stride, 16)) {
             mpp_log_f("vepu only support 16byte aligned YUV420P horizontal stride %d vs width %d\n",
-                      cfg->hor_stride, cfg->width);
+                      hor_stride, cfg->width);
+            prep->stride_workaround = 1;
+        }
 
         prep->offset_cb = hor_stride * ver_stride;
         prep->offset_cr = prep->offset_cb + ((hor_stride * ver_stride) / 4);
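With hor_stride now taken from the caller, the plane offsets in the hunk follow directly from the stride and vertical stride. A worked example for a hypothetical 640x480 YUV420P buffer with a 656-pixel (16-aligned) stride, mirroring the expressions above:

#define MPP_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
    int hor_stride = 656;   /* 16-pixel aligned, as the 420P branch expects */
    int ver_stride = 480;
    int src_h      = 480;

    int offset_cb = hor_stride * ver_stride;                    /* 314880 */
    int offset_cr = offset_cb + (hor_stride * ver_stride) / 4;  /* 393600 */
    int size_y    = hor_stride * MPP_ALIGN(src_h, 16);          /* 314880 */

    return !(offset_cb && offset_cr && size_y);
}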
@@ -392,14 +400,17 @@ MPP_RET h264e_vepu_prep_setup(HalH264eVepuPrep *prep, MppEncPrepCfg *cfg)
     } break;
     case MPP_FMT_YUV422_YUYV :
     case MPP_FMT_YUV422_UYVY : {
-        if (cfg->hor_stride != (MPP_ALIGN(cfg->width, 16) * 2) && !prep->stride_workaround) {
-            mpp_log_f("vepu only support 16 pixel aligned YUV422 horizontal stride %d vs width %d\n",
-                      cfg->hor_stride, cfg->width);
+        if (!prep->stride_workaround &&
+            ((hor_stride != MPP_ALIGN(hor_stride, 8 * 2)) ||
+             (hor_stride < cfg->width * 2))) {
+            mpp_log_f("vepu only support 16bit pixel aligned YUV422 horizontal stride %d vs width %d\n",
+                      hor_stride, cfg->width);
             prep->stride_workaround = 1;
         }
 
-        prep->size_y = hor_stride * 2 * MPP_ALIGN(prep->src_h, 16);
+        prep->size_y = hor_stride * MPP_ALIGN(prep->src_h, 16);
         prep->size_c = 0;
+        prep->hor_stride = hor_stride / 2;
     } break;
     case MPP_FMT_RGB565 :
     case MPP_FMT_BGR565 :
@@ -407,14 +418,17 @@ MPP_RET h264e_vepu_prep_setup(HalH264eVepuPrep *prep, MppEncPrepCfg *cfg)
     case MPP_FMT_BGR555 :
     case MPP_FMT_RGB444 :
     case MPP_FMT_BGR444 : {
-        if ((cfg->hor_stride != cfg->width * 2) && !prep->stride_workaround) {
+        if (!prep->stride_workaround &&
+            ((hor_stride != MPP_ALIGN(hor_stride, 8 * 2)) ||
+             (hor_stride < cfg->width * 2))) {
             mpp_log_f("vepu only support matched 16bit pixel horizontal stride %d vs width %d\n",
-                      cfg->hor_stride, cfg->width);
+                      hor_stride, cfg->width);
             prep->stride_workaround = 1;
         }
 
-        prep->size_y = hor_stride * 2 * MPP_ALIGN(prep->src_h, 16);
+        prep->size_y = hor_stride * MPP_ALIGN(prep->src_h, 16);
         prep->size_c = 0;
+        prep->hor_stride = hor_stride / 2;
     } break;
     case MPP_FMT_ARGB8888 :
     case MPP_FMT_ABGR8888 :
@@ -422,14 +436,17 @@ MPP_RET h264e_vepu_prep_setup(HalH264eVepuPrep *prep, MppEncPrepCfg *cfg)
     case MPP_FMT_BGRA8888 :
     case MPP_FMT_RGB101010 :
     case MPP_FMT_BGR101010 : {
-        if ((cfg->hor_stride != cfg->width * 4) && !prep->stride_workaround) {
+        if (!prep->stride_workaround &&
+            ((hor_stride == MPP_ALIGN(hor_stride, 8 * 4)) ||
+             (hor_stride < cfg->width * 4))) {
             mpp_log_f("vepu only support matched 32bit pixel horizontal stride %d vs width %d\n",
-                      cfg->hor_stride, cfg->width);
+                      hor_stride, cfg->width);
             prep->stride_workaround = 1;
         }
 
-        prep->size_y = hor_stride * 4 * MPP_ALIGN(prep->src_h, 16);
+        prep->size_y = hor_stride * MPP_ALIGN(prep->src_h, 16);
         prep->size_c = 0;
+        prep->hor_stride = hor_stride / 4;
     } break;
     default: {
         mpp_err_f("invalid format %d", format);
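Note the pattern shared by the packed-format branches above: the caller's byte stride is divided by the bytes-per-pixel of the format before being stored in prep->hor_stride, since the hardware counts the stride in pixels (point 1 of the commit message). A small standalone illustration, with the byte strides as assumed example values:

#include <stdio.h>

int main(void)
{
    /* a 1920-wide YUYV line padded to 3856 bytes: 2 bytes/pixel */
    printf("YUV422 pixel stride: %d\n", 3856 / 2);   /* 1928 */
    /* the same line in ARGB8888 padded to 7712 bytes: 4 bytes/pixel */
    printf("ARGB8888 pixel stride: %d\n", 7712 / 4); /* 1928 */
    return 0;
}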
@@ -455,16 +472,16 @@ MPP_RET h264e_vepu_prep_get_addr(HalH264eVepuPrep *prep, MppBuffer buffer,
     (*addr)[2] = fd + (prep->offset_cr << 10);
 
     if (size < prep->size_y)
-        mpp_err("warnning: input buffer size 0x%x is smaller then required size 0x%x",
+        mpp_err("warnning: input buffer size 0x%x is smaller than required size 0x%x",
                 size, prep->size_y);
 
     if (prep->size_c && (prep->offset_cb || prep->offset_cr)) {
         if (prep->offset_cb && (size < prep->offset_cb + prep->size_c))
-            mpp_err("warnning: input buffer size 0x%x is smaller then cb requirement 0x%x + 0x%x",
+            mpp_err("warnning: input buffer size 0x%x is smaller than cb requirement 0x%x + 0x%x",
                     size, prep->offset_cb, prep->size_c);
 
         if (prep->offset_cr && (size < prep->offset_cr + prep->size_c))
-            mpp_err("warnning: input buffer size 0x%x is smaller then cb requirement 0x%x + 0x%x",
+            mpp_err("warnning: input buffer size 0x%x is smaller than cb requirement 0x%x + 0x%x",
                     size, prep->offset_cr, prep->size_c);
     }
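The fd + (offset << 10) expressions above pack a dma-buf fd and a plane byte offset into one register word: the fd stays in the low 10 bits and the kernel driver adds the offset back after resolving the fd. That reading of the rockchip vpu_service convention is inferred from the shift in the code, not from a spec; a sketch of the round trip:

#include <stdio.h>

int main(void)
{
    unsigned int fd        = 42;      /* dma-buf fd, assumed to fit in 10 bits */
    unsigned int offset_cb = 314880;  /* byte offset of the Cb plane */

    unsigned int packed = fd + (offset_cb << 10);

    /* what a driver following this convention would recover */
    printf("fd = %u, offset = %u\n", packed & 0x3ff, packed >> 10);
    return 0;
}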
@@ -34,6 +34,8 @@ typedef struct HalH264eVepuInput_t {
     RK_S32 src_fmt;
     RK_S32 src_w;
     RK_S32 src_h;
+    RK_S32 hor_stride;
+
     size_t size_y;
     size_t size_c;
@@ -33,8 +33,11 @@ RK_S32 mpi_enc_width_default_stride(RK_S32 width, MppFrameFormat fmt)
     RK_S32 stride = 0;
 
     switch (fmt & MPP_FRAME_FMT_MASK) {
-    case MPP_FMT_YUV420SP :
+    case MPP_FMT_YUV420SP : {
+        stride = MPP_ALIGN(width, 8);
+    } break;
     case MPP_FMT_YUV420P : {
+        /* NOTE: 420P need to align to 16 so chroma can align to 8 */
         stride = MPP_ALIGN(width, 16);
     } break;
     case MPP_FMT_YUV422P:
@@ -49,17 +52,20 @@ RK_S32 mpi_enc_width_default_stride(RK_S32 width, MppFrameFormat fmt)
     case MPP_FMT_YUV422_YVYU :
     case MPP_FMT_YUV422_UYVY :
     case MPP_FMT_YUV422_VYUY : {
-        stride = MPP_ALIGN(width * 2, 16);
+        /* NOTE: for vepu limitation */
+        stride = MPP_ALIGN(width, 8) * 2;
     } break;
     case MPP_FMT_RGB888 :
     case MPP_FMT_BGR888 : {
-        stride = width * 3;
+        /* NOTE: for vepu limitation */
+        stride = MPP_ALIGN(width, 8) * 3;
     } break;
     case MPP_FMT_ARGB8888 :
     case MPP_FMT_ABGR8888:
     case MPP_FMT_BGRA8888:
     case MPP_FMT_RGBA8888: {
-        stride = width * 4;
+        /* NOTE: for vepu limitation */
+        stride = MPP_ALIGN(width, 8) * 4;
     } break;
     default : {
         mpp_err_f("do not support type %d\n", fmt);
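What the updated defaults produce for an odd width such as 1918 (an example value I picked; the old RGB defaults gave a 1918-pixel line, which is not the multiple of 8 pixels the vepu requires):

#include <stdio.h>

#define MPP_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
    int width = 1918;

    /* mirrors the updated default-stride rules above */
    printf("YUV420SP : %d\n", MPP_ALIGN(width, 8));      /* 1920 */
    printf("YUV420P  : %d\n", MPP_ALIGN(width, 16));     /* 1920 */
    printf("YUYV     : %d\n", MPP_ALIGN(width, 8) * 2);  /* 3840 */
    printf("RGB888   : %d\n", MPP_ALIGN(width, 8) * 3);  /* 5760, was 1918 * 3 = 5754 */
    printf("ARGB8888 : %d\n", MPP_ALIGN(width, 8) * 4);  /* 7680, was 1918 * 4 = 7672 */
    return 0;
}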
@@ -527,7 +527,7 @@ MPP_RET fill_image(RK_U8 *buf, RK_U32 width, RK_U32 height,
                 p[x + 0] = i + y + frame_count * 3;
                 p[x + 1] = 128 + i + frame_count * 2;
                 p[x + 2] = 64 + i + frame_count * 5;
-                p[x + 3] = 0;
+                p[x + 3] = i + y / 2 + frame_count * 3;
             }
         }
     } break;
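The test-pattern hunk above now varies the fourth byte instead of writing zero. On the same theme as the rest of the commit, any such fill loop has to advance by the buffer's byte stride rather than width * 4; a minimal standalone sketch (not the mpp_enc_test source) for a 4-byte RGBA-style format:

#include <stdint.h>

static void fill_rgba(uint8_t *buf, uint32_t width, uint32_t height,
                      uint32_t hor_stride_bytes, uint32_t frame_count)
{
    uint32_t x, y;

    for (y = 0; y < height; y++) {
        uint8_t *p = buf + y * hor_stride_bytes;  /* skip the padding bytes */
        for (x = 0; x < width * 4; x += 4) {
            uint8_t i = (uint8_t)(x / 4);
            p[x + 0] = i + y + frame_count * 3;
            p[x + 1] = 128 + i + frame_count * 2;
            p[x + 2] = 64 + i + frame_count * 5;
            p[x + 3] = i + y / 2 + frame_count * 3;  /* the new varying pattern */
        }
    }
}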