diff --git a/mpp/codec/dec/h265/h265d_parser2_syntax.c b/mpp/codec/dec/h265/h265d_parser2_syntax.c index 44497339..d63e3028 100644 --- a/mpp/codec/dec/h265/h265d_parser2_syntax.c +++ b/mpp/codec/dec/h265/h265d_parser2_syntax.c @@ -58,6 +58,8 @@ static void fill_picture_parameters(const HEVCContext *h, const HEVCSPS *sps = (HEVCSPS *)h->sps_list[pps->sps_id]; const ShortTermRPS *src_rps = sps->st_rps; Short_SPS_RPS_HEVC *dst_rps = pp->sps_st_rps; + const ShortTermRPS *cur_src_rps = h->sh.short_term_rps; + Short_SPS_RPS_HEVC *cur_dst_rps = &pp->cur_st_rps; RK_U32 i, j; RK_U32 rps_used[16]; @@ -173,6 +175,19 @@ static void fill_picture_parameters(const HEVCContext *h, pp->sps_lt_rps[i].used_by_curr_pic_lt_flag = sps->used_by_curr_pic_lt_sps_flag[i]; } + if (cur_src_rps) { + RK_U32 n_pics = h->sh.short_term_rps->num_negative_pics; + cur_dst_rps->num_negative_pics = n_pics; + cur_dst_rps->num_positive_pics = cur_src_rps->num_delta_pocs - n_pics; + for (i = 0; i < cur_dst_rps->num_negative_pics; i++) { + cur_dst_rps->delta_poc_s0[i] = cur_src_rps->delta_poc[i]; + cur_dst_rps->s0_used_flag[i] = cur_src_rps->used[i]; + } + for (i = 0; i < cur_dst_rps->num_positive_pics; i++) { + cur_dst_rps->delta_poc_s1[i] = cur_src_rps->delta_poc[i + n_pics]; + cur_dst_rps->s1_used_flag[i] = cur_src_rps->used[i + n_pics]; + } + } for (i = 0; i < 64; i++) { if (i < sps->nb_st_rps) { diff --git a/mpp/common/h265d_syntax.h b/mpp/common/h265d_syntax.h index 96b11fcf..1706317b 100644 --- a/mpp/common/h265d_syntax.h +++ b/mpp/common/h265d_syntax.h @@ -189,6 +189,7 @@ typedef struct _DXVA_PicParams_HEVC { UINT32 sps_id; INT current_poc; + Short_SPS_RPS_HEVC cur_st_rps; Short_SPS_RPS_HEVC sps_st_rps[64]; LT_SPS_RPS_HEVC sps_lt_rps[32]; diff --git a/mpp/hal/rkdec/CMakeLists.txt b/mpp/hal/rkdec/CMakeLists.txt index 982c2cb8..03f1fcc3 100644 --- a/mpp/hal/rkdec/CMakeLists.txt +++ b/mpp/hal/rkdec/CMakeLists.txt @@ -3,6 +3,7 @@ include_directories(inc) add_library(vdpu34x_com STATIC vdpu34x_com.c vdpu382_com.c) add_library(vdpu383_com STATIC vdpu383_com.c) +add_library(vdpu384a_com STATIC vdpu384a_com.c) if( HAVE_AVSD ) add_subdirectory(avsd) diff --git a/mpp/hal/rkdec/h264d/CMakeLists.txt b/mpp/hal/rkdec/h264d/CMakeLists.txt index 788747f7..c16ba2f8 100644 --- a/mpp/hal/rkdec/h264d/CMakeLists.txt +++ b/mpp/hal/rkdec/h264d/CMakeLists.txt @@ -6,6 +6,7 @@ set(HAL_H264D_SRC hal_h264d_vdpu34x.c hal_h264d_vdpu382.c hal_h264d_vdpu383.c + hal_h264d_vdpu384a.c hal_h264d_rkv_reg.c hal_h264d_vdpu2.c hal_h264d_vdpu1.c @@ -14,6 +15,6 @@ set(HAL_H264D_SRC add_library(hal_h264d STATIC ${HAL_H264D_SRC}) -target_link_libraries(hal_h264d vdpu34x_com vdpu383_com mpp_base mpp_hal) +target_link_libraries(hal_h264d vdpu34x_com vdpu383_com vdpu384a_com mpp_base mpp_hal) set_target_properties(hal_h264d PROPERTIES FOLDER "mpp/hal") diff --git a/mpp/hal/rkdec/h264d/hal_h264d_api.c b/mpp/hal/rkdec/h264d/hal_h264d_api.c index 430aeb1a..40b67393 100644 --- a/mpp/hal/rkdec/h264d/hal_h264d_api.c +++ b/mpp/hal/rkdec/h264d/hal_h264d_api.c @@ -40,6 +40,7 @@ #include "hal_h264d_vdpu34x.h" #include "hal_h264d_vdpu382.h" #include "hal_h264d_vdpu383.h" +#include "hal_h264d_vdpu384a.h" #include "hal_h264d_vdpu2.h" #include "hal_h264d_vdpu1.h" @@ -297,6 +298,9 @@ MPP_RET hal_h264d_init(void *hal, MppHalCfg *cfg) case HWID_VDPU383 : { p_hal->hal_api = &hal_h264d_vdpu383; } break; + case HWID_VDPU384A : { + p_hal->hal_api = &hal_h264d_vdpu384a; + } break; case HWID_VDPU382_RK3528 : case HWID_VDPU382_RK3562 : { p_hal->hal_api = &hal_h264d_vdpu382; diff --git 
a/mpp/hal/rkdec/h264d/hal_h264d_vdpu384a.c b/mpp/hal/rkdec/h264d/hal_h264d_vdpu384a.c new file mode 100644 index 00000000..cbaac403 --- /dev/null +++ b/mpp/hal/rkdec/h264d/hal_h264d_vdpu384a.c @@ -0,0 +1,1066 @@ +/* SPDX-License-Identifier: Apache-2.0 OR MIT */ +/* + * Copyright (c) 2024 Rockchip Electronics Co., Ltd. + */ + +#define MODULE_TAG "hal_h264d_vdpu384a" + +#include + +#include "mpp_env.h" +#include "mpp_mem.h" +#include "mpp_common.h" +#include "mpp_bitput.h" +#include "mpp_buffer_impl.h" + +#include "hal_h264d_global.h" +#include "hal_h264d_vdpu384a.h" +#include "vdpu384a_h264d.h" +#include "mpp_dec_cb_param.h" + +/* Number of registers for the decoder */ +#define DEC_VDPU384A_REGISTERS 276 + +#define VDPU384A_SPSPPS_SIZE (MPP_ALIGN(2266 + 64, 128) / 8) /* bytes, 2266 bits + 64 reserved */ +#define VDPU384A_SCALING_LIST_SIZE (6*16+2*64 + 128) /* bytes */ +#define VDPU384A_ERROR_INFO_SIZE (256*144*4) /* bytes */ +#define H264_CTU_SIZE 16 + +#define VDPU384A_ERROR_INFO_ALIGNED_SIZE (0) +#define VDPU384A_SPSPPS_ALIGNED_SIZE (MPP_ALIGN(VDPU384A_SPSPPS_SIZE, SZ_4K)) +#define VDPU384A_SCALING_LIST_ALIGNED_SIZE (MPP_ALIGN(VDPU384A_SCALING_LIST_SIZE, SZ_4K)) +#define VDPU384A_STREAM_INFO_SET_SIZE (VDPU384A_SPSPPS_ALIGNED_SIZE + \ + VDPU384A_SCALING_LIST_ALIGNED_SIZE) + +#define VDPU384A_ERROR_INFO_OFFSET (0) +#define VDPU384A_STREAM_INFO_OFFSET_BASE (VDPU384A_ERROR_INFO_OFFSET + VDPU384A_ERROR_INFO_ALIGNED_SIZE) +#define VDPU384A_SPSPPS_OFFSET(pos) (VDPU384A_STREAM_INFO_OFFSET_BASE + (VDPU384A_STREAM_INFO_SET_SIZE * pos)) +#define VDPU384A_SCALING_LIST_OFFSET(pos) (VDPU384A_SPSPPS_OFFSET(pos) + VDPU384A_SPSPPS_ALIGNED_SIZE) +#define VDPU384A_INFO_BUFFER_SIZE(cnt) (VDPU384A_STREAM_INFO_OFFSET_BASE + (VDPU384A_STREAM_INFO_SET_SIZE * cnt)) + +#define SET_REF_INFO(regs, index, field, value)\ + do{ \ + switch(index){\ + case 0: regs.reg99.ref0_##field = value; break;\ + case 1: regs.reg99.ref1_##field = value; break;\ + case 2: regs.reg99.ref2_##field = value; break;\ + case 3: regs.reg99.ref3_##field = value; break;\ + case 4: regs.reg100.ref4_##field = value; break;\ + case 5: regs.reg100.ref5_##field = value; break;\ + case 6: regs.reg100.ref6_##field = value; break;\ + case 7: regs.reg100.ref7_##field = value; break;\ + case 8: regs.reg101.ref8_##field = value; break;\ + case 9: regs.reg101.ref9_##field = value; break;\ + case 10: regs.reg101.ref10_##field = value; break;\ + case 11: regs.reg101.ref11_##field = value; break;\ + case 12: regs.reg102.ref12_##field = value; break;\ + case 13: regs.reg102.ref13_##field = value; break;\ + case 14: regs.reg102.ref14_##field = value; break;\ + case 15: regs.reg102.ref15_##field = value; break;\ + default: break;}\ + }while(0) + +#define VDPU384A_FAST_REG_SET_CNT 3 + +typedef struct h264d_rkv_buf_t { + RK_U32 valid; + Vdpu384aH264dRegSet *regs; +} H264dRkvBuf_t; + +typedef struct Vdpu384aH264dRegCtx_t { + RK_U8 spspps[VDPU384A_SPSPPS_SIZE]; + RK_U8 sclst[VDPU384A_SCALING_LIST_SIZE]; + + MppBuffer bufs; + RK_S32 bufs_fd; + void *bufs_ptr; + RK_U32 offset_errinfo; + RK_U32 offset_spspps[VDPU384A_FAST_REG_SET_CNT]; + RK_U32 offset_sclst[VDPU384A_FAST_REG_SET_CNT]; + + H264dRkvBuf_t reg_buf[VDPU384A_FAST_REG_SET_CNT]; + + RK_U32 spspps_offset; + RK_U32 sclst_offset; + + RK_S32 width; + RK_S32 height; + /* rcb buffers info */ + RK_U32 bit_depth; + RK_U32 mbaff; + RK_U32 chroma_format_idc; + + RK_S32 rcb_buf_size; + Vdpu384aRcbInfo rcb_info[RCB_BUF_COUNT]; + MppBuffer rcb_buf[VDPU384A_FAST_REG_SET_CNT]; + + Vdpu384aH264dRegSet *regs; + HalBufs
origin_bufs; +} Vdpu384aH264dRegCtx; + +MPP_RET vdpu384a_h264d_deinit(void *hal); +static RK_U32 rkv_ver_align(RK_U32 val) +{ + return MPP_ALIGN(val, 16); +} + +static RK_U32 rkv_len_align(RK_U32 val) +{ + return (MPP_ALIGN(val, 16) * 3 / 2); +} + +static RK_U32 rkv_len_align_422(RK_U32 val) +{ + return ((5 * MPP_ALIGN(val, 16)) / 2); +} + +static MPP_RET vdpu384a_setup_scale_origin_bufs(H264dHalCtx_t *p_hal, MppFrame mframe) +{ + Vdpu384aH264dRegCtx *ctx = (Vdpu384aH264dRegCtx *)p_hal->reg_ctx; + /* for 8K FrameBuf scale mode */ + size_t origin_buf_size = 0; + + origin_buf_size = mpp_frame_get_buf_size(mframe); + + if (!origin_buf_size) { + mpp_err_f("origin_bufs get buf size failed\n"); + return MPP_NOK; + } + if (ctx->origin_bufs) { + hal_bufs_deinit(ctx->origin_bufs); + ctx->origin_bufs = NULL; + } + hal_bufs_init(&ctx->origin_bufs); + if (!ctx->origin_bufs) { + mpp_err_f("origin_bufs init fail\n"); + return MPP_ERR_NOMEM; + } + hal_bufs_setup(ctx->origin_bufs, 16, 1, &origin_buf_size); + + return MPP_OK; +} + +static MPP_RET prepare_spspps(H264dHalCtx_t *p_hal, RK_U64 *data, RK_U32 len) +{ + RK_S32 i = 0, j = 0; + RK_S32 is_long_term = 0, voidx = 0; + DXVA_PicParams_H264_MVC *pp = p_hal->pp; + RK_U32 tmp = 0; + BitputCtx_t bp; + + mpp_set_bitput_ctx(&bp, data, len); + + if (!p_hal->fast_mode && !pp->spspps_update) { + bp.index = 2; + bp.bitpos = 24; + bp.bvalue = bp.pbuf[bp.index] & 0xFFFFFF; + } else { + RK_U32 pic_width, pic_height; + + //!< sps syntax + pic_width = 16 * (pp->wFrameWidthInMbsMinus1 + 1); + pic_height = 16 * (pp->wFrameHeightInMbsMinus1 + 1); + pic_height *= (2 - pp->frame_mbs_only_flag); + pic_height /= (1 + pp->field_pic_flag); + mpp_put_bits(&bp, pp->seq_parameter_set_id, 4); + mpp_put_bits(&bp, pp->profile_idc, 8); + mpp_put_bits(&bp, pp->constraint_set3_flag, 1); + mpp_put_bits(&bp, pp->chroma_format_idc, 2); + mpp_put_bits(&bp, pp->bit_depth_luma_minus8, 3); + mpp_put_bits(&bp, pp->bit_depth_chroma_minus8, 3); + mpp_put_bits(&bp, 0, 1); // set 0 + mpp_put_bits(&bp, pp->log2_max_frame_num_minus4, 4); + mpp_put_bits(&bp, pp->num_ref_frames, 5); + mpp_put_bits(&bp, pp->pic_order_cnt_type, 2); + mpp_put_bits(&bp, pp->log2_max_pic_order_cnt_lsb_minus4, 4); + mpp_put_bits(&bp, pp->delta_pic_order_always_zero_flag, 1); + mpp_put_bits(&bp, pic_width, 16); + mpp_put_bits(&bp, pic_height, 16); + mpp_put_bits(&bp, pp->frame_mbs_only_flag, 1); + mpp_put_bits(&bp, pp->MbaffFrameFlag, 1); + mpp_put_bits(&bp, pp->direct_8x8_inference_flag, 1); + /* multi-view */ + mpp_put_bits(&bp, pp->mvc_extension_enable, 1); + if (pp->mvc_extension_enable) { + mpp_put_bits(&bp, (pp->num_views_minus1 + 1), 2); + mpp_put_bits(&bp, pp->view_id[0], 10); + mpp_put_bits(&bp, pp->view_id[1], 10); + } else { + mpp_put_bits(&bp, 0, 22); + } + // hw_fifo_align_bits(&bp, 128); + //!< pps syntax + mpp_put_bits(&bp, pp->pps_pic_parameter_set_id, 8); + mpp_put_bits(&bp, pp->pps_seq_parameter_set_id, 5); + mpp_put_bits(&bp, pp->entropy_coding_mode_flag, 1); + mpp_put_bits(&bp, pp->pic_order_present_flag, 1); + + mpp_put_bits(&bp, pp->num_ref_idx_l0_active_minus1, 5); + mpp_put_bits(&bp, pp->num_ref_idx_l1_active_minus1, 5); + mpp_put_bits(&bp, pp->weighted_pred_flag, 1); + mpp_put_bits(&bp, pp->weighted_bipred_idc, 2); + mpp_put_bits(&bp, pp->pic_init_qp_minus26, 7); + mpp_put_bits(&bp, pp->pic_init_qs_minus26, 6); + mpp_put_bits(&bp, pp->chroma_qp_index_offset, 5); + mpp_put_bits(&bp, pp->deblocking_filter_control_present_flag, 1); + mpp_put_bits(&bp, pp->constrained_intra_pred_flag, 1); + 
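+ /* All of these fields are packed back to back with the fixed-width bitput helpers. A minimal sketch of the same pattern, using only calls seen in this function (field names and widths illustrative): RK_U64 buf[8] = { 0 }; BitputCtx_t bp; mpp_set_bitput_ctx(&bp, buf, 8); mpp_put_bits(&bp, id, 4); mpp_put_bits(&bp, mode, 8); mpp_put_align(&bp, 64, 0); -- the length passed to mpp_set_bitput_ctx is in 64-bit words, matching the sizeof(ctx->spspps) / 8 that the caller passes in. */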
mpp_put_bits(&bp, pp->redundant_pic_cnt_present_flag, 1); + mpp_put_bits(&bp, pp->transform_8x8_mode_flag, 1); + mpp_put_bits(&bp, pp->second_chroma_qp_index_offset, 5); + mpp_put_bits(&bp, pp->scaleing_list_enable_flag, 1); + } + + //!< set dpb + for (i = 0; i < 16; i++) { + is_long_term = (pp->RefFrameList[i].bPicEntry != 0xff) ? pp->RefFrameList[i].AssociatedFlag : 0; + tmp |= (RK_U32)(is_long_term & 0x1) << i; + } + for (i = 0; i < 16; i++) { + voidx = (pp->RefFrameList[i].bPicEntry != 0xff) ? pp->RefPicLayerIdList[i] : 0; + tmp |= (RK_U32)(voidx & 0x1) << (i + 16); + } + mpp_put_bits(&bp, tmp, 32); + /* set current frame */ + mpp_put_bits(&bp, pp->field_pic_flag, 1); + mpp_put_bits(&bp, (pp->field_pic_flag && pp->CurrPic.AssociatedFlag), 1); + + mpp_put_bits(&bp, pp->CurrFieldOrderCnt[0], 32); + mpp_put_bits(&bp, pp->CurrFieldOrderCnt[1], 32); + + /* reference POCs */ + for (i = 0; i < 16; i++) { + mpp_put_bits(&bp, pp->FieldOrderCntList[i][0], 32); + mpp_put_bits(&bp, pp->FieldOrderCntList[i][1], 32); + } + + tmp = 0; + for (i = 0; i < 16; i++) + tmp |= ((pp->RefPicFiledFlags >> i) & 0x01) << i; + for (i = 0; i < 16; i++) + tmp |= ((pp->UsedForReferenceFlags >> (2 * i + 0)) & 0x01) << (i + 16); + mpp_put_bits(&bp, tmp, 32); + + tmp = 0; + for (i = 0; i < 16; i++) + tmp |= ((pp->UsedForReferenceFlags >> (2 * i + 1)) & 0x01) << i; + for (i = 0; i < 16; i++) + tmp |= ((pp->RefPicColmvUsedFlags >> i) & 0x01) << (i + 16); + mpp_put_bits(&bp, tmp, 32); + + /* rps */ + { + RK_S32 dpb_idx = 0; + RK_S32 dpb_valid = 0, bottom_flag = 0; + RK_U32 max_frame_num = 0; + RK_U16 frame_num_wrap = 0; + + max_frame_num = 1 << (pp->log2_max_frame_num_minus4 + 4); + for (i = 0; i < 16; i++) { + if ((pp->NonExistingFrameFlags >> i) & 0x01) { + frame_num_wrap = 0; + } else { + if (pp->RefFrameList[i].AssociatedFlag) { + frame_num_wrap = pp->FrameNumList[i]; + } else { + frame_num_wrap = (pp->FrameNumList[i] > pp->frame_num) ? + (pp->FrameNumList[i] - max_frame_num) : pp->FrameNumList[i]; + } + } + mpp_put_bits(&bp, frame_num_wrap, 16); + } + + /* dpb_idx_p_l0_32x7bit + dpb_idx_b_l0_32x7bit + dpb_idx_b_l1_32x7bit */ + for (j = 0; j < 3; j++) { + for (i = 0; i < 32; i++) { + tmp = 0; + dpb_valid = (p_hal->slice_long[0].RefPicList[j][i].bPicEntry == 0xff) ? 0 : 1; + dpb_idx = dpb_valid ? p_hal->slice_long[0].RefPicList[j][i].Index7Bits : 0; + bottom_flag = dpb_valid ? p_hal->slice_long[0].RefPicList[j][i].AssociatedFlag : 0; + voidx = dpb_valid ?
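+ /* Each RefPicList entry below is packed into a 7-bit field: bits [3:0] dpb index, bit 4 entry-valid, bit 5 bottom-field flag, bit 6 view index (MVC); e.g. a valid top-field entry for dpb slot 3 in view 0 packs as (3 | (1 << 4)) = 0x13. */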
pp->RefPicLayerIdList[dpb_idx] : 0; + tmp |= (RK_U32)(dpb_idx | (dpb_valid << 4)) & 0x1f; + tmp |= (RK_U32)(bottom_flag & 0x1) << 5; + if (dpb_valid) + tmp |= (RK_U32)(voidx & 0x1) << 6; + mpp_put_bits(&bp, tmp, 7); + } + } + } + mpp_put_align(&bp, 64, 0);//128 + +#ifdef DUMP_VDPU384A_DATAS + { + char *cur_fname = "global_cfg.dat"; + memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path)); + sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname); + dump_data_to_file(dump_cur_fname_path, (void *)bp.pbuf, 64 * bp.index + bp.bitpos, 128, 0); + } +#endif + + return MPP_OK; +} + +static MPP_RET prepare_scanlist(H264dHalCtx_t *p_hal, RK_U8 *data, RK_U32 len) +{ + RK_U32 i = 0, j = 0, n = 0; + + if (!p_hal->pp->scaleing_list_enable_flag) + return MPP_OK; + + for (i = 0; i < 6; i++) { //4x4, 6 lists + /* dump by block4x4, vertical direction */ + for (j = 0; j < 4; j++) { + data[n++] = p_hal->qm->bScalingLists4x4[i][j * 4 + 0]; + data[n++] = p_hal->qm->bScalingLists4x4[i][j * 4 + 1]; + data[n++] = p_hal->qm->bScalingLists4x4[i][j * 4 + 2]; + data[n++] = p_hal->qm->bScalingLists4x4[i][j * 4 + 3]; + } + } + + for (i = 0; i < 2; i++) { //8x8, 2 lists + RK_U32 blk4_x = 0, blk4_y = 0; + + /* dump by block4x4, vertical direction */ + for (blk4_y = 0; blk4_y < 8; blk4_y += 4) { + for (blk4_x = 0; blk4_x < 8; blk4_x += 4) { + RK_U32 pos = blk4_y * 8 + blk4_x; + + for (j = 0; j < 4; j++) { + data[n++] = p_hal->qm->bScalingLists8x8[i][pos + j * 8 + 0]; + data[n++] = p_hal->qm->bScalingLists8x8[i][pos + j * 8 + 1]; + data[n++] = p_hal->qm->bScalingLists8x8[i][pos + j * 8 + 2]; + data[n++] = p_hal->qm->bScalingLists8x8[i][pos + j * 8 + 3]; + } + } + } + } + + mpp_assert(n <= len); + +#ifdef DUMP_VDPU384A_DATAS + { + char *cur_fname = "scanlist.dat"; + memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path)); + sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname); + dump_data_to_file(dump_cur_fname_path, (void *)data, 8 * n, 128, 0); + } +#endif + + return MPP_OK; +} + +static MPP_RET set_registers(H264dHalCtx_t *p_hal, Vdpu384aH264dRegSet *regs, HalTaskInfo *task) +{ + DXVA_PicParams_H264_MVC *pp = p_hal->pp; + HalBuf *mv_buf = NULL; + HalBuf *origin_buf = NULL; + Vdpu384aH264dRegCtx *ctx = (Vdpu384aH264dRegCtx *)p_hal->reg_ctx; + + // memset(regs, 0, sizeof(Vdpu384aH264dRegSet)); + regs->h264d_paras.reg66_stream_len = p_hal->strm_len; + + //!< calculate the yuv_frame_size + { + MppFrame mframe = NULL; + RK_U32 hor_virstride = 0; + RK_U32 ver_virstride = 0; + RK_U32 y_virstride = 0; + RK_U32 uv_virstride = 0; + + mpp_buf_slot_get_prop(p_hal->frame_slots, pp->CurrPic.Index7Bits, SLOT_FRAME_PTR, &mframe); + hor_virstride = mpp_frame_get_hor_stride(mframe); + ver_virstride = mpp_frame_get_ver_stride(mframe); + y_virstride = hor_virstride * ver_virstride; + uv_virstride = hor_virstride * ver_virstride / 2; + + if (MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe))) { + RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe); + RK_U32 fbd_offset; + + fbd_offset = fbc_hdr_stride * MPP_ALIGN(ver_virstride, 64) / 16; + + regs->ctrl_regs.reg9.dpb_data_sel = 0; + regs->ctrl_regs.reg9.dpb_output_dis = 0; + regs->ctrl_regs.reg9.pp_m_output_mode = 0; + + regs->h264d_paras.reg68_dpb_hor_virstride = fbc_hdr_stride / 64; + regs->h264d_addrs.reg193_dpb_fbc64x4_payload_offset = fbd_offset; + } else if (MPP_FRAME_FMT_IS_TILE(mpp_frame_get_fmt(mframe))) { + regs->ctrl_regs.reg9.dpb_data_sel = 1; + regs->ctrl_regs.reg9.dpb_output_dis = 1; + regs->ctrl_regs.reg9.pp_m_output_mode = 2; + +
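+ /* Worked example of the stride setup here, assuming a 1920x1088 NV12 frame: hor_virstride = 1920, ver_virstride = 1088, y_virstride = 1920 * 1088, uv_virstride = y_virstride / 2; raster path: reg77 = 1920 / 16 = 120, reg79 = (1920 * 1088) / 16 = 130560; tile4x4 path: reg77 = 1920 * 6 / 16 = 720 and reg79 covers Y plus UV. */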
regs->h264d_paras.reg77_pp_m_hor_stride = hor_virstride * 6 / 16; + regs->h264d_paras.reg79_pp_m_y_virstride = (y_virstride + uv_virstride) / 16; + } else { + regs->ctrl_regs.reg9.dpb_data_sel = 1; + regs->ctrl_regs.reg9.dpb_output_dis = 1; + regs->ctrl_regs.reg9.pp_m_output_mode = 1; + + regs->h264d_paras.reg77_pp_m_hor_stride = hor_virstride / 16; + regs->h264d_paras.reg78_pp_m_uv_hor_stride = hor_virstride / 16; + regs->h264d_paras.reg79_pp_m_y_virstride = y_virstride / 16; + } + } + //!< set current + { + MppBuffer mbuffer = NULL; + RK_S32 fd = -1; + + mpp_buf_slot_get_prop(p_hal->frame_slots, pp->CurrPic.Index7Bits, SLOT_BUFFER, &mbuffer); + fd = mpp_buffer_get_fd(mbuffer); + /* output rkfbc64 */ + // regs->h264d_addrs.reg168_dpb_decout_base = fd; + /* output raster/tile4x4 */ + regs->common_addr.reg135_pp_m_decout_base = fd; + regs->h264d_addrs.reg192_dpb_payload64x4_st_cur_base = fd; + + //colmv_cur_base + mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, pp->CurrPic.Index7Bits); + regs->h264d_addrs.reg216_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]); + regs->h264d_addrs.reg169_error_ref_base = fd; + } + //!< set reference + { + RK_S32 i = 0; + RK_S32 fd = -1; + RK_S32 ref_index = -1; + RK_S32 near_index = -1; + MppBuffer mbuffer = NULL; + RK_U32 min_frame_num = 0; + MppFrame mframe = NULL; + + for (i = 0; i < 15; i++) { + if (pp->RefFrameList[i].bPicEntry != 0xff) { + ref_index = pp->RefFrameList[i].Index7Bits; + near_index = pp->RefFrameList[i].Index7Bits; + } else { + ref_index = (near_index < 0) ? pp->CurrPic.Index7Bits : near_index; + } + /* mark 3 to differ from current frame */ + mpp_buf_slot_get_prop(p_hal->frame_slots, ref_index, SLOT_BUFFER, &mbuffer); + mpp_buf_slot_get_prop(p_hal->frame_slots, ref_index, SLOT_FRAME_PTR, &mframe); + if (ctx->origin_bufs && mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY) { + origin_buf = hal_bufs_get_buf(ctx->origin_bufs, ref_index); + mbuffer = origin_buf->buf[0]; + } + + if (pp->FrameNumList[i] < pp->frame_num && + pp->FrameNumList[i] > min_frame_num && + (!mpp_frame_get_errinfo(mframe))) { + min_frame_num = pp->FrameNumList[i]; + regs->h264d_addrs.reg169_error_ref_base = mpp_buffer_get_fd(mbuffer); + } + + fd = mpp_buffer_get_fd(mbuffer); + regs->h264d_addrs.reg170_185_ref_base[i] = fd; + regs->h264d_addrs.reg195_210_payload_st_ref_base[i] = fd; + mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, ref_index); + regs->h264d_addrs.reg217_232_colmv_ref_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]); + } + + if (pp->RefFrameList[15].bPicEntry != 0xff) { + ref_index = pp->RefFrameList[15].Index7Bits; + } else { + ref_index = (near_index < 0) ? 
pp->CurrPic.Index7Bits : near_index; + } + + mpp_buf_slot_get_prop(p_hal->frame_slots, ref_index, SLOT_BUFFER, &mbuffer); + fd = mpp_buffer_get_fd(mbuffer); + if (mpp_frame_get_thumbnail_en(mframe) == 2) { + origin_buf = hal_bufs_get_buf(ctx->origin_bufs, ref_index); + fd = mpp_buffer_get_fd(origin_buf->buf[0]); + } + regs->h264d_addrs.reg170_185_ref_base[15] = fd; + regs->h264d_addrs.reg195_210_payload_st_ref_base[15] = fd; + mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, ref_index); + regs->h264d_addrs.reg217_232_colmv_ref_base[15] = mpp_buffer_get_fd(mv_buf->buf[0]); + } + { + MppBuffer mbuffer = NULL; + + mpp_buf_slot_get_prop(p_hal->packet_slots, task->dec.input, SLOT_BUFFER, &mbuffer); + regs->common_addr.reg128_strm_base = mpp_buffer_get_fd(mbuffer); + regs->common_addr.reg129_stream_buf_st_base = mpp_buffer_get_fd(mbuffer); + regs->common_addr.reg130_stream_buf_end_base = mpp_buffer_get_fd(mbuffer); + mpp_dev_set_reg_offset(p_hal->dev, 130, mpp_buffer_get_size(mbuffer)); + // regs->h264d_paras.reg65_strm_start_bit = 2 * 8; +#ifdef DUMP_VDPU384A_DATAS + { + char *cur_fname = "stream_in.dat"; + memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path)); + sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname); + dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(mbuffer), + 8 * p_hal->strm_len, 128, 0); + } +#endif + } + + { + // scale down config + MppFrame mframe = NULL; + MppBuffer mbuffer = NULL; + RK_S32 fd = -1; + MppFrameThumbnailMode thumbnail_mode; + + mpp_buf_slot_get_prop(p_hal->frame_slots, pp->CurrPic.Index7Bits, SLOT_BUFFER, &mbuffer); + mpp_buf_slot_get_prop(p_hal->frame_slots, pp->CurrPic.Index7Bits, + SLOT_FRAME_PTR, &mframe); + fd = mpp_buffer_get_fd(mbuffer); + thumbnail_mode = mpp_frame_get_thumbnail_en(mframe); + switch (thumbnail_mode) { + case MPP_FRAME_THUMBNAIL_ONLY: + regs->common_addr.reg133_scale_down_base = fd; + origin_buf = hal_bufs_get_buf(ctx->origin_bufs, pp->CurrPic.Index7Bits); + fd = mpp_buffer_get_fd(origin_buf->buf[0]); + /* output rkfbc64 */ + // regs->h264d_addrs.reg168_dpb_decout_base = fd; + /* output raster/tile4x4 */ + regs->common_addr.reg135_pp_m_decout_base = fd; + regs->h264d_addrs.reg192_dpb_payload64x4_st_cur_base = fd; + regs->h264d_addrs.reg169_error_ref_base = fd; + vdpu384a_setup_down_scale(mframe, p_hal->dev, &regs->ctrl_regs, (void*)&regs->h264d_paras); + break; + case MPP_FRAME_THUMBNAIL_MIXED: + regs->common_addr.reg133_scale_down_base = fd; + vdpu384a_setup_down_scale(mframe, p_hal->dev, &regs->ctrl_regs, (void*)&regs->h264d_paras); + break; + case MPP_FRAME_THUMBNAIL_NONE: + default: + regs->ctrl_regs.reg9.scale_down_en = 0; + break; + } + } + + return MPP_OK; +} + +static MPP_RET init_ctrl_regs(Vdpu384aH264dRegSet *regs) +{ + Vdpu384aCtrlReg *ctrl_regs = &regs->ctrl_regs; + + ctrl_regs->reg8_dec_mode = 1; //!< h264 + ctrl_regs->reg9.low_latency_en = 0; + + ctrl_regs->reg10.strmd_auto_gating_e = 1; + ctrl_regs->reg10.inter_auto_gating_e = 1; + ctrl_regs->reg10.intra_auto_gating_e = 1; + ctrl_regs->reg10.transd_auto_gating_e = 1; + ctrl_regs->reg10.recon_auto_gating_e = 1; + ctrl_regs->reg10.filterd_auto_gating_e = 1; + ctrl_regs->reg10.bus_auto_gating_e = 1; + ctrl_regs->reg10.ctrl_auto_gating_e = 1; + ctrl_regs->reg10.rcb_auto_gating_e = 1; + ctrl_regs->reg10.err_prc_auto_gating_e = 1; + + ctrl_regs->reg11.rd_outstanding = 32; + ctrl_regs->reg11.wr_outstanding = 250; + + ctrl_regs->reg13_core_timeout_threshold = 0xffffff; + + ctrl_regs->reg16.error_proc_disable = 1; + ctrl_regs->reg16.error_spread_disable = 0; +
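+ /* These control defaults are written once per register set: vdpu384a_h264d_init() below allocates VDPU384A_FAST_REG_SET_CNT sets in fast mode and runs init_ctrl_regs() on each of them, so the per-frame path only has to touch frame-dependent fields. */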
ctrl_regs->reg16.roi_error_ctu_cal_en = 0; + + ctrl_regs->reg20_cabac_error_en_lowbits = 0xfffedfff; + ctrl_regs->reg21_cabac_error_en_highbits = 0x0ffbf9ff; + + /* performance */ + ctrl_regs->reg28.axi_perf_work_e = 1; + ctrl_regs->reg28.axi_cnt_type = 1; + ctrl_regs->reg28.rd_latency_id = 11; + + ctrl_regs->reg29.addr_align_type = 2; + ctrl_regs->reg29.ar_cnt_id_type = 0; + ctrl_regs->reg29.aw_cnt_id_type = 0; + ctrl_regs->reg29.ar_count_id = 0xa; + ctrl_regs->reg29.aw_count_id = 0; + ctrl_regs->reg29.rd_band_width_mode = 0; + + return MPP_OK; +} + +MPP_RET vdpu384a_h264d_init(void *hal, MppHalCfg *cfg) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + + INP_CHECK(ret, NULL == p_hal); + (void) cfg; + + MEM_CHECK(ret, p_hal->reg_ctx = mpp_calloc_size(void, sizeof(Vdpu384aH264dRegCtx))); + Vdpu384aH264dRegCtx *reg_ctx = (Vdpu384aH264dRegCtx *)p_hal->reg_ctx; + RK_U32 max_cnt = p_hal->fast_mode ? VDPU384A_FAST_REG_SET_CNT : 1; + RK_U32 i = 0; + + //!< malloc buffers + FUN_CHECK(ret = mpp_buffer_get(p_hal->buf_group, &reg_ctx->bufs, + VDPU384A_INFO_BUFFER_SIZE(max_cnt))); + reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs); + reg_ctx->bufs_ptr = mpp_buffer_get_ptr(reg_ctx->bufs); + reg_ctx->offset_errinfo = VDPU384A_ERROR_INFO_OFFSET; + for (i = 0; i < max_cnt; i++) { + reg_ctx->reg_buf[i].regs = mpp_calloc(Vdpu384aH264dRegSet, 1); + init_ctrl_regs(reg_ctx->reg_buf[i].regs); + reg_ctx->offset_spspps[i] = VDPU384A_SPSPPS_OFFSET(i); + reg_ctx->offset_sclst[i] = VDPU384A_SCALING_LIST_OFFSET(i); + } + + mpp_buffer_attach_dev(reg_ctx->bufs, p_hal->dev); + + if (!p_hal->fast_mode) { + reg_ctx->regs = reg_ctx->reg_buf[0].regs; + reg_ctx->spspps_offset = reg_ctx->offset_spspps[0]; + reg_ctx->sclst_offset = reg_ctx->offset_sclst[0]; + } + + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64); + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_VER_ALIGN, rkv_ver_align); + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_LEN_ALIGN, rkv_len_align); + + if (cfg->hal_fbc_adj_cfg) { + cfg->hal_fbc_adj_cfg->func = vdpu384a_afbc_align_calc; + cfg->hal_fbc_adj_cfg->expand = 16; + } + +__RETURN: + return MPP_OK; +__FAILED: + vdpu384a_h264d_deinit(hal); + + return ret; +} + +MPP_RET vdpu384a_h264d_deinit(void *hal) +{ + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + Vdpu384aH264dRegCtx *reg_ctx = (Vdpu384aH264dRegCtx *)p_hal->reg_ctx; + + RK_U32 i = 0; + RK_U32 loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1; + + if (reg_ctx->bufs) { + mpp_buffer_put(reg_ctx->bufs); + reg_ctx->bufs = NULL; + } + + for (i = 0; i < loop; i++) + MPP_FREE(reg_ctx->reg_buf[i].regs); + + loop = p_hal->fast_mode ?
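+ /* Teardown mirrors the allocation rule above: one register set per in-flight frame in fast mode (VDPU384A_FAST_REG_SET_CNT), otherwise only slot 0 is used. */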
MPP_ARRAY_ELEMS(reg_ctx->rcb_buf) : 1; + for (i = 0; i < loop; i++) { + if (reg_ctx->rcb_buf[i]) { + mpp_buffer_put(reg_ctx->rcb_buf[i]); + reg_ctx->rcb_buf[i] = NULL; + } + } + + if (p_hal->cmv_bufs) { + hal_bufs_deinit(p_hal->cmv_bufs); + p_hal->cmv_bufs = NULL; + } + + if (reg_ctx->origin_bufs) { + hal_bufs_deinit(reg_ctx->origin_bufs); + reg_ctx->origin_bufs = NULL; + } + + MPP_FREE(p_hal->reg_ctx); + + return MPP_OK; +} + +static void h264d_refine_rcb_size(H264dHalCtx_t *p_hal, Vdpu384aRcbInfo *rcb_info, + RK_S32 width, RK_S32 height) +{ + RK_U32 rcb_bits = 0; + RK_U32 mbaff = p_hal->pp->MbaffFrameFlag; + RK_U32 bit_depth = p_hal->pp->bit_depth_luma_minus8 + 8; + RK_U32 chroma_format_idc = p_hal->pp->chroma_format_idc; + RK_U32 row_uv_para = 1; // for yuv420/yuv422 + RK_U32 filterd_row_append = 8192; + + // vdpu384a h264d supports yuv400/yuv420/yuv422 + if (chroma_format_idc == 0) + row_uv_para = 0; + + width = MPP_ALIGN(width, H264_CTU_SIZE); + height = MPP_ALIGN(height, H264_CTU_SIZE); + /* RCB_STRMD_ROW && RCB_STRMD_TILE_ROW*/ + if (width > 4096) + rcb_bits = ((width + 15) / 16) * 158 * (mbaff ? 2 : 1); + else + rcb_bits = 0; + rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits); + rcb_info[RCB_STRMD_TILE_ROW].size = MPP_RCB_BYTES(rcb_bits); + /* RCB_INTER_ROW && RCB_INTER_TILE_ROW*/ + rcb_bits = ((width + 3) / 4) * 92 * (mbaff ? 2 : 1); + rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits); + rcb_info[RCB_INTER_TILE_ROW].size = MPP_RCB_BYTES(rcb_bits); + /* RCB_INTRA_ROW && RCB_INTRA_TILE_ROW*/ + rcb_bits = MPP_ALIGN(width, 512) * (bit_depth + 2) * (mbaff ? 2 : 1); + if (chroma_format_idc == 1 || chroma_format_idc == 2) + rcb_bits = rcb_bits * 5 / 2; //TODO: + + rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits); + rcb_info[RCB_INTRA_TILE_ROW].size = 0; + /* RCB_FILTERD_ROW && RCB_FILTERD_PROTECT_ROW*/ + // save space mode : half for RCB_FILTERD_ROW, half for RCB_FILTERD_PROTECT_ROW + rcb_bits = width * 13 * ((6 + 3 * row_uv_para) * (mbaff ? 2 : 1) + 2 * row_uv_para + 1.5); + if (width > 4096) + filterd_row_append = 27648; + rcb_info[RCB_FILTERD_ROW].size = MPP_RCB_BYTES(rcb_bits / 2); + rcb_info[RCB_FILTERD_PROTECT_ROW].size = filterd_row_append + MPP_RCB_BYTES(rcb_bits / 2); + + rcb_info[RCB_FILTERD_TILE_ROW].size = 0; + /* RCB_FILTERD_TILE_COL */ + rcb_info[RCB_FILTERD_TILE_COL].size = 0; + +} + +static void hal_h264d_rcb_info_update(void *hal) +{ + H264dHalCtx_t *p_hal = (H264dHalCtx_t*)hal; + RK_U32 mbaff = p_hal->pp->MbaffFrameFlag; + RK_U32 bit_depth = p_hal->pp->bit_depth_luma_minus8 + 8; + RK_U32 chroma_format_idc = p_hal->pp->chroma_format_idc; + Vdpu384aH264dRegCtx *ctx = (Vdpu384aH264dRegCtx *)p_hal->reg_ctx; + RK_S32 width = MPP_ALIGN((p_hal->pp->wFrameWidthInMbsMinus1 + 1) << 4, 64); + RK_S32 height = MPP_ALIGN((p_hal->pp->wFrameHeightInMbsMinus1 + 1) << 4, 64); + + if ( ctx->bit_depth != bit_depth || + ctx->chroma_format_idc != chroma_format_idc || + ctx->mbaff != mbaff || + ctx->width != width || + ctx->height != height) { + RK_U32 i; + RK_U32 loop = p_hal->fast_mode ?
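+ /* Example of the row-buffer sizing above for width = 4096 without MBAFF: RCB_INTER_ROW needs ((4096 + 3) / 4) * 92 = 94208 bits, while RCB_STRMD_ROW stays zero until the width exceeds 4096. */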
MPP_ARRAY_ELEMS(ctx->reg_buf) : 1; + + ctx->rcb_buf_size = vdpu384a_get_rcb_buf_size(ctx->rcb_info, width, height); + h264d_refine_rcb_size(hal, ctx->rcb_info, width, height); + /* vdpu384a_check_rcb_buf_size(ctx->rcb_info, width, height); */ + for (i = 0; i < loop; i++) { + MppBuffer rcb_buf = ctx->rcb_buf[i]; + + if (rcb_buf) { + mpp_buffer_put(rcb_buf); + ctx->rcb_buf[i] = NULL; + } + mpp_buffer_get(p_hal->buf_group, &rcb_buf, ctx->rcb_buf_size); + ctx->rcb_buf[i] = rcb_buf; + } + ctx->bit_depth = bit_depth; + ctx->width = width; + ctx->height = height; + ctx->mbaff = mbaff; + ctx->chroma_format_idc = chroma_format_idc; + } +} + +MPP_RET vdpu384a_h264d_gen_regs(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + RK_S32 width = MPP_ALIGN((p_hal->pp->wFrameWidthInMbsMinus1 + 1) << 4, 64); + RK_S32 height = MPP_ALIGN((p_hal->pp->wFrameHeightInMbsMinus1 + 1) << 4, 64); + Vdpu384aH264dRegCtx *ctx = (Vdpu384aH264dRegCtx *)p_hal->reg_ctx; + Vdpu384aH264dRegSet *regs = ctx->regs; + MppFrame mframe; + RK_S32 mv_size = MPP_ALIGN(width, 64) * MPP_ALIGN(height, 16); // 16 byte unit + + INP_CHECK(ret, NULL == p_hal); + + if (task->dec.flags.parse_err || + task->dec.flags.ref_err) { + goto __RETURN; + } + + /* if field mode is enabled, enlarge the colmv buffer and disable colmv compression */ + if (!p_hal->pp->frame_mbs_only_flag) + mv_size *= 2; + + if (p_hal->cmv_bufs == NULL || p_hal->mv_size < mv_size) { + size_t size = mv_size; + + if (p_hal->cmv_bufs) { + hal_bufs_deinit(p_hal->cmv_bufs); + p_hal->cmv_bufs = NULL; + } + + hal_bufs_init(&p_hal->cmv_bufs); + if (p_hal->cmv_bufs == NULL) { + mpp_err_f("colmv bufs init fail"); + goto __RETURN; + } + p_hal->mv_size = mv_size; + p_hal->mv_count = mpp_buf_slot_get_count(p_hal->frame_slots); + hal_bufs_setup(p_hal->cmv_bufs, p_hal->mv_count, 1, &size); + } + + mpp_buf_slot_get_prop(p_hal->frame_slots, p_hal->pp->CurrPic.Index7Bits, SLOT_FRAME_PTR, &mframe); + if (mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY && + ctx->origin_bufs == NULL) { + vdpu384a_setup_scale_origin_bufs(p_hal, mframe); + } + + if (p_hal->fast_mode) { + RK_U32 i = 0; + for (i = 0; i < MPP_ARRAY_ELEMS(ctx->reg_buf); i++) { + if (!ctx->reg_buf[i].valid) { + task->dec.reg_index = i; + regs = ctx->reg_buf[i].regs; + + ctx->spspps_offset = ctx->offset_spspps[i]; + ctx->sclst_offset = ctx->offset_sclst[i]; + ctx->reg_buf[i].valid = 1; + break; + } + } + } + +#ifdef DUMP_VDPU384A_DATAS + { + memset(dump_cur_dir, 0, sizeof(dump_cur_dir)); + sprintf(dump_cur_dir, "avc/Frame%04d", dump_cur_frame); + if (access(dump_cur_dir, 0)) { + if (mkdir(dump_cur_dir)) + mpp_err_f("error: mkdir %s\n", dump_cur_dir); + } + dump_cur_frame++; + } +#endif + + prepare_spspps(p_hal, (RK_U64 *)&ctx->spspps, sizeof(ctx->spspps) / 8); + prepare_scanlist(p_hal, ctx->sclst, sizeof(ctx->sclst)); + set_registers(p_hal, regs, task); + + //!< copy spspps data + memcpy((char *)ctx->bufs_ptr + ctx->spspps_offset, (char *)ctx->spspps, sizeof(ctx->spspps)); + + regs->common_addr.reg131_gbl_base = ctx->bufs_fd; + regs->h264d_paras.reg67_global_len = VDPU384A_SPSPPS_SIZE / 16; // 128 bit as unit + mpp_dev_set_reg_offset(p_hal->dev, 131, ctx->spspps_offset); + + if (p_hal->pp->scaleing_list_enable_flag) { + memcpy((char *)ctx->bufs_ptr + ctx->sclst_offset, (void *)ctx->sclst, sizeof(ctx->sclst)); + regs->common_addr.reg132_scanlist_addr = ctx->bufs_fd; + mpp_dev_set_reg_offset(p_hal->dev, 132, ctx->sclst_offset); + } else {
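+ /* Scaling lists disabled for this frame: leave the scan-list base cleared so no matrix table is fetched. */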
regs->common_addr.reg132_scanlist_addr = 0; + } + + hal_h264d_rcb_info_update(p_hal); + vdpu384a_setup_rcb(&regs->common_addr, p_hal->dev, p_hal->fast_mode ? + ctx->rcb_buf[task->dec.reg_index] : ctx->rcb_buf[0], + ctx->rcb_info); + vdpu384a_setup_statistic(&regs->ctrl_regs); + mpp_buffer_sync_end(ctx->bufs); + +__RETURN: + return ret = MPP_OK; +} + +MPP_RET vdpu384a_h264d_start(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + INP_CHECK(ret, NULL == p_hal); + + if (task->dec.flags.parse_err || + task->dec.flags.ref_err) { + goto __RETURN; + } + + Vdpu384aH264dRegCtx *reg_ctx = (Vdpu384aH264dRegCtx *)p_hal->reg_ctx; + Vdpu384aH264dRegSet *regs = p_hal->fast_mode ? + reg_ctx->reg_buf[task->dec.reg_index].regs : + reg_ctx->regs; + MppDev dev = p_hal->dev; + + do { + MppDevRegWrCfg wr_cfg; + MppDevRegRdCfg rd_cfg; + + wr_cfg.reg = &regs->ctrl_regs; + wr_cfg.size = sizeof(regs->ctrl_regs); + wr_cfg.offset = OFFSET_CTRL_REGS; + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = &regs->common_addr; + wr_cfg.size = sizeof(regs->common_addr); + wr_cfg.offset = OFFSET_COMMON_ADDR_REGS; + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = &regs->h264d_paras; + wr_cfg.size = sizeof(regs->h264d_paras); + wr_cfg.offset = OFFSET_CODEC_PARAS_REGS; + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = &regs->h264d_addrs; + wr_cfg.size = sizeof(regs->h264d_addrs); + wr_cfg.offset = OFFSET_CODEC_ADDR_REGS; + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + rd_cfg.reg = &regs->ctrl_regs.reg15; + rd_cfg.size = sizeof(regs->ctrl_regs.reg15); + rd_cfg.offset = OFFSET_INTERRUPT_REGS; + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg); + if (ret) { + mpp_err_f("set register read failed %d\n", ret); + break; + } + + /* rcb info for sram */ + vdpu384a_set_rcbinfo(dev, (Vdpu384aRcbInfo*)reg_ctx->rcb_info); + + /* send request to hardware */ + ret = mpp_dev_ioctl(dev, MPP_DEV_CMD_SEND, NULL); + if (ret) { + mpp_err_f("send cmd failed %d\n", ret); + break; + } + } while (0); + +__RETURN: + return ret = MPP_OK; +} + +MPP_RET vdpu384a_h264d_wait(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + + INP_CHECK(ret, NULL == p_hal); + Vdpu384aH264dRegCtx *reg_ctx = (Vdpu384aH264dRegCtx *)p_hal->reg_ctx; + Vdpu384aH264dRegSet *p_regs = p_hal->fast_mode ?
+ reg_ctx->reg_buf[task->dec.reg_index].regs : + reg_ctx->regs; + + if (task->dec.flags.parse_err || + task->dec.flags.ref_err) { + goto __SKIP_HARD; + } + + ret = mpp_dev_ioctl(p_hal->dev, MPP_DEV_CMD_POLL, NULL); + if (ret) + mpp_err_f("poll cmd failed %d\n", ret); + +__SKIP_HARD: + if (p_hal->dec_cb) { + DecCbHalDone param; + + param.task = (void *)&task->dec; + param.regs = (RK_U32 *)p_regs; + + if ((!p_regs->ctrl_regs.reg15.rkvdec_frame_rdy_sta) || + p_regs->ctrl_regs.reg15.rkvdec_strm_error_sta || + p_regs->ctrl_regs.reg15.rkvdec_core_timeout_sta || + p_regs->ctrl_regs.reg15.rkvdec_ip_timeout_sta || + p_regs->ctrl_regs.reg15.rkvdec_bus_error_sta || + p_regs->ctrl_regs.reg15.rkvdec_buffer_empty_sta || + p_regs->ctrl_regs.reg15.rkvdec_colmv_ref_error_sta) + param.hard_err = 1; + else + param.hard_err = 0; + + mpp_callback(p_hal->dec_cb, &param); + } + memset(&p_regs->ctrl_regs.reg19, 0, sizeof(RK_U32)); + if (p_hal->fast_mode) { + reg_ctx->reg_buf[task->dec.reg_index].valid = 0; + } + + (void)task; +__RETURN: + return ret = MPP_OK; +} + +MPP_RET vdpu384a_h264d_reset(void *hal) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + + INP_CHECK(ret, NULL == p_hal); + + +__RETURN: + return ret = MPP_OK; +} + +MPP_RET vdpu384a_h264d_flush(void *hal) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + + INP_CHECK(ret, NULL == p_hal); + +__RETURN: + return ret = MPP_OK; +} + +MPP_RET vdpu384a_h264d_control(void *hal, MpiCmd cmd_type, void *param) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + + INP_CHECK(ret, NULL == p_hal); + + switch ((MpiCmd)cmd_type) { + case MPP_DEC_SET_FRAME_INFO: { + MppFrameFormat fmt = mpp_frame_get_fmt((MppFrame)param); + RK_U32 imgwidth = mpp_frame_get_width((MppFrame)param); + RK_U32 imgheight = mpp_frame_get_height((MppFrame)param); + + mpp_log("control info: fmt %d, w %d, h %d\n", fmt, imgwidth, imgheight); + if (fmt == MPP_FMT_YUV422SP) { + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_LEN_ALIGN, rkv_len_align_422); + } + if (MPP_FRAME_FMT_IS_FBC(fmt)) { + vdpu384a_afbc_align_calc(p_hal->frame_slots, (MppFrame)param, 16); + } else if (imgwidth > 1920 || imgheight > 1088) { + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64); + } + } break; + case MPP_DEC_GET_THUMBNAIL_FRAME_INFO: { + vdpu384a_update_thumbnail_frame_info((MppFrame)param); + } break; + case MPP_DEC_SET_OUTPUT_FORMAT: { + } break; + default : { + } break; + } + +__RETURN: + return ret = MPP_OK; +} + +const MppHalApi hal_h264d_vdpu384a = { + .name = "h264d_vdpu384a", + .type = MPP_CTX_DEC, + .coding = MPP_VIDEO_CodingAVC, + .ctx_size = sizeof(Vdpu384aH264dRegCtx), + .flag = 0, + .init = vdpu384a_h264d_init, + .deinit = vdpu384a_h264d_deinit, + .reg_gen = vdpu384a_h264d_gen_regs, + .start = vdpu384a_h264d_start, + .wait = vdpu384a_h264d_wait, + .reset = vdpu384a_h264d_reset, + .flush = vdpu384a_h264d_flush, + .control = vdpu384a_h264d_control, +}; diff --git a/mpp/hal/rkdec/h264d/hal_h264d_vdpu384a.h b/mpp/hal/rkdec/h264d/hal_h264d_vdpu384a.h new file mode 100644 index 00000000..03d85722 --- /dev/null +++ b/mpp/hal/rkdec/h264d/hal_h264d_vdpu384a.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: Apache-2.0 OR MIT */ +/* + * Copyright (c) 2024 Rockchip Electronics Co., Ltd.
+ */ + +#ifndef __HAL_H264D_VDPU384A_H__ +#define __HAL_H264D_VDPU384A_H__ + +#include "mpp_hal.h" +#include "vdpu384a.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const MppHalApi hal_h264d_vdpu384a; + +#ifdef __cplusplus +} +#endif + +#endif /* __HAL_H264D_VDPU384A_H__ */ diff --git a/mpp/hal/rkdec/h265d/CMakeLists.txt b/mpp/hal/rkdec/h265d/CMakeLists.txt index dd5579f8..44bf1108 100644 --- a/mpp/hal/rkdec/h265d/CMakeLists.txt +++ b/mpp/hal/rkdec/h265d/CMakeLists.txt @@ -8,9 +8,10 @@ set(HAL_H265D_SRC hal_h265d_vdpu34x.c hal_h265d_vdpu382.c hal_h265d_vdpu383.c + hal_h265d_vdpu384a.c ) add_library(${HAL_H265D} STATIC ${HAL_H265D_SRC}) set_target_properties(${HAL_H265D} PROPERTIES FOLDER "mpp/hal") -target_link_libraries(${HAL_H265D} vdpu34x_com vdpu383_com mpp_base) +target_link_libraries(${HAL_H265D} vdpu34x_com vdpu383_com vdpu384a_com mpp_base) diff --git a/mpp/hal/rkdec/h265d/hal_h265d_api.c b/mpp/hal/rkdec/h265d/hal_h265d_api.c index 3a1ecf61..c9b6aab5 100644 --- a/mpp/hal/rkdec/h265d/hal_h265d_api.c +++ b/mpp/hal/rkdec/h265d/hal_h265d_api.c @@ -29,6 +29,7 @@ #include "hal_h265d_vdpu34x.h" #include "hal_h265d_vdpu382.h" #include "hal_h265d_vdpu383.h" +#include "hal_h265d_vdpu384a.h" RK_U32 hal_h265d_debug = 0; @@ -63,6 +64,7 @@ MPP_RET hal_h265d_init(void *ctx, MppHalCfg *cfg) p->is_v345 = (hw_id == HWID_VDPU345); p->is_v34x = (hw_id == HWID_VDPU34X || hw_id == HWID_VDPU38X); p->is_v383 = (hw_id == HWID_VDPU383); + p->is_v384a = (hw_id == HWID_VDPU384A); p->client_type = client_type; if (hw_id == HWID_VDPU382_RK3528 || hw_id == HWID_VDPU382_RK3562) @@ -71,6 +73,8 @@ MPP_RET hal_h265d_init(void *ctx, MppHalCfg *cfg) p->api = &hal_h265d_vdpu34x; else if (p->is_v383) p->api = &hal_h265d_vdpu383; + else if (p->is_v384a) + p->api = &hal_h265d_vdpu384a; else p->api = &hal_h265d_rkv; diff --git a/mpp/hal/rkdec/h265d/hal_h265d_ctx.h b/mpp/hal/rkdec/h265d/hal_h265d_ctx.h index 346dbbc7..0725bfea 100644 --- a/mpp/hal/rkdec/h265d/hal_h265d_ctx.h +++ b/mpp/hal/rkdec/h265d/hal_h265d_ctx.h @@ -64,10 +64,13 @@ typedef struct HalH265dCtx_t { RK_U32 mv_size; RK_S32 mv_count; - RK_U32 is_v341; - RK_U32 is_v345; - RK_U32 is_v34x; - RK_U32 is_v383; + struct { + RK_U32 is_v341 : 1; + RK_U32 is_v345 : 1; + RK_U32 is_v34x : 1; + RK_U32 is_v383 : 1; + RK_U32 is_v384a : 1; + }; /* rcb info */ RK_U32 chroma_fmt_idc; RK_U8 ctu_size; diff --git a/mpp/hal/rkdec/h265d/hal_h265d_vdpu384a.c b/mpp/hal/rkdec/h265d/hal_h265d_vdpu384a.c new file mode 100644 index 00000000..fbe77bfc --- /dev/null +++ b/mpp/hal/rkdec/h265d/hal_h265d_vdpu384a.c @@ -0,0 +1,1426 @@ +/* SPDX-License-Identifier: Apache-2.0 OR MIT */ +/* + * Copyright (c) 2024 Rockchip Electronics Co., Ltd. 
+ */ + +#define MODULE_TAG "hal_h265d_vdpu384a" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpp_env.h" +#include "mpp_mem.h" +#include "mpp_bitread.h" +#include "mpp_bitput.h" +#include "mpp_buffer_impl.h" + +#include "h265d_syntax.h" +#include "hal_h265d_debug.h" +#include "hal_h265d_ctx.h" +#include "hal_h265d_com.h" +#include "hal_h265d_vdpu384a.h" +#include "vdpu384a_h265d.h" +#include "vdpu384a_com.h" + +#define PPS_SIZE (112 * 64)//(96x64) + +#define FMT 4 +#define CTU 3 + +typedef struct { + RK_U32 a; + RK_U32 b; +} FilterdColBufRatio; + +#define SPSPPS_ALIGNED_SIZE (MPP_ALIGN(2181 + 64, 128) / 8) // bytes, 2181 bits + 64 reserved +#define SCALIST_ALIGNED_SIZE (MPP_ALIGN(81 * 1360, SZ_4K)) +#define INFO_BUFFER_SIZE (SPSPPS_ALIGNED_SIZE + SCALIST_ALIGNED_SIZE) +#define ALL_BUFFER_SIZE(cnt) (INFO_BUFFER_SIZE *cnt) + +#define SPSPPS_OFFSET(pos) (INFO_BUFFER_SIZE * pos) +#define SCALIST_OFFSET(pos) (SPSPPS_OFFSET(pos) + SPSPPS_ALIGNED_SIZE) + +#define pocdistance(a, b) (((a) > (b)) ? ((a) - (b)) : ((b) - (a))) + +static RK_U32 rkv_len_align_422(RK_U32 val) +{ + return (2 * MPP_ALIGN(val, 16)); +} + +static RK_U32 rkv_len_align_444(RK_U32 val) +{ + return (3 * MPP_ALIGN(val, 16)); +} + +static MPP_RET vdpu384a_setup_scale_origin_bufs(HalH265dCtx *ctx, MppFrame mframe) +{ + /* for 8K FrameBuf scale mode */ + size_t origin_buf_size = 0; + + origin_buf_size = mpp_frame_get_buf_size(mframe); + + if (!origin_buf_size) { + mpp_err_f("origin_bufs get buf size failed\n"); + return MPP_NOK; + } + + if (ctx->origin_bufs) { + hal_bufs_deinit(ctx->origin_bufs); + ctx->origin_bufs = NULL; + } + hal_bufs_init(&ctx->origin_bufs); + if (!ctx->origin_bufs) { + mpp_err_f("origin_bufs init fail\n"); + return MPP_ERR_NOMEM; + } + + hal_bufs_setup(ctx->origin_bufs, 16, 1, &origin_buf_size); + + return MPP_OK; +} + +static MPP_RET hal_h265d_vdpu384a_init(void *hal, MppHalCfg *cfg) +{ + RK_S32 ret = 0; + HalH265dCtx *reg_ctx = (HalH265dCtx *)hal; + + mpp_slots_set_prop(reg_ctx->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64); + mpp_slots_set_prop(reg_ctx->slots, SLOTS_VER_ALIGN, hevc_ver_align); + + reg_ctx->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1); + if (reg_ctx->scaling_qm == NULL) { + mpp_err("scaling_qm alloc fail"); + return MPP_ERR_MALLOC; + } + + reg_ctx->scaling_rk = mpp_calloc(scalingFactor_t, 1); + reg_ctx->pps_buf = mpp_calloc(RK_U8, SPSPPS_ALIGNED_SIZE); + + if (reg_ctx->scaling_rk == NULL) { + mpp_err("scaling_rk alloc fail"); + return MPP_ERR_MALLOC; + } + + if (reg_ctx->group == NULL) { + ret = mpp_buffer_group_get_internal(&reg_ctx->group, MPP_BUFFER_TYPE_ION); + if (ret) { + mpp_err("h265d mpp_buffer_group_get failed\n"); + return ret; + } + } + + { + RK_U32 i = 0; + RK_U32 max_cnt = reg_ctx->fast_mode ?
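+ /* Info-buffer layout per register set i, from the macros above: SPSPPS_OFFSET(i) = INFO_BUFFER_SIZE * i and SCALIST_OFFSET(i) = SPSPPS_OFFSET(i) + SPSPPS_ALIGNED_SIZE, so ALL_BUFFER_SIZE(max_cnt) reserves max_cnt such sets back to back in a single MppBuffer. */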
MAX_GEN_REG : 1; + + //!< malloc buffers + ret = mpp_buffer_get(reg_ctx->group, &reg_ctx->bufs, ALL_BUFFER_SIZE(max_cnt)); + if (ret) { + mpp_err("h265d mpp_buffer_get failed\n"); + return ret; + } + + reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs); + for (i = 0; i < max_cnt; i++) { + reg_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu384aH265dRegSet)); + reg_ctx->offset_spspps[i] = SPSPPS_OFFSET(i); + reg_ctx->offset_sclst[i] = SCALIST_OFFSET(i); + } + + mpp_buffer_attach_dev(reg_ctx->bufs, reg_ctx->dev); + } + + if (!reg_ctx->fast_mode) { + reg_ctx->hw_regs = reg_ctx->g_buf[0].hw_regs; + reg_ctx->spspps_offset = reg_ctx->offset_spspps[0]; + reg_ctx->sclst_offset = reg_ctx->offset_sclst[0]; + } + + if (cfg->hal_fbc_adj_cfg) { + cfg->hal_fbc_adj_cfg->func = vdpu384a_afbc_align_calc; + cfg->hal_fbc_adj_cfg->expand = 16; + } + + (void) cfg; + return MPP_OK; +} + +static MPP_RET hal_h265d_vdpu384a_deinit(void *hal) +{ + HalH265dCtx *reg_ctx = (HalH265dCtx *)hal; + RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1; + RK_U32 i; + + if (reg_ctx->bufs) { + mpp_buffer_put(reg_ctx->bufs); + reg_ctx->bufs = NULL; + } + + loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->rcb_buf) : 1; + for (i = 0; i < loop; i++) { + if (reg_ctx->rcb_buf[i]) { + mpp_buffer_put(reg_ctx->rcb_buf[i]); + reg_ctx->rcb_buf[i] = NULL; + } + } + + if (reg_ctx->group) { + mpp_buffer_group_put(reg_ctx->group); + reg_ctx->group = NULL; + } + + for (i = 0; i < loop; i++) + MPP_FREE(reg_ctx->g_buf[i].hw_regs); + + MPP_FREE(reg_ctx->scaling_qm); + MPP_FREE(reg_ctx->scaling_rk); + MPP_FREE(reg_ctx->pps_buf); + + if (reg_ctx->cmv_bufs) { + hal_bufs_deinit(reg_ctx->cmv_bufs); + reg_ctx->cmv_bufs = NULL; + } + + if (reg_ctx->origin_bufs) { + hal_bufs_deinit(reg_ctx->origin_bufs); + reg_ctx->origin_bufs = NULL; + } + + return MPP_OK; +} + +#define SCALING_LIST_NUM 6 + +void hal_vdpu384a_record_scaling_list(scalingFactor_t *pScalingFactor_out, scalingList_t *pScalingList) +{ + RK_S32 i; + RK_U32 listId; + BitputCtx_t bp; + + mpp_set_bitput_ctx(&bp, (RK_U64 *)pScalingFactor_out, 170); // 170*64bits + + //-------- pack the lists in the layout the hardware needs -------- + //sizeId == 0, block4x4 + for (listId = 0; listId < SCALING_LIST_NUM; listId++) { + RK_U8 *p_data = pScalingList->sl[0][listId]; + /* dump by block4x4, vertical direction */ + for (i = 0; i < 4; i++) { + mpp_put_bits(&bp, p_data[i + 0], 8); + mpp_put_bits(&bp, p_data[i + 4], 8); + mpp_put_bits(&bp, p_data[i + 8], 8); + mpp_put_bits(&bp, p_data[i + 12], 8); + } + } + //sizeId == 1, block8x8 + for (listId = 0; listId < SCALING_LIST_NUM; listId++) { + RK_S32 blk4_x = 0, blk4_y = 0; + RK_U8 *p_data = pScalingList->sl[1][listId]; + + /* dump by block4x4, vertical direction */ + for (blk4_x = 0; blk4_x < 8; blk4_x += 4) { + for (blk4_y = 0; blk4_y < 8; blk4_y += 4) { + RK_S32 pos = blk4_y * 8 + blk4_x; + + for (i = 0; i < 4; i++) { + mpp_put_bits(&bp, p_data[pos + i + 0], 8); + mpp_put_bits(&bp, p_data[pos + i + 8], 8); + mpp_put_bits(&bp, p_data[pos + i + 16], 8); + mpp_put_bits(&bp, p_data[pos + i + 24], 8); + } + } + } + } + //sizeId == 2, block16x16 + for (listId = 0; listId < SCALING_LIST_NUM; listId++) { + RK_S32 blk4_x = 0, blk4_y = 0; + RK_U8 *p_data = pScalingList->sl[2][listId]; + + /* dump by block4x4, vertical direction */ + for (blk4_x = 0; blk4_x < 8; blk4_x += 4) { + for (blk4_y = 0; blk4_y < 8; blk4_y += 4) { + RK_S32 pos = blk4_y * 8 + blk4_x; + + for (i = 0; i < 4; i++) { + mpp_put_bits(&bp, p_data[pos + i + 0], 8); + mpp_put_bits(&bp,
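+ /* Dump order shared by all the sizeId blocks here: for the 4x4 sub-block at (blk4_x, blk4_y), each i emits one vertical column p_data[pos + i], p_data[pos + i + 8], p_data[pos + i + 16], p_data[pos + i + 24]; e.g. pos = 0 yields 0, 8, 16, 24, then 1, 9, 17, 25, and so on. */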
p_data[pos + i + 8], 8); + mpp_put_bits(&bp, p_data[pos + i + 16], 8); + mpp_put_bits(&bp, p_data[pos + i + 24], 8); + } + } + } + } + //sizeId == 3, block32x32 + for (listId = 0; listId < 6; listId++) { + RK_S32 blk4_x = 0, blk4_y = 0; + RK_U8 *p_data = pScalingList->sl[3][listId]; + + /* dump by block4x4, vertical direction */ + for (blk4_x = 0; blk4_x < 8; blk4_x += 4) { + for (blk4_y = 0; blk4_y < 8; blk4_y += 4) { + RK_S32 pos = blk4_y * 8 + blk4_x; + + for (i = 0; i < 4; i++) { + mpp_put_bits(&bp, p_data[pos + i + 0], 8); + mpp_put_bits(&bp, p_data[pos + i + 8], 8); + mpp_put_bits(&bp, p_data[pos + i + 16], 8); + mpp_put_bits(&bp, p_data[pos + i + 24], 8); + } + } + } + } + //sizeId == 0, block4x4, horizontal direction + for (listId = 0; listId < SCALING_LIST_NUM; listId++) { + RK_U8 *p_data = pScalingList->sl[0][listId]; + + for (i = 0; i < 16; i++) + mpp_put_bits(&bp, p_data[i], 8); + } + + // dump dc value + for (i = 0; i < SCALING_LIST_NUM; i++)//sizeId = 2, 16x16 + mpp_put_bits(&bp, pScalingList->sl_dc[0][i], 8); + for (i = 0; i < SCALING_LIST_NUM; i++) //sizeId = 3, 32x32 + mpp_put_bits(&bp, pScalingList->sl_dc[1][i], 8); + + mpp_put_align(&bp, 128, 0); +} + +static MPP_RET hal_h265d_vdpu384a_scalinglist_packet(void *hal, void *ptr, void *dxva) +{ + scalingList_t sl; + RK_U32 i, j, pos; + h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva; + HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal; + + if (!dxva_ctx->pp.scaling_list_enabled_flag) { + return MPP_OK; + } + + if (memcmp((void*)&dxva_ctx->qm, reg_ctx->scaling_qm, sizeof(DXVA_Qmatrix_HEVC))) { + memset(&sl, 0, sizeof(scalingList_t)); + + for (i = 0; i < 6; i++) { + for (j = 0; j < 16; j++) { + pos = 4 * hal_hevc_diag_scan4x4_y[j] + hal_hevc_diag_scan4x4_x[j]; + sl.sl[0][i][pos] = dxva_ctx->qm.ucScalingLists0[i][j]; + } + + for (j = 0; j < 64; j++) { + pos = 8 * hal_hevc_diag_scan8x8_y[j] + hal_hevc_diag_scan8x8_x[j]; + sl.sl[1][i][pos] = dxva_ctx->qm.ucScalingLists1[i][j]; + sl.sl[2][i][pos] = dxva_ctx->qm.ucScalingLists2[i][j]; + + if (i == 0) + sl.sl[3][i][pos] = dxva_ctx->qm.ucScalingLists3[0][j]; + else if (i == 3) + sl.sl[3][i][pos] = dxva_ctx->qm.ucScalingLists3[1][j]; + else + sl.sl[3][i][pos] = dxva_ctx->qm.ucScalingLists2[i][j]; + } + + sl.sl_dc[0][i] = dxva_ctx->qm.ucScalingListDCCoefSizeID2[i]; + if (i == 0) + sl.sl_dc[1][i] = dxva_ctx->qm.ucScalingListDCCoefSizeID3[0]; + else if (i == 3) + sl.sl_dc[1][i] = dxva_ctx->qm.ucScalingListDCCoefSizeID3[1]; + else + sl.sl_dc[1][i] = dxva_ctx->qm.ucScalingListDCCoefSizeID2[i]; + } + hal_vdpu384a_record_scaling_list((scalingFactor_t *)reg_ctx->scaling_rk, &sl); + } + + memcpy(ptr, reg_ctx->scaling_rk, sizeof(scalingFactor_t)); + + return MPP_OK; +} + +static RK_S32 hal_h265d_v345_output_pps_packet(void *hal, void *dxva) +{ + RK_S32 i; + RK_U32 log2_min_cb_size; + RK_S32 width, height; + HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal; + Vdpu384aH265dRegSet *hw_reg = (Vdpu384aH265dRegSet*)(reg_ctx->hw_regs); + h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva; + BitputCtx_t bp; + + if (NULL == reg_ctx || dxva_ctx == NULL) { + mpp_err("%s:%s:%d reg_ctx or dxva_ctx is NULL", + __FILE__, __FUNCTION__, __LINE__); + return MPP_ERR_NULL_PTR; + } + + // SPS + { + void *pps_ptr = mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->spspps_offset; + RK_U64 *pps_packet = reg_ctx->pps_buf; + + if (NULL == pps_ptr) { + mpp_err("pps_data get ptr error"); + return MPP_ERR_NOMEM; + } + + log2_min_cb_size =
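+ /* Worked example: log2_min_luma_coding_block_size_minus3 = 0 gives log2_min_cb_size = 3 (8-pixel CBs), so PicWidthInMinCbsY = 240 decodes to width = 240 << 3 = 1920. */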
dxva_ctx->pp.log2_min_luma_coding_block_size_minus3 + 3; + width = (dxva_ctx->pp.PicWidthInMinCbsY << log2_min_cb_size); + height = (dxva_ctx->pp.PicHeightInMinCbsY << log2_min_cb_size); + + mpp_set_bitput_ctx(&bp, pps_packet, 22); // 22*64bits + + if (dxva_ctx->pp.ps_update_flag) { + mpp_put_bits(&bp, dxva_ctx->pp.vps_id, 4); + mpp_put_bits(&bp, dxva_ctx->pp.sps_id, 4); + mpp_put_bits(&bp, dxva_ctx->pp.chroma_format_idc, 2); + + mpp_put_bits(&bp, width, 16); + mpp_put_bits(&bp, height, 16); + mpp_put_bits(&bp, dxva_ctx->pp.bit_depth_luma_minus8, 3); + mpp_put_bits(&bp, dxva_ctx->pp.bit_depth_chroma_minus8, 3); + mpp_put_bits(&bp, dxva_ctx->pp.log2_max_pic_order_cnt_lsb_minus4 + 4, 5); + mpp_put_bits(&bp, dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size, 2); + mpp_put_bits(&bp, dxva_ctx->pp.log2_min_luma_coding_block_size_minus3 + 3, 3); + mpp_put_bits(&bp, dxva_ctx->pp.log2_min_transform_block_size_minus2 + 2, 3); + + mpp_put_bits(&bp, dxva_ctx->pp.log2_diff_max_min_transform_block_size, 2); + mpp_put_bits(&bp, dxva_ctx->pp.max_transform_hierarchy_depth_inter, 3); + mpp_put_bits(&bp, dxva_ctx->pp.max_transform_hierarchy_depth_intra, 3); + mpp_put_bits(&bp, dxva_ctx->pp.scaling_list_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.amp_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.sample_adaptive_offset_enabled_flag, 1); + ///<-zrh comment ^ 68 bit above + mpp_put_bits(&bp, dxva_ctx->pp.pcm_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.pcm_enabled_flag ? (dxva_ctx->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0, 4); + mpp_put_bits(&bp, dxva_ctx->pp.pcm_enabled_flag ? (dxva_ctx->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0, 4); + mpp_put_bits(&bp, dxva_ctx->pp.pcm_loop_filter_disabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.log2_diff_max_min_pcm_luma_coding_block_size, 3); + mpp_put_bits(&bp, dxva_ctx->pp.pcm_enabled_flag ? 
(dxva_ctx->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3); + + mpp_put_bits(&bp, dxva_ctx->pp.num_short_term_ref_pic_sets, 7); + mpp_put_bits(&bp, dxva_ctx->pp.long_term_ref_pics_present_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.num_long_term_ref_pics_sps, 6); + mpp_put_bits(&bp, dxva_ctx->pp.sps_temporal_mvp_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.strong_intra_smoothing_enabled_flag, 1); + // SPS extension + mpp_put_bits(&bp, dxva_ctx->pp.transform_skip_rotation_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.transform_skip_context_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.strong_intra_smoothing_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.implicit_rdpcm_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.explicit_rdpcm_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.extended_precision_processing_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.intra_smoothing_disabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.sps_max_dec_pic_buffering_minus1, 4); + mpp_put_bits(&bp, dxva_ctx->pp.separate_colour_plane_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.high_precision_offsets_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.persistent_rice_adaptation_enabled_flag, 1); + + /* PPS */ + mpp_put_bits(&bp, dxva_ctx->pp.pps_id, 6); + mpp_put_bits(&bp, dxva_ctx->pp.sps_id, 4); + mpp_put_bits(&bp, dxva_ctx->pp.dependent_slice_segments_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.output_flag_present_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.num_extra_slice_header_bits, 13); + + mpp_put_bits(&bp, dxva_ctx->pp.sign_data_hiding_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.cabac_init_present_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.num_ref_idx_l0_default_active_minus1 + 1, 4); + mpp_put_bits(&bp, dxva_ctx->pp.num_ref_idx_l1_default_active_minus1 + 1, 4); + mpp_put_bits(&bp, dxva_ctx->pp.init_qp_minus26, 7); + mpp_put_bits(&bp, dxva_ctx->pp.constrained_intra_pred_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.transform_skip_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.cu_qp_delta_enabled_flag, 1); + mpp_put_bits(&bp, log2_min_cb_size + dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size - dxva_ctx->pp.diff_cu_qp_delta_depth, 3); + + mpp_put_bits(&bp, dxva_ctx->pp.pps_cb_qp_offset, 5); + mpp_put_bits(&bp, dxva_ctx->pp.pps_cr_qp_offset, 5); + mpp_put_bits(&bp, dxva_ctx->pp.pps_slice_chroma_qp_offsets_present_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.weighted_pred_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.weighted_bipred_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.transquant_bypass_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.tiles_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.entropy_coding_sync_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.pps_loop_filter_across_slices_enabled_flag, 1); + + mpp_put_bits(&bp, dxva_ctx->pp.loop_filter_across_tiles_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.deblocking_filter_override_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.pps_deblocking_filter_disabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.pps_beta_offset_div2, 4); + mpp_put_bits(&bp, dxva_ctx->pp.pps_tc_offset_div2, 4); + mpp_put_bits(&bp, dxva_ctx->pp.lists_modification_present_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.log2_parallel_merge_level_minus2 + 2, 3); + mpp_put_bits(&bp, dxva_ctx->pp.slice_segment_header_extension_present_flag, 1); + mpp_put_bits(&bp, 0, 3); + + // PPS extension + if (dxva_ctx->pp.log2_max_transform_skip_block_size > 2) { + mpp_put_bits(&bp, dxva_ctx->pp.log2_max_transform_skip_block_size - 2, 2); + }
else { + mpp_put_bits(&bp, 0, 2); + } + mpp_put_bits(&bp, dxva_ctx->pp.cross_component_prediction_enabled_flag, 1); + mpp_put_bits(&bp, dxva_ctx->pp.chroma_qp_offset_list_enabled_flag, 1); + + RK_S32 log2_min_cu_chroma_qp_delta_size = log2_min_cb_size + + dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size - + dxva_ctx->pp.diff_cu_chroma_qp_offset_depth; + mpp_put_bits(&bp, log2_min_cu_chroma_qp_delta_size, 3); + for (i = 0; i < 6; i++) + mpp_put_bits(&bp, dxva_ctx->pp.cb_qp_offset_list[i], 5); + for (i = 0; i < 6; i++) + mpp_put_bits(&bp, dxva_ctx->pp.cr_qp_offset_list[i], 5); + mpp_put_bits(&bp, dxva_ctx->pp.chroma_qp_offset_list_len_minus1, 3); + + /* mvc0 && mvc1 */ + mpp_put_bits(&bp, 0xffff, 16); + mpp_put_bits(&bp, 0, 1); + mpp_put_bits(&bp, 0, 6); + mpp_put_bits(&bp, 0, 1); + mpp_put_bits(&bp, 0, 1); + } else { + bp.index = 4; + bp.bitpos = 41; + bp.bvalue = bp.pbuf[bp.index] & MPP_GENMASK(bp.bitpos - 1, 0); + } + /* poc info */ + { + RK_S32 dpb_valid[15] = {0}, refpic_poc[15] = {0}; + + for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_ctx->pp.RefPicList); i++) { + if (dxva_ctx->pp.RefPicList[i].bPicEntry != 0xff && + dxva_ctx->pp.RefPicList[i].bPicEntry != 0x7f) { + dpb_valid[i] = 1; + refpic_poc[i] = dxva_ctx->pp.PicOrderCntValList[i]; + } + } + + mpp_put_bits(&bp, 0, 1); + mpp_put_bits(&bp, 0, 1); + mpp_put_bits(&bp, 0, 1); + mpp_put_bits(&bp, dxva_ctx->pp.current_poc, 32); + + for (i = 0; i < 15; i++) + mpp_put_bits(&bp, refpic_poc[i], 32); + mpp_put_bits(&bp, 0, 32); + for (i = 0; i < 15; i++) + mpp_put_bits(&bp, dpb_valid[i], 1); + mpp_put_bits(&bp, 0, 1); + } + + /* tile info */ + mpp_put_bits(&bp, dxva_ctx->pp.tiles_enabled_flag ? (dxva_ctx->pp.num_tile_columns_minus1 + 1) : 1, 5); + mpp_put_bits(&bp, dxva_ctx->pp.tiles_enabled_flag ? 
(dxva_ctx->pp.num_tile_rows_minus1 + 1) : 1, 5); + { + /// tiles info begin + RK_U16 column_width[20]; + RK_U16 row_height[22]; + + memset(column_width, 0, sizeof(column_width)); + memset(row_height, 0, sizeof(row_height)); + + if (dxva_ctx->pp.tiles_enabled_flag) { + if (dxva_ctx->pp.uniform_spacing_flag == 0) { + RK_S32 maxcuwidth = dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size; + RK_S32 ctu_width_in_pic = (width + + (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ; + RK_S32 ctu_height_in_pic = (height + + (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ; + RK_S32 sum = 0; + for (i = 0; i < dxva_ctx->pp.num_tile_columns_minus1; i++) { + column_width[i] = dxva_ctx->pp.column_width_minus1[i] + 1; + sum += column_width[i] ; + } + column_width[i] = ctu_width_in_pic - sum; + + sum = 0; + for (i = 0; i < dxva_ctx->pp.num_tile_rows_minus1; i++) { + row_height[i] = dxva_ctx->pp.row_height_minus1[i] + 1; + sum += row_height[i]; + } + row_height[i] = ctu_height_in_pic - sum; + } else { + RK_S32 pic_in_cts_width = (width + + (1 << (log2_min_cb_size + + dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size)) - 1) + / (1 << (log2_min_cb_size + + dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size)); + RK_S32 pic_in_cts_height = (height + + (1 << (log2_min_cb_size + + dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size)) - 1) + / (1 << (log2_min_cb_size + + dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size)); + + for (i = 0; i < dxva_ctx->pp.num_tile_columns_minus1 + 1; i++) + column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_ctx->pp.num_tile_columns_minus1 + 1) - + (i * pic_in_cts_width) / (dxva_ctx->pp.num_tile_columns_minus1 + 1); + + for (i = 0; i < dxva_ctx->pp.num_tile_rows_minus1 + 1; i++) + row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_ctx->pp.num_tile_rows_minus1 + 1) - + (i * pic_in_cts_height) / (dxva_ctx->pp.num_tile_rows_minus1 + 1); + } + } else { + RK_S32 MaxCUWidth = (1 << (dxva_ctx->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size)); + column_width[0] = (width + MaxCUWidth - 1) / MaxCUWidth; + row_height[0] = (height + MaxCUWidth - 1) / MaxCUWidth; + } + + for (i = 0; i < 20; i++) + mpp_put_bits(&bp, column_width[i], 12); + + for (i = 0; i < 22; i++) + mpp_put_bits(&bp, row_height[i], 12); + } + { + Short_SPS_RPS_HEVC *cur_st_rps_ptr = &dxva_ctx->pp.cur_st_rps; + + for (i = 0; i < 32; i ++) { + mpp_put_bits(&bp, dxva_ctx->pp.sps_lt_rps[i].lt_ref_pic_poc_lsb, 16); + mpp_put_bits(&bp, dxva_ctx->pp.sps_lt_rps[i].used_by_curr_pic_lt_flag, 1); + } + + mpp_put_bits(&bp, cur_st_rps_ptr->num_negative_pics, 4); + mpp_put_bits(&bp, cur_st_rps_ptr->num_positive_pics, 4); + + for (i = 0; i < cur_st_rps_ptr->num_negative_pics; i++) { + mpp_put_bits(&bp, cur_st_rps_ptr->delta_poc_s0[i], 16); + mpp_put_bits(&bp, cur_st_rps_ptr->s0_used_flag[i], 1); + } + + for (i = 0; i < cur_st_rps_ptr->num_positive_pics; i++) { + mpp_put_bits(&bp, cur_st_rps_ptr->delta_poc_s1[i], 16); + mpp_put_bits(&bp, cur_st_rps_ptr->s1_used_flag[i], 1); + } + + for ( i = cur_st_rps_ptr->num_negative_pics + cur_st_rps_ptr->num_positive_pics; i < 15; i++) { + mpp_put_bits(&bp, 0, 16); + mpp_put_bits(&bp, 0, 1); + } + } + mpp_put_align(&bp, 64, 0);//128 + memcpy(pps_ptr, reg_ctx->pps_buf, SPSPPS_ALIGNED_SIZE); + } /* --- end spspps data ------*/ + + if (dxva_ctx->pp.scaling_list_enabled_flag) { + RK_U32 addr; + RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_ctx->bufs) + reg_ctx->sclst_offset; + + if (dxva_ctx->pp.scaling_list_data_present_flag) { + addr = 
(dxva_ctx->pp.pps_id + 16) * 1360; + } else if (dxva_ctx->pp.scaling_list_enabled_flag) { + addr = dxva_ctx->pp.sps_id * 1360; + } else { + addr = 80 * 1360; + } + + hal_h265d_vdpu384a_scalinglist_packet(hal, ptr_scaling + addr, dxva); + + hw_reg->common_addr.reg132_scanlist_addr = reg_ctx->bufs_fd; + mpp_dev_set_reg_offset(reg_ctx->dev, 132, addr + reg_ctx->sclst_offset); + } + +#ifdef dump + fwrite(pps_ptr, 1, 80 * 64, fp); + RK_U32 *tmp = (RK_U32 *)pps_ptr; + for (i = 0; i < 112 / 4; i++) { + mpp_log("pps[%3d] = 0x%08x\n", i, tmp[i]); + } +#endif +#ifdef DUMP_VDPU384A_DATAS + { + char *cur_fname = "global_cfg.dat"; + memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path)); + sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname); + dump_data_to_file(dump_cur_fname_path, (void *)bp.pbuf, 64 * bp.index + bp.bitpos, 128, 0); + } +#endif + + return 0; +} + +static void h265d_refine_rcb_size(Vdpu384aRcbInfo *rcb_info, + RK_S32 width, RK_S32 height, void *dxva) +{ + RK_U32 rcb_bits = 0; + h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva; + DXVA_PicParams_HEVC *pp = &dxva_ctx->pp; + RK_U32 chroma_fmt_idc = pp->chroma_format_idc; // 0 - 400, 1 - 420, 2 - 422, 3 - 444 + RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8; + RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3); + RK_U32 tile_row_cut_num = pp->num_tile_rows_minus1; + RK_U32 tile_col_cut_num = pp->num_tile_columns_minus1; + RK_U32 ext_row_align_size = tile_row_cut_num * 64 * 8; + RK_U32 ext_col_align_size = tile_col_cut_num * 64 * 8; + RK_U32 filterd_row_append = 8192; + RK_U32 row_uv_para = 0; + RK_U32 col_uv_para = 0; + + if (chroma_fmt_idc == 1) { + row_uv_para = 1; + col_uv_para = 1; + } else if (chroma_fmt_idc == 2) { + row_uv_para = 1; + col_uv_para = 3; + } else if (chroma_fmt_idc == 3) { + row_uv_para = 3; + col_uv_para = 3; + } + + width = MPP_ALIGN(width, ctu_size); + height = MPP_ALIGN(height, ctu_size); + /* RCB_STRMD_ROW && RCB_STRMD_TILE_ROW*/ + rcb_info[RCB_STRMD_ROW].size = 0; + rcb_info[RCB_STRMD_TILE_ROW].size = 0; + + /* RCB_INTER_ROW && RCB_INTER_TILE_ROW*/ + rcb_bits = ((width + 7) / 8) * 174; + rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits); + rcb_bits += ext_row_align_size; + if (tile_row_cut_num) + rcb_info[RCB_INTER_TILE_ROW].size = MPP_RCB_BYTES(rcb_bits); + else + rcb_info[RCB_INTER_TILE_ROW].size = 0; + + /* RCB_INTRA_ROW && RCB_INTRA_TILE_ROW*/ + rcb_bits = MPP_ALIGN(width, 512) * (bit_depth + 2); + rcb_bits = rcb_bits * 4; //TODO: + rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits); + rcb_bits += ext_row_align_size; + if (tile_row_cut_num) + rcb_info[RCB_INTRA_TILE_ROW].size = MPP_RCB_BYTES(rcb_bits); + else + rcb_info[RCB_INTRA_TILE_ROW].size = 0; + + /* RCB_FILTERD_ROW && RCB_FILTERD_TILE_ROW*/ + rcb_bits = (MPP_ALIGN(width, 64) * (1.2 * bit_depth + 0.5) * (8 + 5 * row_uv_para)); + // save space mode : half for RCB_FILTERD_ROW, half for RCB_FILTERD_PROTECT_ROW + if (width > 4096) + filterd_row_append = 27648; + rcb_info[RCB_FILTERD_ROW].size = MPP_RCB_BYTES(rcb_bits / 2); + rcb_info[RCB_FILTERD_PROTECT_ROW].size = MPP_RCB_BYTES(rcb_bits / 2) + filterd_row_append; + rcb_bits += ext_row_align_size; + if (tile_row_cut_num) + rcb_info[RCB_FILTERD_TILE_ROW].size = MPP_RCB_BYTES(rcb_bits); + else + rcb_info[RCB_FILTERD_TILE_ROW].size = 0; + + /* RCB_FILTERD_TILE_COL */ + if (tile_col_cut_num) { + rcb_bits = (MPP_ALIGN(height, 64) * (1.6 * bit_depth + 0.5) * (16.5 +
5.5 * col_uv_para)) + ext_col_align_size; + rcb_info[RCB_FILTERD_TILE_COL].size = MPP_RCB_BYTES(rcb_bits); + } else { + rcb_info[RCB_FILTERD_TILE_COL].size = 0; + } + +} + +static void hal_h265d_rcb_info_update(void *hal, void *dxva, + Vdpu384aH265dRegSet *hw_regs, + RK_S32 width, RK_S32 height) +{ + HalH265dCtx *reg_ctx = ( HalH265dCtx *)hal; + h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t*)dxva; + DXVA_PicParams_HEVC *pp = &dxva_ctx->pp; + RK_U32 chroma_fmt_idc = pp->chroma_format_idc; // 0 - 400, 1 - 420, 2 - 422, 3 - 444 + RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8; + RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3); + RK_U32 num_tiles = pp->num_tile_rows_minus1 + 1; + (void)hw_regs; + + if (reg_ctx->num_row_tiles != num_tiles || + reg_ctx->bit_depth != bit_depth || + reg_ctx->chroma_fmt_idc != chroma_fmt_idc || + reg_ctx->ctu_size != ctu_size || + reg_ctx->width != width || + reg_ctx->height != height) { + RK_U32 i = 0; + RK_U32 loop = reg_ctx->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->g_buf) : 1; + + reg_ctx->rcb_buf_size = vdpu384a_get_rcb_buf_size((Vdpu384aRcbInfo *)reg_ctx->rcb_info, width, height); + h265d_refine_rcb_size((Vdpu384aRcbInfo *)reg_ctx->rcb_info, width, height, dxva_ctx); + /* vdpu384a_check_rcb_buf_size((Vdpu384aRcbInfo *)reg_ctx->rcb_info, width, height); */ + + for (i = 0; i < loop; i++) { + MppBuffer rcb_buf; + + if (reg_ctx->rcb_buf[i]) { + mpp_buffer_put(reg_ctx->rcb_buf[i]); + reg_ctx->rcb_buf[i] = NULL; + } + mpp_buffer_get(reg_ctx->group, &rcb_buf, reg_ctx->rcb_buf_size); + reg_ctx->rcb_buf[i] = rcb_buf; + } + + reg_ctx->num_row_tiles = num_tiles; + reg_ctx->bit_depth = bit_depth; + reg_ctx->chroma_fmt_idc = chroma_fmt_idc; + reg_ctx->ctu_size = ctu_size; + reg_ctx->width = width; + reg_ctx->height = height; + } +} + +static RK_S32 calc_mv_size(RK_S32 pic_w, RK_S32 pic_h, RK_S32 ctu_w) +{ + RK_S32 seg_w = 64 * 16 * 16 / ctu_w; // colmv_block_size = 16, colmv_per_bytes = 16 + RK_S32 seg_cnt_w = MPP_ALIGN(pic_w, seg_w) / seg_w; + RK_S32 seg_cnt_h = MPP_ALIGN(pic_h, ctu_w) / ctu_w; + RK_S32 mv_size = seg_cnt_w * seg_cnt_h * 64 * 16; + + return mv_size; +} + +static MPP_RET hal_h265d_vdpu384a_gen_regs(void *hal, HalTaskInfo *syn) +{ + RK_S32 i = 0; + RK_S32 log2_min_cb_size; + RK_S32 width, height; + RK_S32 stride_y, stride_uv, virstrid_y; + Vdpu384aH265dRegSet *hw_regs; + RK_S32 ret = MPP_SUCCESS; + MppBuffer streambuf = NULL; + RK_S32 aglin_offset = 0; + RK_S32 valid_ref = -1; + MppBuffer framebuf = NULL; + HalBuf *mv_buf = NULL; + RK_S32 fd = -1; + RK_U32 mv_size = 0; + RK_S32 distance = INT_MAX; + + (void) fd; + if (syn->dec.flags.parse_err || + syn->dec.flags.ref_err) { + h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__); + return MPP_OK; + } + + h265d_dxva2_picture_context_t *dxva_ctx = (h265d_dxva2_picture_context_t *)syn->dec.syntax.data; + HalH265dCtx *reg_ctx = (HalH265dCtx *)hal; + HalBuf *origin_buf = NULL; + + if (reg_ctx->fast_mode) { + for (i = 0; i < MAX_GEN_REG; i++) { + if (!reg_ctx->g_buf[i].use_flag) { + syn->dec.reg_index = i; + + reg_ctx->spspps_offset = reg_ctx->offset_spspps[i]; + reg_ctx->sclst_offset = reg_ctx->offset_sclst[i]; + + reg_ctx->hw_regs = reg_ctx->g_buf[i].hw_regs; + reg_ctx->g_buf[i].use_flag = 1; + break; + } + } + if (i == MAX_GEN_REG) { + mpp_err("hevc rps buf all used"); + return MPP_ERR_NOMEM; + } + } + + if (syn->dec.syntax.data == NULL) { + mpp_err("%s:%s:%d dxva is NULL",
__FILE__, __FUNCTION__, __LINE__); + return MPP_ERR_NULL_PTR; + } + +#ifdef DUMP_VDPU384A_DATAS + { + memset(dump_cur_dir, 0, sizeof(dump_cur_dir)); + sprintf(dump_cur_dir, "hevc/Frame%04d", dump_cur_frame); + if (access(dump_cur_dir, 0)) { + if (mkdir(dump_cur_dir)) + mpp_err_f("error: mkdir %s\n", dump_cur_dir); + } + dump_cur_frame++; + } +#endif + + /* output pps */ + hw_regs = (Vdpu384aH265dRegSet*)reg_ctx->hw_regs; + + if (NULL == hw_regs) { + return MPP_ERR_NULL_PTR; + } + + memset(hw_regs, 0, sizeof(Vdpu384aH265dRegSet)); + + log2_min_cb_size = dxva_ctx->pp.log2_min_luma_coding_block_size_minus3 + 3; + width = (dxva_ctx->pp.PicWidthInMinCbsY << log2_min_cb_size); + height = (dxva_ctx->pp.PicHeightInMinCbsY << log2_min_cb_size); + mv_size = calc_mv_size(width, height, 1 << log2_min_cb_size) * 2; + + if (reg_ctx->cmv_bufs == NULL || reg_ctx->mv_size < mv_size) { + size_t size = mv_size; + + if (reg_ctx->cmv_bufs) { + hal_bufs_deinit(reg_ctx->cmv_bufs); + reg_ctx->cmv_bufs = NULL; + } + + hal_bufs_init(&reg_ctx->cmv_bufs); + if (reg_ctx->cmv_bufs == NULL) { + mpp_err_f("colmv bufs init fail"); + return MPP_ERR_NULL_PTR; + } + + reg_ctx->mv_size = mv_size; + reg_ctx->mv_count = mpp_buf_slot_get_count(reg_ctx->slots); + hal_bufs_setup(reg_ctx->cmv_bufs, reg_ctx->mv_count, 1, &size); + } + + { + MppFrame mframe = NULL; + RK_U32 ver_virstride; + RK_U32 virstrid_uv; + MppFrameFormat fmt; + RK_U32 chroma_fmt_idc = dxva_ctx->pp.chroma_format_idc; + + mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.CurrPic.Index7Bits, + SLOT_FRAME_PTR, &mframe); + /* for 8K downscale mode*/ + if (mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY && + reg_ctx->origin_bufs == NULL) { + vdpu384a_setup_scale_origin_bufs(reg_ctx, mframe); + } + + fmt = mpp_frame_get_fmt(mframe); + + stride_y = mpp_frame_get_hor_stride(mframe); + ver_virstride = mpp_frame_get_ver_stride(mframe); + stride_uv = stride_y; + virstrid_y = ver_virstride * stride_y; + if (chroma_fmt_idc == 3) + stride_uv *= 2; + if (chroma_fmt_idc == 3 || chroma_fmt_idc == 2) { + virstrid_uv = stride_uv * ver_virstride; + } else { + virstrid_uv = stride_uv * ver_virstride / 2; + } + if (MPP_FRAME_FMT_IS_FBC(fmt)) { + RK_U32 fbc_hdr_stride = mpp_frame_get_fbc_hdr_stride(mframe); + RK_U32 fbd_offset; + + hw_regs->ctrl_regs.reg9.dpb_data_sel = 0; + hw_regs->ctrl_regs.reg9.dpb_output_dis = 0; + hw_regs->ctrl_regs.reg9.pp_m_output_mode = 0; + + hw_regs->h265d_paras.reg68_dpb_hor_virstride = fbc_hdr_stride / 64; + fbd_offset = fbc_hdr_stride * MPP_ALIGN(ver_virstride, 64) / 16; + hw_regs->h265d_addrs.reg193_dpb_fbc64x4_payload_offset = fbd_offset; + hw_regs->h265d_paras.reg80_error_ref_hor_virstride = hw_regs->h265d_paras.reg68_dpb_hor_virstride; + } else if (MPP_FRAME_FMT_IS_TILE(fmt)) { + hw_regs->ctrl_regs.reg9.dpb_data_sel = 1; + hw_regs->ctrl_regs.reg9.dpb_output_dis = 1; + hw_regs->ctrl_regs.reg9.pp_m_output_mode = 2; + + if (chroma_fmt_idc == 0) { //yuv400 + hw_regs->h265d_paras.reg77_pp_m_hor_stride = stride_y * 4 / 16; + } else if (chroma_fmt_idc == 2) { //yuv422 + hw_regs->h265d_paras.reg77_pp_m_hor_stride = stride_y * 8 / 16; + } else if (chroma_fmt_idc == 3) { //yuv444 + hw_regs->h265d_paras.reg77_pp_m_hor_stride = stride_y * 12 / 16; + } else { //yuv420 + hw_regs->h265d_paras.reg77_pp_m_hor_stride = stride_y * 6 / 16; + } + hw_regs->h265d_paras.reg79_pp_m_y_virstride = (virstrid_y + virstrid_uv) / 16; + hw_regs->h265d_paras.reg80_error_ref_hor_virstride = hw_regs->h265d_paras.reg77_pp_m_hor_stride; + } else {
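+ /* non-FBC, non-tile case: the pp main channel writes a raster picture (pp_m_output_mode = 1); the stride registers hold 16-byte units, hence the ">> 4" conversions below. */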
hw_regs->ctrl_regs.reg9.dpb_data_sel = 1; + hw_regs->ctrl_regs.reg9.dpb_output_dis = 1; + hw_regs->ctrl_regs.reg9.pp_m_output_mode = 1; + + hw_regs->h265d_paras.reg77_pp_m_hor_stride = stride_y >> 4; + hw_regs->h265d_paras.reg78_pp_m_uv_hor_stride = stride_uv >> 4; + hw_regs->h265d_paras.reg79_pp_m_y_virstride = virstrid_y >> 4; + hw_regs->h265d_paras.reg80_error_ref_hor_virstride = hw_regs->h265d_paras.reg77_pp_m_hor_stride; + } + hw_regs->h265d_paras.reg81_error_ref_raster_uv_hor_virstride = hw_regs->h265d_paras.reg78_pp_m_uv_hor_stride; + hw_regs->h265d_paras.reg82_error_ref_virstride = hw_regs->h265d_paras.reg79_pp_m_y_virstride; + } + mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.CurrPic.Index7Bits, + SLOT_BUFFER, &framebuf); + + if (reg_ctx->origin_bufs) { + origin_buf = hal_bufs_get_buf(reg_ctx->origin_bufs, + dxva_ctx->pp.CurrPic.Index7Bits); + framebuf = origin_buf->buf[0]; + } + + /* output rkfbc64 */ + // hw_regs->h265d_addrs.reg168_dpb_decout_base = mpp_buffer_get_fd(framebuf); //just index need map + /* output raster/tile4x4 */ + hw_regs->common_addr.reg135_pp_m_decout_base = mpp_buffer_get_fd(framebuf); //just index need map + hw_regs->h265d_addrs.reg169_error_ref_base = mpp_buffer_get_fd(framebuf); + /* if out_base is zero it means this frame may be in error, + so we return directly (added by csy) */ + + /* output rkfbc64 */ + // if (!hw_regs->h265d_addrs.reg168_dpb_decout_base) + // return 0; + /* output raster/tile4x4 */ + if (!hw_regs->common_addr.reg135_pp_m_decout_base) + return 0; + + fd = mpp_buffer_get_fd(framebuf); + /* output rkfbc64 */ + // hw_regs->h265d_addrs.reg168_dpb_decout_base = fd; + /* output raster/tile4x4 */ + hw_regs->common_addr.reg135_pp_m_decout_base = fd; + hw_regs->h265d_addrs.reg192_dpb_payload64x4_st_cur_base = fd; + mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_ctx->pp.CurrPic.Index7Bits); + + hw_regs->h265d_addrs.reg216_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]); +#ifdef DUMP_VDPU384A_DATAS + { + char *cur_fname = "colmv_cur_frame.dat"; + memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path)); + sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname); + dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(mv_buf->buf[0]), + mpp_buffer_get_size(mv_buf->buf[0]), 64, 0); + } +#endif + + mpp_buf_slot_get_prop(reg_ctx->packet_slots, syn->dec.input, SLOT_BUFFER, + &streambuf); + if ( dxva_ctx->bitstream == NULL) { + dxva_ctx->bitstream = mpp_buffer_get_ptr(streambuf); + } + +#ifdef DUMP_VDPU384A_DATAS + { + char *cur_fname = "stream_in_128bit.dat"; + memset(dump_cur_fname_path, 0, sizeof(dump_cur_fname_path)); + sprintf(dump_cur_fname_path, "%s/%s", dump_cur_dir, cur_fname); + dump_data_to_file(dump_cur_fname_path, (void *)mpp_buffer_get_ptr(streambuf), + mpp_buffer_get_size(streambuf), 128, 0); + } +#endif + + hw_regs->common_addr.reg128_strm_base = mpp_buffer_get_fd(streambuf); + hw_regs->h265d_paras.reg66_stream_len = ((dxva_ctx->bitstream_size + 15) & (~15)) + 64; + hw_regs->common_addr.reg129_stream_buf_st_base = mpp_buffer_get_fd(streambuf); + hw_regs->common_addr.reg130_stream_buf_end_base = mpp_buffer_get_fd(streambuf); + mpp_dev_set_reg_offset(reg_ctx->dev, 130, mpp_buffer_get_size(streambuf)); + aglin_offset = hw_regs->h265d_paras.reg66_stream_len - dxva_ctx->bitstream_size; + if (aglin_offset > 0) + memset((void *)(dxva_ctx->bitstream + dxva_ctx->bitstream_size), 0, aglin_offset); + + /* common setting */ + hw_regs->ctrl_regs.reg8_dec_mode = 0; // hevc + hw_regs->ctrl_regs.reg9.low_latency_en = 0; + +
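+ /* turn on clock auto-gating for every decoder sub-module so that idle blocks gate their clocks between tasks */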
hw_regs->ctrl_regs.reg10.strmd_auto_gating_e = 1; + hw_regs->ctrl_regs.reg10.inter_auto_gating_e = 1; + hw_regs->ctrl_regs.reg10.intra_auto_gating_e = 1; + hw_regs->ctrl_regs.reg10.transd_auto_gating_e = 1; + hw_regs->ctrl_regs.reg10.recon_auto_gating_e = 1; + hw_regs->ctrl_regs.reg10.filterd_auto_gating_e = 1; + hw_regs->ctrl_regs.reg10.bus_auto_gating_e = 1; + hw_regs->ctrl_regs.reg10.ctrl_auto_gating_e = 1; + hw_regs->ctrl_regs.reg10.rcb_auto_gating_e = 1; + hw_regs->ctrl_regs.reg10.err_prc_auto_gating_e = 1; + + hw_regs->ctrl_regs.reg11.rd_outstanding = 32; + hw_regs->ctrl_regs.reg11.wr_outstanding = 250; + // hw_regs->ctrl_regs.reg11.dec_timeout_dis = 1; + + hw_regs->ctrl_regs.reg16.error_proc_disable = 1; + hw_regs->ctrl_regs.reg16.error_spread_disable = 0; + hw_regs->ctrl_regs.reg16.roi_error_ctu_cal_en = 0; + + hw_regs->ctrl_regs.reg20_cabac_error_en_lowbits = 0xffffffff; + hw_regs->ctrl_regs.reg21_cabac_error_en_highbits = 0x3ff3ffff; + + hw_regs->ctrl_regs.reg13_core_timeout_threshold = 0xffff; + + + /* output rkfbc64 */ + // valid_ref = hw_regs->h265d_addrs.reg168_dpb_decout_base; + /* output raster/tile4x4 */ + valid_ref = hw_regs->common_addr.reg135_pp_m_decout_base; + reg_ctx->error_index[syn->dec.reg_index] = dxva_ctx->pp.CurrPic.Index7Bits; + + hw_regs->h265d_addrs.reg169_error_ref_base = valid_ref; + for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_ctx->pp.RefPicList); i++) { + if (dxva_ctx->pp.RefPicList[i].bPicEntry != 0xff && + dxva_ctx->pp.RefPicList[i].bPicEntry != 0x7f) { + + MppFrame mframe = NULL; + mpp_buf_slot_get_prop(reg_ctx->slots, + dxva_ctx->pp.RefPicList[i].Index7Bits, + SLOT_BUFFER, &framebuf); + mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.RefPicList[i].Index7Bits, + SLOT_FRAME_PTR, &mframe); + if (mpp_frame_get_thumbnail_en(mframe) == MPP_FRAME_THUMBNAIL_ONLY) { + origin_buf = hal_bufs_get_buf(reg_ctx->origin_bufs, + dxva_ctx->pp.RefPicList[i].Index7Bits); + framebuf = origin_buf->buf[0]; + } + if (framebuf != NULL) { + hw_regs->h265d_addrs.reg170_185_ref_base[i] = mpp_buffer_get_fd(framebuf); + hw_regs->h265d_addrs.reg195_210_payload_st_ref_base[i] = mpp_buffer_get_fd(framebuf); + valid_ref = hw_regs->h265d_addrs.reg170_185_ref_base[i]; + if ((pocdistance(dxva_ctx->pp.PicOrderCntValList[i], dxva_ctx->pp.current_poc) < distance) + && (!mpp_frame_get_errinfo(mframe))) { + + distance = pocdistance(dxva_ctx->pp.PicOrderCntValList[i], dxva_ctx->pp.current_poc); + hw_regs->h265d_addrs.reg169_error_ref_base = hw_regs->h265d_addrs.reg170_185_ref_base[i]; + reg_ctx->error_index[syn->dec.reg_index] = dxva_ctx->pp.RefPicList[i].Index7Bits; + hw_regs->ctrl_regs.reg16.error_proc_disable = 1; + } + } else { + hw_regs->h265d_addrs.reg170_185_ref_base[i] = valid_ref; + hw_regs->h265d_addrs.reg195_210_payload_st_ref_base[i] = valid_ref; + } + + mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, dxva_ctx->pp.RefPicList[i].Index7Bits); + hw_regs->h265d_addrs.reg217_232_colmv_ref_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]); + } + } + + if ((reg_ctx->error_index[syn->dec.reg_index] == dxva_ctx->pp.CurrPic.Index7Bits) && + !dxva_ctx->pp.IntraPicFlag && !reg_ctx->cfg->base.disable_error) { + h265h_dbg(H265H_DBG_TASK_ERR, "current frm may be err, should skip process"); + syn->dec.flags.ref_err = 1; + return MPP_OK; + } + + /* pps */ + hw_regs->common_addr.reg131_gbl_base = reg_ctx->bufs_fd; + hw_regs->h265d_paras.reg67_global_len = 0xc; //22 * 8; + + mpp_dev_set_reg_offset(reg_ctx->dev, 131, reg_ctx->spspps_offset); + + hal_h265d_v345_output_pps_packet(hal, syn->dec.syntax.data); 
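+ /* second pass over the reference list: entries that are absent or whose frames carry errinfo are redirected to the error reference chosen above, so the hardware never fetches from an invalid base address */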
+ + for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_ctx->pp.RefPicList); i++) { + + if (dxva_ctx->pp.RefPicList[i].bPicEntry != 0xff && + dxva_ctx->pp.RefPicList[i].bPicEntry != 0x7f) { + MppFrame mframe = NULL; + + mpp_buf_slot_get_prop(reg_ctx->slots, + dxva_ctx->pp.RefPicList[i].Index7Bits, + SLOT_BUFFER, &framebuf); + + mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.RefPicList[i].Index7Bits, + SLOT_FRAME_PTR, &mframe); + + if (framebuf == NULL || mpp_frame_get_errinfo(mframe)) { + mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]); + hw_regs->h265d_addrs.reg170_185_ref_base[i] = hw_regs->h265d_addrs.reg169_error_ref_base; + hw_regs->h265d_addrs.reg195_210_payload_st_ref_base[i] = hw_regs->h265d_addrs.reg169_error_ref_base; + hw_regs->h265d_addrs.reg217_232_colmv_ref_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]); + } + } else { + mv_buf = hal_bufs_get_buf(reg_ctx->cmv_bufs, reg_ctx->error_index[syn->dec.reg_index]); + hw_regs->h265d_addrs.reg170_185_ref_base[i] = hw_regs->h265d_addrs.reg169_error_ref_base; + hw_regs->h265d_addrs.reg195_210_payload_st_ref_base[i] = hw_regs->h265d_addrs.reg169_error_ref_base; + hw_regs->h265d_addrs.reg217_232_colmv_ref_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]); + } + } + + hal_h265d_rcb_info_update(hal, dxva_ctx, hw_regs, width, height); + vdpu384a_setup_rcb(&hw_regs->common_addr, reg_ctx->dev, reg_ctx->fast_mode ? + reg_ctx->rcb_buf[syn->dec.reg_index] : reg_ctx->rcb_buf[0], + (Vdpu384aRcbInfo *)reg_ctx->rcb_info); + vdpu384a_setup_statistic(&hw_regs->ctrl_regs); + mpp_buffer_sync_end(reg_ctx->bufs); + + { + //scale down config + MppFrame mframe = NULL; + MppBuffer mbuffer = NULL; + MppFrameThumbnailMode thumbnail_mode; + + mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.CurrPic.Index7Bits, + SLOT_BUFFER, &mbuffer); + mpp_buf_slot_get_prop(reg_ctx->slots, dxva_ctx->pp.CurrPic.Index7Bits, + SLOT_FRAME_PTR, &mframe); + thumbnail_mode = mpp_frame_get_thumbnail_en(mframe); + switch (thumbnail_mode) { + case MPP_FRAME_THUMBNAIL_ONLY: + hw_regs->common_addr.reg133_scale_down_base = mpp_buffer_get_fd(mbuffer); + origin_buf = hal_bufs_get_buf(reg_ctx->origin_bufs, dxva_ctx->pp.CurrPic.Index7Bits); + fd = mpp_buffer_get_fd(origin_buf->buf[0]); + /* output rkfbc64 */ + // hw_regs->h265d_addrs.reg168_dpb_decout_base = fd; + /* output raster/tile4x4 */ + hw_regs->common_addr.reg135_pp_m_decout_base = fd; + hw_regs->h265d_addrs.reg192_dpb_payload64x4_st_cur_base = fd; + hw_regs->h265d_addrs.reg169_error_ref_base = fd; + vdpu384a_setup_down_scale(mframe, reg_ctx->dev, &hw_regs->ctrl_regs, (void*)&hw_regs->h265d_paras); + break; + case MPP_FRAME_THUMBNAIL_MIXED: + hw_regs->common_addr.reg133_scale_down_base = mpp_buffer_get_fd(mbuffer); + vdpu384a_setup_down_scale(mframe, reg_ctx->dev, &hw_regs->ctrl_regs, (void*)&hw_regs->h265d_paras); + break; + case MPP_FRAME_THUMBNAIL_NONE: + default: + hw_regs->ctrl_regs.reg9.scale_down_en = 0; + break; + } + } + + return ret; +} + +static MPP_RET hal_h265d_vdpu384a_start(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_OK; + RK_U8* p = NULL; + Vdpu384aH265dRegSet *hw_regs = NULL; + HalH265dCtx *reg_ctx = (HalH265dCtx *)hal; + RK_S32 index = task->dec.reg_index; + + RK_U32 i; + + if (task->dec.flags.parse_err || + task->dec.flags.ref_err) { + h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__); + return MPP_OK; + } + + if (reg_ctx->fast_mode) { + p = (RK_U8*)reg_ctx->g_buf[index].hw_regs; + hw_regs = ( Vdpu384aH265dRegSet *)reg_ctx->g_buf[index].hw_regs; + } else { + p = 
(RK_U8*)reg_ctx->hw_regs; + hw_regs = ( Vdpu384aH265dRegSet *)reg_ctx->hw_regs; + } + + if (hw_regs == NULL) { + mpp_err("hal_h265d_start hw_regs is NULL"); + return MPP_ERR_NULL_PTR; + } + for (i = 0; i < 68; i++) { + h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n", + i, *((RK_U32*)p)); + //mpp_log("RK_HEVC_DEC: regs[%02d]=%08X\n", i, *((RK_U32*)p)); + p += 4; + } + + do { + MppDevRegWrCfg wr_cfg; + MppDevRegRdCfg rd_cfg; + + wr_cfg.reg = &hw_regs->ctrl_regs; + wr_cfg.size = sizeof(hw_regs->ctrl_regs); + wr_cfg.offset = OFFSET_CTRL_REGS; + ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = &hw_regs->common_addr; + wr_cfg.size = sizeof(hw_regs->common_addr); + wr_cfg.offset = OFFSET_COMMON_ADDR_REGS; + ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = &hw_regs->h265d_paras; + wr_cfg.size = sizeof(hw_regs->h265d_paras); + wr_cfg.offset = OFFSET_CODEC_PARAS_REGS; + ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = &hw_regs->h265d_addrs; + wr_cfg.size = sizeof(hw_regs->h265d_addrs); + wr_cfg.offset = OFFSET_CODEC_ADDR_REGS; + ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + rd_cfg.reg = &hw_regs->ctrl_regs.reg15; + rd_cfg.size = sizeof(hw_regs->ctrl_regs.reg15); + rd_cfg.offset = OFFSET_INTERRUPT_REGS; + ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_REG_RD, &rd_cfg); + if (ret) { + mpp_err_f("set register read failed %d\n", ret); + break; + } + + /* rcb info for sram */ + vdpu384a_set_rcbinfo(reg_ctx->dev, (Vdpu384aRcbInfo*)reg_ctx->rcb_info); + + ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_SEND, NULL); + if (ret) { + mpp_err_f("send cmd failed %d\n", ret); + break; + } + } while (0); + + return ret; +} + + +static MPP_RET hal_h265d_vdpu384a_wait(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_OK; + RK_S32 index = task->dec.reg_index; + HalH265dCtx *reg_ctx = (HalH265dCtx *)hal; + RK_U8* p = NULL; + Vdpu384aH265dRegSet *hw_regs = NULL; + RK_S32 i; + + if (reg_ctx->fast_mode) { + hw_regs = ( Vdpu384aH265dRegSet *)reg_ctx->g_buf[index].hw_regs; + } else { + hw_regs = ( Vdpu384aH265dRegSet *)reg_ctx->hw_regs; + } + + p = (RK_U8*)hw_regs; + + if (task->dec.flags.parse_err || + task->dec.flags.ref_err) { + h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__); + goto ERR_PROC; + } + + ret = mpp_dev_ioctl(reg_ctx->dev, MPP_DEV_CMD_POLL, NULL); + if (ret) + mpp_err_f("poll cmd failed %d\n", ret); + +ERR_PROC: + if (task->dec.flags.parse_err || + task->dec.flags.ref_err || + (!hw_regs->ctrl_regs.reg15.rkvdec_frame_rdy_sta) || + hw_regs->ctrl_regs.reg15.rkvdec_strm_error_sta || + hw_regs->ctrl_regs.reg15.rkvdec_core_timeout_sta || + hw_regs->ctrl_regs.reg15.rkvdec_ip_timeout_sta || + hw_regs->ctrl_regs.reg15.rkvdec_bus_error_sta || + hw_regs->ctrl_regs.reg15.rkvdec_buffer_empty_sta || + hw_regs->ctrl_regs.reg15.rkvdec_colmv_ref_error_sta) { + if (!reg_ctx->fast_mode) { + if (reg_ctx->dec_cb) + mpp_callback(reg_ctx->dec_cb, &task->dec); + } else { + MppFrame mframe = NULL; + mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output, + SLOT_FRAME_PTR, &mframe); + if (mframe) { + reg_ctx->fast_mode_err_found = 1; + mpp_frame_set_errinfo(mframe, 1); + } + } + } else { + if
(reg_ctx->fast_mode && reg_ctx->fast_mode_err_found) { + for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) { + if (task->dec.refer[i] >= 0) { + MppFrame frame_ref = NULL; + + mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.refer[i], + SLOT_FRAME_PTR, &frame_ref); + h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n", + i, task->dec.refer[i], frame_ref); + if (frame_ref && mpp_frame_get_errinfo(frame_ref)) { + MppFrame frame_out = NULL; + mpp_buf_slot_get_prop(reg_ctx->slots, task->dec.output, + SLOT_FRAME_PTR, &frame_out); + mpp_frame_set_errinfo(frame_out, 1); + break; + } + } + } + } + } + + for (i = 0; i < 68; i++) { + if (i == 1) { + h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n", + i, *((RK_U32*)p)); + } + + if (i == 45) { + h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n", + i, *((RK_U32*)p)); + } + p += 4; + } + + if (reg_ctx->fast_mode) { + reg_ctx->g_buf[index].use_flag = 0; + } + + return ret; +} + +static MPP_RET hal_h265d_vdpu384a_reset(void *hal) +{ + MPP_RET ret = MPP_OK; + HalH265dCtx *p_hal = (HalH265dCtx *)hal; + p_hal->fast_mode_err_found = 0; + (void)hal; + return ret; +} + +static MPP_RET hal_h265d_vdpu384a_flush(void *hal) +{ + MPP_RET ret = MPP_OK; + + (void)hal; + return ret; +} + +static MPP_RET hal_h265d_vdpu384a_control(void *hal, MpiCmd cmd_type, void *param) +{ + MPP_RET ret = MPP_OK; + HalH265dCtx *p_hal = (HalH265dCtx *)hal; + + (void)hal; + (void)param; + switch ((MpiCmd)cmd_type) { + case MPP_DEC_SET_FRAME_INFO: { + MppFrame frame = (MppFrame)param; + MppFrameFormat fmt = mpp_frame_get_fmt(frame); + RK_U32 imgwidth = mpp_frame_get_width((MppFrame)param); + RK_U32 imgheight = mpp_frame_get_height((MppFrame)param); + + if (fmt == MPP_FMT_YUV422SP) { + mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align_422); + } else if (fmt == MPP_FMT_YUV444SP) { + mpp_slots_set_prop(p_hal->slots, SLOTS_LEN_ALIGN, rkv_len_align_444); + } + if (MPP_FRAME_FMT_IS_FBC(fmt)) { + vdpu384a_afbc_align_calc(p_hal->slots, frame, 16); + } else if (imgwidth > 1920 || imgheight > 1088) { + mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, mpp_align_128_odd_plus_64); + } + break; + } + case MPP_DEC_GET_THUMBNAIL_FRAME_INFO: { + vdpu384a_update_thumbnail_frame_info((MppFrame)param); + } break; + case MPP_DEC_SET_OUTPUT_FORMAT: { + } break; + default: { + } break; + } + return ret; +} + +const MppHalApi hal_h265d_vdpu384a = { + .name = "h265d_vdpu384a", + .type = MPP_CTX_DEC, + .coding = MPP_VIDEO_CodingHEVC, + .ctx_size = sizeof(HalH265dCtx), + .flag = 0, + .init = hal_h265d_vdpu384a_init, + .deinit = hal_h265d_vdpu384a_deinit, + .reg_gen = hal_h265d_vdpu384a_gen_regs, + .start = hal_h265d_vdpu384a_start, + .wait = hal_h265d_vdpu384a_wait, + .reset = hal_h265d_vdpu384a_reset, + .flush = hal_h265d_vdpu384a_flush, + .control = hal_h265d_vdpu384a_control, +}; diff --git a/mpp/hal/rkdec/h265d/hal_h265d_vdpu384a.h b/mpp/hal/rkdec/h265d/hal_h265d_vdpu384a.h new file mode 100644 index 00000000..620774aa --- /dev/null +++ b/mpp/hal/rkdec/h265d/hal_h265d_vdpu384a.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: Apache-2.0 OR MIT */ +/* + * Copyright (c) 2024 Rockchip Electronics Co., Ltd. 
+ */ + +#ifndef __HAL_H265D_VDPU384A_H__ +#define __HAL_H265D_VDPU384A_H__ + +#include "mpp_hal.h" +#include "vdpu384a.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const MppHalApi hal_h265d_vdpu384a; + +#ifdef __cplusplus +} +#endif + +#endif /* __HAL_H265D_VDPU384A_H__ */ diff --git a/mpp/hal/rkdec/inc/vdpu384a.h b/mpp/hal/rkdec/inc/vdpu384a.h new file mode 100644 index 00000000..55f26468 --- /dev/null +++ b/mpp/hal/rkdec/inc/vdpu384a.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: Apache-2.0 OR MIT */ +/* + * Copyright (c) 2024 Rockchip Electronics Co., Ltd. + */ + +#ifndef __VDPU384A_H__ +#define __VDPU384A_H__ + +#define HWID_VDPU384A (0x38436021) + +#endif /* __VDPU384A_H__ */ diff --git a/mpp/hal/rkdec/inc/vdpu384a_com.h b/mpp/hal/rkdec/inc/vdpu384a_com.h new file mode 100644 index 00000000..65ac4791 --- /dev/null +++ b/mpp/hal/rkdec/inc/vdpu384a_com.h @@ -0,0 +1,695 @@ +/* SPDX-License-Identifier: Apache-2.0 OR MIT */ +/* + * Copyright (c) 2024 Rockchip Electronics Co., Ltd. + */ + +#ifndef __VDPU384A_COM_H__ +#define __VDPU384A_COM_H__ + +#include "mpp_device.h" +#include "mpp_buf_slot.h" + +#define OFFSET_CTRL_REGS (8 * sizeof(RK_U32)) +#define OFFSET_COMMON_ADDR_REGS (128 * sizeof(RK_U32)) +#define OFFSET_COM_NEW_REGS (320 * sizeof(RK_U32)) +#define OFFSET_CODEC_PARAS_REGS (64 * sizeof(RK_U32)) +#define OFFSET_CODEC_ADDR_REGS (168 * sizeof(RK_U32)) +#define OFFSET_INTERRUPT_REGS (15 * sizeof(RK_U32)) + +#define RCB_ALLINE_SIZE (64) + +#define MPP_RCB_BYTES(bits) MPP_ALIGN((bits + 7) / 8, RCB_ALLINE_SIZE) + +// #define DUMP_VDPU384A_DATAS + +typedef enum Vdpu384aRcbType_e { + RCB_STRMD_ROW, + RCB_STRMD_TILE_ROW, + RCB_INTER_ROW, + RCB_INTER_TILE_ROW, + RCB_INTRA_ROW, + RCB_INTRA_TILE_ROW, + RCB_FILTERD_ROW, + RCB_FILTERD_PROTECT_ROW, + RCB_FILTERD_TILE_ROW, + RCB_FILTERD_TILE_COL, + RCB_FILTERD_AV1_UP_TILE_COL, + + RCB_BUF_COUNT, +} Vdpu384aRcbType; + +typedef enum Vdpu384a_RCB_SET_MODE_E { + RCB_SET_BY_SIZE_SORT_MODE, + RCB_SET_BY_PRIORITY_MODE, +} Vdpu384aRcbSetMode_e; + +typedef struct Vdpu384aRegVersion_t { + struct SWREG0_ID { + RK_U32 minor_ver : 8; + RK_U32 major_ver : 8; + RK_U32 prod_num : 16; + } reg0; + +} Vdpu384aRegVersion; + +typedef struct Vdpu384aCtrlReg_t { + /* SWREG8_DEC_MODE */ + RK_U32 reg8_dec_mode; + + struct SWREG9_IMPORTANT_EN { + RK_U32 dpb_output_dis : 1; + /* + * 0: dpb data use rkfbc64x4 channel + * 1: dpb data user main pp channel + * 2: dpb data use scl down channel + */ + RK_U32 dpb_data_sel : 2; + RK_U32 reserve0 : 1; + RK_U32 low_latency_en : 1; + RK_U32 scale_down_en : 1; + RK_U32 reserve1 : 1; + RK_U32 pix_range_det_e : 1; + RK_U32 av1_fgs_en : 1; + RK_U32 reserve2 : 3; + RK_U32 scale_down_ratio : 1; + RK_U32 scale_down_10bitto8bit_en : 1; + RK_U32 line_irq_en : 3; + RK_U32 out_cbcr_swap : 1; + RK_U32 dpb_rkfbc_force_uncompress : 1; + RK_U32 dpb_rkfbc_sparse_mode : 1; + RK_U32 reserve3 : 1; + RK_U32 pp_m_fbc32x8_force_uncompress : 1; + RK_U32 pp_m_fbc32x8_sparse_mode : 1; + RK_U32 inter_max_mv_detect_en : 1; + /* + * 0:disable pp main channel output + * 1:pp main channel output raster picture to ddr. + * 2:pp main channel output tile4x4 picture to ddr. + * 3:pp main channel output afbc32x8 picture to ddr. 
+ */ + RK_U32 pp_m_output_mode : 2; + RK_U32 reserve4 : 6; + } reg9; + + struct SWREG10_BLOCK_GATING_EN { + RK_U32 strmd_auto_gating_e : 1; + RK_U32 inter_auto_gating_e : 1; + RK_U32 intra_auto_gating_e : 1; + RK_U32 transd_auto_gating_e : 1; + RK_U32 recon_auto_gating_e : 1; + RK_U32 filterd_auto_gating_e : 1; + RK_U32 bus_auto_gating_e : 1; + RK_U32 ctrl_auto_gating_e : 1; + RK_U32 rcb_auto_gating_e : 1; + RK_U32 err_prc_auto_gating_e : 1; + RK_U32 reserve0 : 22; + } reg10; + + struct SWREG11_CFG_PARA { + RK_U32 frame_irq_dis : 1; + RK_U32 reserve0 : 8; + RK_U32 dec_timeout_dis : 1; + RK_U32 reserve1 : 6; + RK_U32 rd_outstanding : 8; + RK_U32 wr_outstanding : 8; + } reg11; + + struct SWREG12_CACHE_HASH_MASK { + RK_U32 reserve0 : 7; + RK_U32 cache_hash_mask : 25; + } reg12; + + /* SWREG13_CORE_TIMEOUT_THRESHOLD */ + RK_U32 reg13_core_timeout_threshold; + + struct SWREG14_LINE_IRQ_CTRL { + RK_U32 dec_line_irq_step : 16; + RK_U32 dec_line_offset_y_st : 16; + } reg14; + + /* copy from llp, media group add */ + struct SWREG15_IRQ_STA { + RK_U32 rkvdec_frame_rdy_sta : 1; + RK_U32 rkvdec_strm_error_sta : 1; + RK_U32 rkvdec_core_timeout_sta : 1; + RK_U32 rkvdec_ip_timeout_sta : 1; + RK_U32 rkvdec_bus_error_sta : 1; + RK_U32 rkvdec_buffer_empty_sta : 1; + RK_U32 rkvdec_colmv_ref_error_sta : 1; + RK_U32 rkvdec_error_spread_sta : 1; + RK_U32 create_core_timeout_sta : 1; + RK_U32 wlast_miss_match_sta : 1; + RK_U32 rkvdec_core_rst_rdy_sta : 1; + RK_U32 rkvdec_ip_rst_rdy_sta : 1; + RK_U32 force_busidle_rdy_sta : 1; + RK_U32 ltb_pause_rdy_sta : 1; + RK_U32 ltb_end_flag : 1; + RK_U32 unsupport_decmode_error_sta : 1; + RK_U32 wmask_bits : 15; + RK_U32 reserve0 : 1; + } reg15; + + struct SWREG16_ERROR_CTRL_SET { + RK_U32 error_proc_disable : 1; + RK_U32 reserve0 : 3; + RK_U32 error_proc_mode : 1; + RK_U32 reserve1 : 3; + RK_U32 error_spread_disable : 1; + RK_U32 error_fill_mode : 1; + RK_U32 reserve2 : 14; + RK_U32 roi_error_ctu_cal_en : 1; + RK_U32 reserve3 : 7; + } reg16; + + struct SWREG17_ERR_ROI_CTU_OFFSET_START { + RK_U32 roi_x_ctu_offset_st : 12; + RK_U32 reserve0 : 4; + RK_U32 roi_y_ctu_offset_st : 12; + RK_U32 reserve1 : 4; + } reg17; + + struct SWREG18_ERR_ROI_CTU_OFFSET_END { + RK_U32 roi_x_ctu_offset_end : 12; + RK_U32 reserve0 : 4; + RK_U32 roi_y_ctu_offset_end : 12; + RK_U32 reserve1 : 4; + } reg18; + + struct SWREG19_ERROR_REF_INFO { + RK_U32 avs2_ref_error_field : 1; + RK_U32 avs2_ref_error_topfield : 1; + RK_U32 ref_error_topfield_used : 1; + RK_U32 ref_error_botfield_used : 1; + RK_U32 reserve0 : 28; + } reg19; + + /* SWREG20_CABAC_ERROR_EN_LOWBITS */ + RK_U32 reg20_cabac_error_en_lowbits; + + /* SWREG21_CABAC_ERROR_EN_HIGHBITS */ + RK_U32 reg21_cabac_error_en_highbits; + + RK_U32 reserve_reg22; + + struct SWREG23_INVALID_PIXEL_FILL { + RK_U32 fill_y : 10; + RK_U32 fill_u : 10; + RK_U32 fill_v : 10; + RK_U32 reserve0 : 2; + } reg23; + + RK_U32 reserve_reg24_27[4]; + + struct SWREG28_DEBUG_PERF_LATENCY_CTRL0 { + RK_U32 axi_perf_work_e : 1; + RK_U32 reserve0 : 2; + RK_U32 axi_cnt_type : 1; + RK_U32 rd_latency_id : 8; + RK_U32 reserve1 : 4; + RK_U32 rd_latency_thr : 12; + RK_U32 reserve2 : 4; + } reg28; + + struct SWREG29_DEBUG_PERF_LATENCY_CTRL1 { + RK_U32 addr_align_type : 2; + RK_U32 ar_cnt_id_type : 1; + RK_U32 aw_cnt_id_type : 1; + RK_U32 ar_count_id : 8; + RK_U32 reserve0 : 4; + RK_U32 aw_count_id : 8; + RK_U32 rd_band_width_mode : 1; + RK_U32 reserve1 : 7; + } reg29; + + struct SWREG30_QOS_CTRL { + RK_U32 axi_wr_qos_level : 4; + RK_U32 reserve0 : 4; + RK_U32 axi_wr_qos : 4; + RK_U32 
reserve1 : 4; + RK_U32 axi_rd_qos_level : 4; + RK_U32 reserve2 : 4; + RK_U32 axi_rd_qos : 4; + RK_U32 reserve3 : 4; + } reg30; + +} Vdpu384aCtrlReg; + +typedef struct Vdpu384aRegCommonAddr_t { + /* SWREG128_STRM_BASE */ + RK_U32 reg128_strm_base; + + /* SWREG129_STREAM_BUF_ST_BASE */ + RK_U32 reg129_stream_buf_st_base; + + /* SWREG130_STREAM_BUF_END_BASE */ + RK_U32 reg130_stream_buf_end_base; + + /* SWREG131_GBL_BASE */ + RK_U32 reg131_gbl_base; + + /* SWREG132_SCANLIST_ADDR */ + RK_U32 reg132_scanlist_addr; + + /* SWREG133_SCL_BASE */ + RK_U32 reg133_scale_down_base; + + /* SWREG134_FGS_BASE */ + RK_U32 reg134_fgs_base; + + /* SWREG135_PP_M_DECOUT_BASE */ + RK_U32 reg135_pp_m_decout_base; + + /* SWREG136_PP_M_FBC32x8_PAYLOAD_OFFSET */ + RK_U32 reg136_pp_m_fbc32x8_payload_offset; + + RK_U32 reserve_reg137_139[3]; + + /* SWREG140_RCB_STRMD_ROW_OFFSET */ + RK_U32 reg140_rcb_strmd_row_offset; + + /* SWREG141_RCB_STRMD_ROW_LEN */ + RK_U32 reg141_rcb_strmd_row_len; + + /* SWREG142_RCB_STRMD_TILE_ROW_OFFSET */ + RK_U32 reg142_rcb_strmd_tile_row_offset; + + /* SWREG143_RCB_STRMD_TILE_ROW_LEN */ + RK_U32 reg143_rcb_strmd_tile_row_len; + + /* SWREG144_RCB_INTER_ROW_OFFSET */ + RK_U32 reg144_rcb_inter_row_offset; + + /* SWREG145_RCB_INTER_ROW_LEN */ + RK_U32 reg145_rcb_inter_row_len; + + /* SWREG146_RCB_INTER_TILE_ROW_OFFSET */ + RK_U32 reg146_rcb_inter_tile_row_offset; + + /* SWREG147_RCB_INTER_TILE_ROW_LEN */ + RK_U32 reg147_rcb_inter_tile_row_len; + + /* SWREG148_RCB_INTRA_ROW_OFFSET */ + RK_U32 reg148_rcb_intra_row_offset; + + /* SWREG149_RCB_INTRA_ROW_LEN */ + RK_U32 reg149_rcb_intra_row_len; + + /* SWREG150_RCB_INTRA_TILE_ROW_OFFSET */ + RK_U32 reg150_rcb_intra_tile_row_offset; + + /* SWREG151_RCB_INTRA_TILE_ROW_LEN */ + RK_U32 reg151_rcb_intra_tile_row_len; + + /* SWREG152_RCB_FILTERD_ROW_OFFSET */ + RK_U32 reg152_rcb_filterd_row_offset; + + /* SWREG153_RCB_FILTERD_ROW_LEN */ + RK_U32 reg153_rcb_filterd_row_len; + + RK_U32 reserve_reg154_155[2]; + + /* SWREG156_RCB_FILTERD_TILE_ROW_OFFSET */ + RK_U32 reg156_rcb_filterd_tile_row_offset; + + /* SWREG157_RCB_FILTERD_TILE_ROW_LEN */ + RK_U32 reg157_rcb_filterd_tile_row_len; + + /* SWREG158_RCB_FILTERD_TILE_COL_OFFSET */ + RK_U32 reg158_rcb_filterd_tile_col_offset; + + /* SWREG159_RCB_FILTERD_TILE_COL_LEN */ + RK_U32 reg159_rcb_filterd_tile_col_len; + + /* SWREG160_RCB_FILTERD_AV1_UPSCALE_TILE_COL_OFFSET */ + RK_U32 reg160_rcb_filterd_av1_upscale_tile_col_offset; + + /* SWREG161_RCB_FILTERD_AV1_UPSCALE_TILE_COL_LEN */ + RK_U32 reg161_rcb_filterd_av1_upscale_tile_col_len; + +} Vdpu384aRegCommonAddr; + +typedef struct Vdpu384aRegCommParas_t { + /* SWREG64_H26X_PARA */ + RK_U32 reg64_unused_bits; + + /* SWREG65_STREAM_PARAM_SET */ + RK_U32 reg65_strm_start_bit; + + /* SWREG66_STREAM_LEN */ + RK_U32 reg66_stream_len; + + /* SWREG67_GLOBAL_LEN */ + RK_U32 reg67_global_len; + + /* SWREG68_DPB_HOR_STRIDE */ + RK_U32 reg68_dpb_hor_virstride; + + RK_U32 reserve_reg69_70[2]; + + /* SWREG71_SCL_Y_HOR_VIRSTRIDE */ + RK_U32 reg71_scl_ref_hor_virstride; + + /* SWREG72_SCL_UV_HOR_VIRSTRIDE */ + RK_U32 reg72_scl_ref_raster_uv_hor_virstride; + + /* SWREG73_SCL_Y_VIRSTRIDE */ + RK_U32 reg73_scl_ref_virstride; + + /* SWREG74_FGS_Y_HOR_VIRSTRIDE */ + RK_U32 reg74_fgs_ref_hor_virstride; + + RK_U32 reserve_reg75_76[2]; + + /* SWREG77_HEAD_HOR_STRIDE */ + RK_U32 reg77_pp_m_hor_stride; + + /* SWREG78_PP_M_RASTER_UV_HOR_STRIDE */ + RK_U32 reg78_pp_m_uv_hor_stride; + + /* SWREG79_PP_M_Y_STRIDE */ + RK_U32 reg79_pp_m_y_virstride; + + /* SWREG80_ERROR_REF_Y_HOR_VIRSTRIDE */ 
+ RK_U32 reg80_error_ref_hor_virstride; + + /* SWREG81_ERROR_REF_UV_HOR_VIRSTRIDE */ + RK_U32 reg81_error_ref_raster_uv_hor_virstride; + + /* SWREG82_ERROR_REF_Y_VIRSTRIDE */ + RK_U32 reg82_error_ref_virstride; + + /* SWREG83_REF0_Y_HOR_VIRSTRIDE */ + RK_U32 reg83_ref0_hor_virstride; + + /* SWREG84_REF0_UV_HOR_VIRSTRIDE */ + RK_U32 reg84_ref0_raster_uv_hor_virstride; + + /* SWREG85_REF0_Y_VIRSTRIDE */ + RK_U32 reg85_ref0_virstride; + + /* SWREG86_REF1_Y_HOR_VIRSTRIDE */ + RK_U32 reg86_ref1_hor_virstride; + + /* SWREG87_REF1_UV_HOR_VIRSTRIDE */ + RK_U32 reg87_ref1_raster_uv_hor_virstride; + + /* SWREG88_REF1_Y_VIRSTRIDE */ + RK_U32 reg88_ref1_virstride; + + /* SWREG89_REF2_Y_HOR_VIRSTRIDE */ + RK_U32 reg89_ref2_hor_virstride; + + /* SWREG90_REF2_UV_HOR_VIRSTRIDE */ + RK_U32 reg90_ref2_raster_uv_hor_virstride; + + /* SWREG91_REF2_Y_VIRSTRIDE */ + RK_U32 reg91_ref2_virstride; + + /* SWREG92_REF3_Y_HOR_VIRSTRIDE */ + RK_U32 reg92_ref3_hor_virstride; + + /* SWREG93_REF3_UV_HOR_VIRSTRIDE */ + RK_U32 reg93_ref3_raster_uv_hor_virstride; + + /* SWREG94_REF3_Y_VIRSTRIDE */ + RK_U32 reg94_ref3_virstride; + + /* SWREG95_REF4_Y_HOR_VIRSTRIDE */ + RK_U32 reg95_ref4_hor_virstride; + + /* SWREG96_REF4_UV_HOR_VIRSTRIDE */ + RK_U32 reg96_ref4_raster_uv_hor_virstride; + + /* SWREG97_REF4_Y_VIRSTRIDE */ + RK_U32 reg97_ref4_virstride; + + /* SWREG98_REF5_Y_HOR_VIRSTRIDE */ + RK_U32 reg98_ref5_hor_virstride; + + /* SWREG99_REF5_UV_HOR_VIRSTRIDE */ + RK_U32 reg99_ref5_raster_uv_hor_virstride; + + /* SWREG100_REF5_Y_VIRSTRIDE */ + RK_U32 reg100_ref5_virstride; + + /* SWREG101_REF6_Y_HOR_VIRSTRIDE */ + RK_U32 reg101_ref6_hor_virstride; + + /* SWREG102_REF6_UV_HOR_VIRSTRIDE */ + RK_U32 reg102_ref6_raster_uv_hor_virstride; + + /* SWREG103_REF6_Y_VIRSTRIDE */ + RK_U32 reg103_ref6_virstride; + + /* SWREG104_REF7_Y_HOR_VIRSTRIDE */ + RK_U32 reg104_ref7_hor_virstride; + + /* SWREG105_REF7_UV_HOR_VIRSTRIDE */ + RK_U32 reg105_ref7_raster_uv_hor_virstride; + + /* SWREG106_REF7_Y_VIRSTRIDE */ + RK_U32 reg106_ref7_virstride; + +} Vdpu384aRegCommParas; + +typedef struct Vdpu384aRegStatistic_t { + struct SWREG256_IDLE_FLAG { + RK_U32 reserve0 : 24; + RK_U32 rkvdec_bus_idle_flag : 1; + RK_U32 reserve1 : 7; + } reg256; + + RK_U32 reserve_reg257; + + /* SWREG258_PERF_MONITOR */ + RK_U32 reg258_perf_rd_max_latency_num; + + /* SWREG259_PERF_MONITOR */ + RK_U32 reg259_perf_rd_latency_samp_num; + + /* SWREG260_PERF_MONITOR */ + RK_U32 reg260_perf_rd_latency_acc_sum; + + /* SWREG261_PERF_MONITOR */ + RK_U32 reg261_perf_rd_axi_total_byte; + + /* SWREG262_PERF_MONITOR */ + RK_U32 reg262_perf_wr_axi_total_bytes; + + /* SWREG263_PERF_MONITOR */ + RK_U32 reg263_perf_working_cnt; + + RK_U32 reserve_reg264_272[9]; + + /* SWREG273_REFLIST_IDX_USED */ + RK_U32 reg273_inter_sw_reflst_idx_use; + + RK_U32 reserve_reg274_284[11]; + + /* SWREG285_PAYLOAD_CNT */ + RK_U32 reg285_filterd_payload_total_cnt; + + struct SWREG286_WR_OFFSET { + RK_U32 filterd_report_offsety : 16; + RK_U32 filterd_report_offsetx : 16; + } reg286; + + struct SWREG287_MAX_PIX { + RK_U32 filterd_max_y : 10; + RK_U32 filterd_max_u : 10; + RK_U32 filterd_max_v : 10; + RK_U32 reserve0 : 2; + } reg287; + + struct SWREG288_MIN_PIX { + RK_U32 filterd_min_y : 10; + RK_U32 filterd_min_u : 10; + RK_U32 filterd_min_v : 10; + RK_U32 reserve0 : 2; + } reg288; + + /* SWREG289_WR_LINE_NUM */ + RK_U32 reg289_filterd_line_irq_offsety; + + RK_U32 reserve_reg290_291[2]; + + struct SWREG292_RCB_RW_SUM { + RK_U32 rcb_rd_sum_chk : 8; + RK_U32 rcb_wr_sum_chk : 8; + RK_U32 reserve0 : 16; + } reg292; 
+ + RK_U32 reserve_reg293; + + struct SWREG294_ERR_CTU_NUM0 { + RK_U32 error_ctu_num : 24; + RK_U32 roi_error_ctu_num_lowbit : 8; + } reg294; + + /* SWREG295_ERR_CTU_NUM1 */ + RK_U32 reg295_roi_error_ctu_num_highbit; + +} Vdpu384aRegStatistic; + +typedef struct Vdpu384aRegLlp_t { + struct SWREG0_LINK_MODE { + RK_U32 llp_mmu_zap_cache_dis : 1; + RK_U32 reserve0 : 15; + RK_U32 core_work_mode : 1; + RK_U32 ccu_core_work_mode : 1; + RK_U32 reserve1 : 3; + RK_U32 ltb_pause_flag : 1; + RK_U32 reserve2 : 10; + } reg0; + + struct SWREG1_CFG_START_ADDR { + RK_U32 reserve0 : 4; + RK_U32 reg_cfg_addr : 28; + } reg1; + + struct SWREG2_LINK_MODE { + RK_U32 pre_frame_num : 30; + RK_U32 reserve0 : 1; + RK_U32 link_mode : 1; + } reg2; + + /* SWREG3_CONFIG_DONE */ + RK_U32 reg3_done; + + /* SWREG4_DECODERED_NUM */ + RK_U32 reg4_num; + + /* SWREG5_DEC_TOTAL_NUM */ + RK_U32 reg5_total_num; + + /* SWREG6_LINK_MODE_EN */ + RK_U32 reg6_mode_en; + + /* SWREG7_SKIP_NUM */ + RK_U32 reg7_num; + + /* SWREG8_CUR_LTB_IDX */ + RK_U32 reg8_ltb_idx; + + RK_U32 reserve_reg9_15[7]; + + /* SWREG16_DEC_E */ + RK_U32 reg16_dec_e; + + /* SWREG17_SOFT_RST */ + RK_U32 reg17_rkvdec_ip_rst_p; + + struct SWREG18_IRQ { + RK_U32 rkvdec_irq : 1; + RK_U32 rkvdec_line_irq : 1; + RK_U32 reserve0 : 14; + RK_U32 wmask : 2; + RK_U32 reserve1 : 14; + } reg18; + + struct SWREG19_STA { + RK_U32 rkvdec_frame_rdy_sta : 1; + RK_U32 rkvdec_strm_error_sta : 1; + RK_U32 rkvdec_core_timeout_sta : 1; + RK_U32 rkvdec_ip_timeout_sta : 1; + RK_U32 rkvdec_bus_error_sta : 1; + RK_U32 rkvdec_buffer_empty_sta : 1; + RK_U32 rkvdec_colmv_ref_error_sta : 1; + RK_U32 rkvdec_error_spread_sta : 1; + RK_U32 create_core_timeout_sta : 1; + RK_U32 wlast_miss_match_sta : 1; + RK_U32 rkvdec_core_rst_rdy_sta : 1; + RK_U32 rkvdec_ip_rst_rdy_sta : 1; + RK_U32 force_busidle_rdy_sta : 1; + RK_U32 ltb_pause_rdy_sta : 1; + RK_U32 ltb_end_flag : 1; + RK_U32 unsupport_decmode_error_sta : 1; + RK_U32 wmask_bits : 15; + RK_U32 reserve0 : 1; + } reg19; + + RK_U32 reserve_reg20; + + /* SWREG21_IP_TIMEOUT_THRESHOD */ + RK_U32 reg21_ip_timeout_threshold; + + struct SWREG22_IP_EN { + RK_U32 ip_timeout_pause_flag : 1; + RK_U32 reserve0 : 3; + RK_U32 abnormal_auto_reset_dis : 1; + RK_U32 reserve1 : 3; + RK_U32 force_busidle_req_flag : 1; + RK_U32 reserve2 : 3; + RK_U32 bus_clkgate_dis : 1; + RK_U32 ctrl_clkgate_dis : 1; + RK_U32 reserve3 : 1; + RK_U32 irq_dis : 1; + RK_U32 wid_reorder_dis : 1; + RK_U32 reserve4 : 7; + RK_U32 clk_cru_mode : 2; + RK_U32 reserve5 : 5; + RK_U32 mmu_sel : 1; + } reg22; + + struct SWREG23_IN_OUT { + RK_U32 endian : 1; + RK_U32 swap32_e : 1; + RK_U32 swap64_e : 1; + RK_U32 str_endian : 1; + RK_U32 str_swap32_e : 1; + RK_U32 str_swap64_e : 1; + RK_U32 reserve0 : 26; + } reg23; + + /* SWREG24_EXTRA_STRM_BASE */ + RK_U32 reg24_extra_stream_base; + + /* SWREG25_EXTRA_STRM_LEN */ + RK_U32 reg25_extra_stream_len; + + /* SWREG26_EXTRA_STRM_PARA_SET */ + RK_U32 reg26_extra_strm_start_bit; + + /* SWREG27_BUF_EMPTY_RESTART */ + RK_U32 reg27_buf_emtpy_restart_p; + + /* SWREG28_RCB_BASE */ + RK_U32 reg28_rcb_base; + +} Vdpu384aRegLlp; + +typedef struct Vdpu384aRcbInfo_t { + RK_U32 reg_idx; + RK_S32 size; + RK_S32 offset; +} Vdpu384aRcbInfo; + +#ifdef __cplusplus +extern "C" { +#endif + +RK_S32 vdpu384a_get_rcb_buf_size(Vdpu384aRcbInfo *info, RK_S32 width, RK_S32 height); +RK_RET vdpu384a_check_rcb_buf_size(Vdpu384aRcbInfo *info, RK_S32 width, RK_S32 height); +void vdpu384a_setup_rcb(Vdpu384aRegCommonAddr *reg, MppDev dev, MppBuffer buf, Vdpu384aRcbInfo *info); +RK_S32 
vdpu384a_compare_rcb_size(const void *a, const void *b); +void vdpu384a_setup_statistic(Vdpu384aCtrlReg *com); +void vdpu384a_afbc_align_calc(MppBufSlots slots, MppFrame frame, RK_U32 expand); +RK_S32 vdpu384a_set_rcbinfo(MppDev dev, Vdpu384aRcbInfo *rcb_info); +void vdpu384a_setup_down_scale(MppFrame frame, MppDev dev, Vdpu384aCtrlReg *com, void* comParas); +void vdpu384a_update_thumbnail_frame_info(MppFrame frame); + +#ifdef DUMP_VDPU384A_DATAS +extern RK_U32 dump_cur_frame; +extern char dump_cur_dir[128]; +extern char dump_cur_fname_path[512]; + +MPP_RET flip_string(char *str); +MPP_RET dump_data_to_file(char *fname_path, void *data, RK_U32 data_bit_size, + RK_U32 line_bits, RK_U32 big_end); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __VDPU384A_COM_H__ */ diff --git a/mpp/hal/rkdec/inc/vdpu384a_h264d.h b/mpp/hal/rkdec/inc/vdpu384a_h264d.h new file mode 100644 index 00000000..d8e419fe --- /dev/null +++ b/mpp/hal/rkdec/inc/vdpu384a_h264d.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: Apache-2.0 OR MIT */ +/* + * Copyright (c) 2024 Rockchip Electronics Co., Ltd. + */ + +#ifndef __VDPU384A_H264D_H__ +#define __VDPU384A_H264D_H__ + +#include "vdpu384a_com.h" + + +typedef struct Vdpu384aRegH264dParam_t { + /* SWREG64_H26X_PARA */ + RK_U32 reg64_unused_bits; + + /* SWREG65_STREAM_PARAM_SET */ + RK_U32 reg65_strm_start_bit; + + /* SWREG66_STREAM_LEN */ + RK_U32 reg66_stream_len; + + /* SWREG67_GLOBAL_LEN */ + RK_U32 reg67_global_len; + + /* SWREG68_DPB_HOR_STRIDE */ + RK_U32 reg68_dpb_hor_virstride; + + RK_U32 reserve_reg69_70[2]; + + /* SWREG71_SCL_Y_HOR_VIRSTRIDE */ + RK_U32 reg71_scl_ref_hor_virstride; + + /* SWREG72_SCL_UV_HOR_VIRSTRIDE */ + RK_U32 reg72_scl_ref_raster_uv_hor_virstride; + + /* SWREG73_SCL_Y_VIRSTRIDE */ + RK_U32 reg73_scl_ref_virstride; + + /* SWREG74_FGS_Y_HOR_VIRSTRIDE */ + RK_U32 reg74_fgs_ref_hor_virstride; + + RK_U32 reserve_reg75_76[2]; + + /* SWREG77_HEAD_HOR_STRIDE */ + RK_U32 reg77_pp_m_hor_stride; + + /* SWREG78_PP_M_RASTER_UV_HOR_STRIDE */ + RK_U32 reg78_pp_m_uv_hor_stride; + + /* SWREG79_PP_M_Y_STRIDE */ + RK_U32 reg79_pp_m_y_virstride; + + /* SWREG80_ERROR_REF_Y_HOR_VIRSTRIDE */ + RK_U32 reg80_error_ref_hor_virstride; + + /* SWREG81_ERROR_REF_UV_HOR_VIRSTRIDE */ + RK_U32 reg81_error_ref_raster_uv_hor_virstride; + + /* SWREG82_ERROR_REF_Y_VIRSTRIDE */ + RK_U32 reg82_error_ref_virstride; + + /* SWREG83_REF0_Y_HOR_VIRSTRIDE */ + RK_U32 reg83_ref0_hor_virstride; + + /* SWREG84_REF0_UV_HOR_VIRSTRIDE */ + RK_U32 reg84_ref0_raster_uv_hor_virstride; + + /* SWREG85_REF0_Y_VIRSTRIDE */ + RK_U32 reg85_ref0_virstride; + + /* SWREG86_REF1_Y_HOR_VIRSTRIDE */ + RK_U32 reg86_ref1_hor_virstride; + + /* SWREG87_REF1_UV_HOR_VIRSTRIDE */ + RK_U32 reg87_ref1_raster_uv_hor_virstride; + + /* SWREG88_REF1_Y_VIRSTRIDE */ + RK_U32 reg88_ref1_virstride; + + /* SWREG89_REF2_Y_HOR_VIRSTRIDE */ + RK_U32 reg89_ref2_hor_virstride; + + /* SWREG90_REF2_UV_HOR_VIRSTRIDE */ + RK_U32 reg90_ref2_raster_uv_hor_virstride; + + /* SWREG91_REF2_Y_VIRSTRIDE */ + RK_U32 reg91_ref2_virstride; + + /* SWREG92_REF3_Y_HOR_VIRSTRIDE */ + RK_U32 reg92_ref3_hor_virstride; + + /* SWREG93_REF3_UV_HOR_VIRSTRIDE */ + RK_U32 reg93_ref3_raster_uv_hor_virstride; + + /* SWREG94_REF3_Y_VIRSTRIDE */ + RK_U32 reg94_ref3_virstride; + + /* SWREG95_REF4_Y_HOR_VIRSTRIDE */ + RK_U32 reg95_ref4_hor_virstride; + + /* SWREG96_REF4_UV_HOR_VIRSTRIDE */ + RK_U32 reg96_ref4_raster_uv_hor_virstride; + + /* SWREG97_REF4_Y_VIRSTRIDE */ + RK_U32 reg97_ref4_virstride; + + /* SWREG98_REF5_Y_HOR_VIRSTRIDE */ + RK_U32 
reg98_ref5_hor_virstride; + + /* SWREG99_REF5_UV_HOR_VIRSTRIDE */ + RK_U32 reg99_ref5_raster_uv_hor_virstride; + + /* SWREG100_REF5_Y_VIRSTRIDE */ + RK_U32 reg100_ref5_virstride; + + /* SWREG101_REF6_Y_HOR_VIRSTRIDE */ + RK_U32 reg101_ref6_hor_virstride; + + /* SWREG102_REF6_UV_HOR_VIRSTRIDE */ + RK_U32 reg102_ref6_raster_uv_hor_virstride; + + /* SWREG103_REF6_Y_VIRSTRIDE */ + RK_U32 reg103_ref6_virstride; + + /* SWREG104_REF7_Y_HOR_VIRSTRIDE */ + RK_U32 reg104_ref7_hor_virstride; + + /* SWREG105_REF7_UV_HOR_VIRSTRIDE */ + RK_U32 reg105_ref7_raster_uv_hor_virstride; + + /* SWREG106_REF7_Y_VIRSTRIDE */ + RK_U32 reg106_ref7_virstride; + +} Vdpu384aRegH264dParam; + +typedef struct Vdpu384aRegH264dAddr_t { + /* SWREG168_DECOUT_BASE */ + RK_U32 reg168_dpb_decout_base; + + /* SWREG169_ERROR_REF_BASE */ + RK_U32 reg169_error_ref_base; + + /* SWREG170_185_REF0_15_BASE */ + RK_U32 reg170_185_ref_base[16]; + + RK_U32 reserve_reg186_191[6]; + + /* SWREG192_PAYLOAD_ST_CUR_BASE */ + RK_U32 reg192_dpb_payload64x4_st_cur_base; + + /* SWREG193_FBC_PAYLOAD_OFFSET */ + RK_U32 reg193_dpb_fbc64x4_payload_offset; + + /* SWREG194_PAYLOAD_ST_ERROR_REF_BASE */ + RK_U32 reg194_payload_st_error_ref_base; + + /* SWREG195_210_PAYLOAD_ST_REF0_15_BASE */ + RK_U32 reg195_210_payload_st_ref_base[16]; + + RK_U32 reserve_reg211_215[5]; + + /* SWREG216_COLMV_CUR_BASE */ + RK_U32 reg216_colmv_cur_base; + + /* SWREG217_232_COLMV_REF0_15_BASE */ + RK_U32 reg217_232_colmv_ref_base[16]; + +} Vdpu384aRegH264dAddr; + + +typedef struct Vdpu384aH264dRegSet_t { + Vdpu384aRegVersion reg_version; /* 0 */ + Vdpu384aCtrlReg ctrl_regs; /* 8-30 */ + Vdpu384aRegCommonAddr common_addr; /* 128-134, 140-161 */ + // Vdpu384aRegNew new_add; /* 320-359 */ + + Vdpu384aRegH264dParam h264d_paras; /* 64-74, 80-106 */ + Vdpu384aRegH264dAddr h264d_addrs; /* 168-185, 192-210, 216-232 */ +} Vdpu384aH264dRegSet; + +#endif /* __VDPU384A_H264D_H__ */ diff --git a/mpp/hal/rkdec/inc/vdpu384a_h265d.h b/mpp/hal/rkdec/inc/vdpu384a_h265d.h new file mode 100644 index 00000000..a8d33b4b --- /dev/null +++ b/mpp/hal/rkdec/inc/vdpu384a_h265d.h @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: Apache-2.0 OR MIT */ +/* + * Copyright (c) 2024 Rockchip Electronics Co., Ltd. 
+ */ + +#ifndef __VDPU384A_H265D_H__ +#define __VDPU384A_H265D_H__ + +#include "vdpu384a_com.h" + + +typedef struct Vdpu384aRegH265dParas_t { + /* SWREG64_H26X_PARA */ + RK_U32 reg64_unused_bits; + + /* SWREG65_STREAM_PARAM_SET */ + RK_U32 reg65_strm_start_bit; + + /* SWREG66_STREAM_LEN */ + RK_U32 reg66_stream_len; + + /* SWREG67_GLOBAL_LEN */ + RK_U32 reg67_global_len; + + /* SWREG68_DPB_HOR_STRIDE */ + RK_U32 reg68_dpb_hor_virstride; + + RK_U32 reserve_reg69_70[2]; + + /* SWREG71_SCL_Y_HOR_VIRSTRIDE */ + RK_U32 reg71_scl_ref_hor_virstride; + + /* SWREG72_SCL_UV_HOR_VIRSTRIDE */ + RK_U32 reg72_scl_ref_raster_uv_hor_virstride; + + /* SWREG73_SCL_Y_VIRSTRIDE */ + RK_U32 reg73_scl_ref_virstride; + + /* SWREG74_FGS_Y_HOR_VIRSTRIDE */ + RK_U32 reg74_fgs_ref_hor_virstride; + + RK_U32 reserve_reg75_76[2]; + + /* SWREG77_HEAD_HOR_STRIDE */ + RK_U32 reg77_pp_m_hor_stride; + + /* SWREG78_PP_M_RASTER_UV_HOR_STRIDE */ + RK_U32 reg78_pp_m_uv_hor_stride; + + /* SWREG79_PP_M_Y_STRIDE */ + RK_U32 reg79_pp_m_y_virstride; + + /* SWREG80_ERROR_REF_Y_HOR_VIRSTRIDE */ + RK_U32 reg80_error_ref_hor_virstride; + + /* SWREG81_ERROR_REF_UV_HOR_VIRSTRIDE */ + RK_U32 reg81_error_ref_raster_uv_hor_virstride; + + /* SWREG82_ERROR_REF_Y_VIRSTRIDE */ + RK_U32 reg82_error_ref_virstride; + + /* SWREG83_REF0_Y_HOR_VIRSTRIDE */ + RK_U32 reg83_ref0_hor_virstride; + + /* SWREG84_REF0_UV_HOR_VIRSTRIDE */ + RK_U32 reg84_ref0_raster_uv_hor_virstride; + + /* SWREG85_REF0_Y_VIRSTRIDE */ + RK_U32 reg85_ref0_virstride; + + /* SWREG86_REF1_Y_HOR_VIRSTRIDE */ + RK_U32 reg86_ref1_hor_virstride; + + /* SWREG87_REF1_UV_HOR_VIRSTRIDE */ + RK_U32 reg87_ref1_raster_uv_hor_virstride; + + /* SWREG88_REF1_Y_VIRSTRIDE */ + RK_U32 reg88_ref1_virstride; + + /* SWREG89_REF2_Y_HOR_VIRSTRIDE */ + RK_U32 reg89_ref2_hor_virstride; + + /* SWREG90_REF2_UV_HOR_VIRSTRIDE */ + RK_U32 reg90_ref2_raster_uv_hor_virstride; + + /* SWREG91_REF2_Y_VIRSTRIDE */ + RK_U32 reg91_ref2_virstride; + + /* SWREG92_REF3_Y_HOR_VIRSTRIDE */ + RK_U32 reg92_ref3_hor_virstride; + + /* SWREG93_REF3_UV_HOR_VIRSTRIDE */ + RK_U32 reg93_ref3_raster_uv_hor_virstride; + + /* SWREG94_REF3_Y_VIRSTRIDE */ + RK_U32 reg94_ref3_virstride; + + /* SWREG95_REF4_Y_HOR_VIRSTRIDE */ + RK_U32 reg95_ref4_hor_virstride; + + /* SWREG96_REF4_UV_HOR_VIRSTRIDE */ + RK_U32 reg96_ref4_raster_uv_hor_virstride; + + /* SWREG97_REF4_Y_VIRSTRIDE */ + RK_U32 reg97_ref4_virstride; + + /* SWREG98_REF5_Y_HOR_VIRSTRIDE */ + RK_U32 reg98_ref5_hor_virstride; + + /* SWREG99_REF5_UV_HOR_VIRSTRIDE */ + RK_U32 reg99_ref5_raster_uv_hor_virstride; + + /* SWREG100_REF5_Y_VIRSTRIDE */ + RK_U32 reg100_ref5_virstride; + + /* SWREG101_REF6_Y_HOR_VIRSTRIDE */ + RK_U32 reg101_ref6_hor_virstride; + + /* SWREG102_REF6_UV_HOR_VIRSTRIDE */ + RK_U32 reg102_ref6_raster_uv_hor_virstride; + + /* SWREG103_REF6_Y_VIRSTRIDE */ + RK_U32 reg103_ref6_virstride; + + /* SWREG104_REF7_Y_HOR_VIRSTRIDE */ + RK_U32 reg104_ref7_hor_virstride; + + /* SWREG105_REF7_UV_HOR_VIRSTRIDE */ + RK_U32 reg105_ref7_raster_uv_hor_virstride; + + /* SWREG106_REF7_Y_VIRSTRIDE */ + RK_U32 reg106_ref7_virstride; + +} Vdpu384aRegH265dParas; + +typedef struct Vdpu384aRegH265dAddr_t { + /* SWREG168_DECOUT_BASE */ + RK_U32 reg168_dpb_decout_base; + + /* SWREG169_ERROR_REF_BASE */ + RK_U32 reg169_error_ref_base; + + /* SWREG170_185_REF0_15_BASE */ + RK_U32 reg170_185_ref_base[16]; + + RK_U32 reserve_reg186_191[6]; + + /* SWREG192_PAYLOAD_ST_CUR_BASE */ + RK_U32 reg192_dpb_payload64x4_st_cur_base; + + /* SWREG193_FBC_PAYLOAD_OFFSET */ + RK_U32 
reg193_dpb_fbc64x4_payload_offset; + + /* SWREG194_PAYLOAD_ST_ERROR_REF_BASE */ + RK_U32 reg194_payload_st_error_ref_base; + + /* SWREG195_210_PAYLOAD_ST_REF0_15_BASE */ + RK_U32 reg195_210_payload_st_ref_base[16]; + + RK_U32 reserve_reg211_215[5]; + + /* SWREG216_COLMV_CUR_BASE */ + RK_U32 reg216_colmv_cur_base; + + /* SWREG217_232_COLMV_REF0_15_BASE */ + RK_U32 reg217_232_colmv_ref_base[16]; + +} Vdpu384aRegH265dAddr; + +typedef struct Vdpu384aH265dRegSet_t { + Vdpu384aRegVersion reg_version; /* 0 */ + Vdpu384aCtrlReg ctrl_regs; /* 8-30 */ + Vdpu384aRegCommonAddr common_addr; /* 128-134, 140-161 */ + Vdpu384aRegH265dParas h265d_paras; /* 64-74, 80-106 */ + Vdpu384aRegH265dAddr h265d_addrs; /* 168-185, 192-210, 216-232 */ +} Vdpu384aH265dRegSet; + +#endif /* __VDPU384A_H265D_H__ */ diff --git a/mpp/hal/rkdec/vdpu384a_com.c b/mpp/hal/rkdec/vdpu384a_com.c new file mode 100644 index 00000000..1c617e14 --- /dev/null +++ b/mpp/hal/rkdec/vdpu384a_com.c @@ -0,0 +1,403 @@ +/* SPDX-License-Identifier: Apache-2.0 OR MIT */ +/* + * Copyright (c) 2024 Rockchip Electronics Co., Ltd. + */ + +#define MODULE_TAG "vdpu384a_com" + +#include <string.h> + +#include "mpp_log.h" +#include "mpp_buffer.h" +#include "mpp_common.h" +#include "mpp_compat_impl.h" +#include "mpp_frame_impl.h" +#include "mpp_debug.h" + +#include "vdpu384a_com.h" + +static RK_U32 rcb_coeff[RCB_BUF_COUNT] = { + [RCB_STRMD_ROW] = 3, + [RCB_STRMD_TILE_ROW] = 3, + [RCB_INTER_ROW] = 6, + [RCB_INTER_TILE_ROW] = 6, + [RCB_INTRA_ROW] = 5, + [RCB_INTRA_TILE_ROW] = 5, + [RCB_FILTERD_ROW] = 90, + [RCB_FILTERD_PROTECT_ROW] = 90, + [RCB_FILTERD_TILE_ROW] = 90, + [RCB_FILTERD_TILE_COL] = 260, + [RCB_FILTERD_AV1_UP_TILE_COL] = 0, +}; + +static RK_S32 update_size_offset(Vdpu384aRcbInfo *info, RK_U32 reg_idx, + RK_S32 offset, RK_S32 len, RK_S32 idx) +{ + RK_S32 buf_size = 0; + + buf_size = MPP_ALIGN(len * rcb_coeff[idx], RCB_ALLINE_SIZE); + info[idx].reg_idx = reg_idx; + info[idx].offset = offset; + info[idx].size = buf_size; + + return buf_size; +} + +RK_S32 vdpu384a_get_rcb_buf_size(Vdpu384aRcbInfo *info, RK_S32 width, RK_S32 height) +{ + RK_S32 offset = 0; + + offset += update_size_offset(info, 140, offset, width, RCB_STRMD_ROW); + offset += update_size_offset(info, 142, offset, width, RCB_STRMD_TILE_ROW); + offset += update_size_offset(info, 144, offset, width, RCB_INTER_ROW); + offset += update_size_offset(info, 146, offset, width, RCB_INTER_TILE_ROW); + offset += update_size_offset(info, 148, offset, width, RCB_INTRA_ROW); + offset += update_size_offset(info, 150, offset, width, RCB_INTRA_TILE_ROW); + offset += update_size_offset(info, 152, offset, width, RCB_FILTERD_ROW); + offset += update_size_offset(info, 154, offset, width, RCB_FILTERD_PROTECT_ROW); + offset += update_size_offset(info, 156, offset, width, RCB_FILTERD_TILE_ROW); + offset += update_size_offset(info, 158, offset, height, RCB_FILTERD_TILE_COL); + offset += update_size_offset(info, 160, offset, height, RCB_FILTERD_AV1_UP_TILE_COL); + + return offset; +} + +MPP_RET vdpu384a_check_rcb_buf_size(Vdpu384aRcbInfo *info, RK_S32 width, RK_S32 height) +{ + RK_U32 i; + + for (i = 0; i < RCB_FILTERD_TILE_COL; i++) + mpp_assert(info[i].size <= (RK_S32)MPP_ALIGN(width * rcb_coeff[i], RCB_ALLINE_SIZE)); + + for (i = RCB_FILTERD_TILE_COL; i < RCB_BUF_COUNT; i++) + mpp_assert(info[i].size <= (RK_S32)MPP_ALIGN(height * rcb_coeff[i], RCB_ALLINE_SIZE)); + + return MPP_OK; +} + +void vdpu384a_setup_rcb(Vdpu384aRegCommonAddr *reg, MppDev dev, + MppBuffer buf, Vdpu384aRcbInfo *info) +{ + RK_U32 i; +
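/* + * All RCB regions are suballocated from the single MppBuffer passed in: + * every *_offset register below is programmed with the same buffer fd, + * and each region's byte offset is attached afterwards through + * mpp_dev_set_reg_offset() in the loop at the end of this function. + * Note that RCB_FILTERD_PROTECT_ROW (SWREG154/155) is sized and reported + * to the driver but not programmed here. + */ +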
reg->reg140_rcb_strmd_row_offset = mpp_buffer_get_fd(buf); + reg->reg142_rcb_strmd_tile_row_offset = mpp_buffer_get_fd(buf); + reg->reg144_rcb_inter_row_offset = mpp_buffer_get_fd(buf); + reg->reg146_rcb_inter_tile_row_offset = mpp_buffer_get_fd(buf); + reg->reg148_rcb_intra_row_offset = mpp_buffer_get_fd(buf); + reg->reg150_rcb_intra_tile_row_offset = mpp_buffer_get_fd(buf); + reg->reg152_rcb_filterd_row_offset = mpp_buffer_get_fd(buf); + reg->reg156_rcb_filterd_tile_row_offset = mpp_buffer_get_fd(buf); + reg->reg158_rcb_filterd_tile_col_offset = mpp_buffer_get_fd(buf); + reg->reg160_rcb_filterd_av1_upscale_tile_col_offset = mpp_buffer_get_fd(buf); + + reg->reg141_rcb_strmd_row_len = info[RCB_STRMD_ROW].size ; + reg->reg143_rcb_strmd_tile_row_len = info[RCB_STRMD_TILE_ROW].size ; + reg->reg145_rcb_inter_row_len = info[RCB_INTER_ROW].size ; + reg->reg147_rcb_inter_tile_row_len = info[RCB_INTER_TILE_ROW].size ; + reg->reg149_rcb_intra_row_len = info[RCB_INTRA_ROW].size ; + reg->reg151_rcb_intra_tile_row_len = info[RCB_INTRA_TILE_ROW].size ; + reg->reg153_rcb_filterd_row_len = info[RCB_FILTERD_ROW].size ; + reg->reg157_rcb_filterd_tile_row_len = info[RCB_FILTERD_TILE_ROW].size ; + reg->reg159_rcb_filterd_tile_col_len = info[RCB_FILTERD_TILE_COL].size ; + reg->reg161_rcb_filterd_av1_upscale_tile_col_len = info[RCB_FILTERD_AV1_UP_TILE_COL].size; + + for (i = 0; i < RCB_BUF_COUNT; i++) { + if (info[i].offset) + mpp_dev_set_reg_offset(dev, info[i].reg_idx, info[i].offset); + } +} + +RK_S32 vdpu384a_compare_rcb_size(const void *a, const void *b) +{ + RK_S32 val = 0; + Vdpu384aRcbInfo *p0 = (Vdpu384aRcbInfo *)a; + Vdpu384aRcbInfo *p1 = (Vdpu384aRcbInfo *)b; + + val = (p0->size > p1->size) ? -1 : 1; + + return val; +} + +void vdpu384a_setup_statistic(Vdpu384aCtrlReg *ctrl_regs) +{ + ctrl_regs->reg28.axi_perf_work_e = 1; + ctrl_regs->reg28.axi_cnt_type = 1; + ctrl_regs->reg28.rd_latency_id = 11; + + ctrl_regs->reg29.addr_align_type = 1; + ctrl_regs->reg29.ar_cnt_id_type = 0; + ctrl_regs->reg29.aw_cnt_id_type = 1; + ctrl_regs->reg29.ar_count_id = 17; + ctrl_regs->reg29.aw_count_id = 0; + ctrl_regs->reg29.rd_band_width_mode = 0; + + /* set hurry */ + ctrl_regs->reg30.axi_wr_qos = 0; + ctrl_regs->reg30.axi_rd_qos = 0; +} + +void vdpu384a_afbc_align_calc(MppBufSlots slots, MppFrame frame, RK_U32 expand) +{ + RK_U32 ver_stride = 0; + RK_U32 img_height = mpp_frame_get_height(frame); + RK_U32 img_width = mpp_frame_get_width(frame); + RK_U32 hdr_stride = (*compat_ext_fbc_hdr_256_odd) ? 
+ (MPP_ALIGN(img_width, 256) | 256) : + (MPP_ALIGN(img_width, 64)); + + mpp_slots_set_prop(slots, SLOTS_HOR_ALIGN, mpp_align_64); + mpp_slots_set_prop(slots, SLOTS_VER_ALIGN, mpp_align_16); + + mpp_frame_set_fbc_hdr_stride(frame, hdr_stride); + + ver_stride = mpp_align_16(img_height); + if (*compat_ext_fbc_buf_size) { + ver_stride += expand; + } + mpp_frame_set_ver_stride(frame, ver_stride); +} + +RK_S32 vdpu384a_set_rcbinfo(MppDev dev, Vdpu384aRcbInfo *rcb_info) +{ + MppDevRcbInfoCfg rcb_cfg; + RK_U32 i; + + Vdpu384aRcbSetMode_e set_rcb_mode = RCB_SET_BY_PRIORITY_MODE; + + RK_U32 rcb_priority[RCB_BUF_COUNT] = { + RCB_FILTERD_ROW, + RCB_INTER_ROW, + RCB_INTRA_ROW, + RCB_STRMD_ROW, + RCB_INTER_TILE_ROW, + RCB_INTRA_TILE_ROW, + RCB_STRMD_TILE_ROW, + RCB_FILTERD_TILE_ROW, + RCB_FILTERD_TILE_COL, + RCB_FILTERD_AV1_UP_TILE_COL, + RCB_FILTERD_PROTECT_ROW, + }; + /* + * RCB_SET_BY_SIZE_SORT_MODE: by size sort + * RCB_SET_BY_PRIORITY_MODE: by priority + */ + + switch (set_rcb_mode) { + case RCB_SET_BY_SIZE_SORT_MODE : { + Vdpu384aRcbInfo info[RCB_BUF_COUNT]; + + memcpy(info, rcb_info, sizeof(info)); + qsort(info, MPP_ARRAY_ELEMS(info), + sizeof(info[0]), vdpu384a_compare_rcb_size); + + for (i = 0; i < MPP_ARRAY_ELEMS(info); i++) { + rcb_cfg.reg_idx = info[i].reg_idx; + rcb_cfg.size = info[i].size; + if (rcb_cfg.size > 0) { + mpp_dev_ioctl(dev, MPP_DEV_RCB_INFO, &rcb_cfg); + } else + break; + } + } break; + case RCB_SET_BY_PRIORITY_MODE : { + Vdpu384aRcbInfo *info = rcb_info; + RK_U32 index = 0; + + for (i = 0; i < MPP_ARRAY_ELEMS(rcb_priority); i++) { + index = rcb_priority[i]; + + rcb_cfg.reg_idx = info[index].reg_idx; + rcb_cfg.size = info[index].size; + if (rcb_cfg.size > 0) { + mpp_dev_ioctl(dev, MPP_DEV_RCB_INFO, &rcb_cfg); + } + } + } break; + default: + break; + } + + return 0; +} + +void vdpu384a_update_thumbnail_frame_info(MppFrame frame) +{ + RK_U32 down_scale_height = mpp_frame_get_height(frame) >> 1; + RK_U32 down_scale_width = mpp_frame_get_width(frame) >> 1; + RK_U32 down_scale_ver = MPP_ALIGN(down_scale_height, 16); + RK_U32 down_scale_hor = MPP_ALIGN(down_scale_width, 16); + RK_U32 down_scale_buf_size = 0; + + if (!MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(frame))) { + down_scale_hor = mpp_align_128_odd_plus_64(down_scale_hor); + down_scale_ver = mpp_frame_get_ver_stride(frame) >> 1; + } + + down_scale_buf_size = down_scale_hor * down_scale_ver * 3 / 2; + /* + * regardless of the input format, the scaled-down image is output as 8-bit raster; + */ + mpp_frame_set_fmt(frame, MPP_FMT_YUV420SP); + mpp_frame_set_width(frame, down_scale_width); + mpp_frame_set_height(frame, down_scale_height); + mpp_frame_set_hor_stride(frame, down_scale_hor); + mpp_frame_set_ver_stride(frame, down_scale_ver); + mpp_frame_set_buf_size(frame, down_scale_buf_size); +} + +void vdpu384a_setup_down_scale(MppFrame frame, MppDev dev, Vdpu384aCtrlReg *com, void* comParas) +{ + RK_U32 down_scale_height = mpp_frame_get_height(frame) >> 1; + RK_U32 down_scale_width = mpp_frame_get_width(frame) >> 1; + RK_U32 down_scale_ver = MPP_ALIGN(down_scale_height, 16); + RK_U32 down_scale_hor = MPP_ALIGN(down_scale_width, 16); + + Vdpu384aRegCommParas* paras = (Vdpu384aRegCommParas*)comParas; + MppFrameFormat fmt = mpp_frame_get_fmt(frame); + MppMeta meta = mpp_frame_get_meta(frame); + RK_U32 down_scale_y_offset = 0; + RK_U32 down_scale_uv_offset = 0; + RK_U32 down_scale_y_virstride = down_scale_ver * down_scale_hor; + RK_U32 downscale_buf_size; + + if (!MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(frame))) { + down_scale_hor =
mpp_align_128_odd_plus_64(down_scale_hor); + down_scale_ver = mpp_frame_get_ver_stride(frame) >> 1; + down_scale_y_virstride = down_scale_ver * down_scale_hor; + } + /* + * regardless of the input format, the scaled-down image is output as 8-bit raster; + * the down-scale image buffer size was already added to the mpp_frame buf_size in + * mpp_buf_slot.cpp (size = original_size + scaledown_size) + */ + switch ((fmt & MPP_FRAME_FMT_MASK)) { + case MPP_FMT_YUV400 : { + downscale_buf_size = down_scale_y_virstride; + } break; + case MPP_FMT_YUV420SP_10BIT : + case MPP_FMT_YUV420SP : { + downscale_buf_size = down_scale_y_virstride * 3 / 2; + } break; + case MPP_FMT_YUV422SP_10BIT : + case MPP_FMT_YUV422SP : { + downscale_buf_size = down_scale_y_virstride * 2; + } break; + case MPP_FMT_YUV444SP : { + downscale_buf_size = down_scale_y_virstride * 3; + } break; + default : { + downscale_buf_size = down_scale_y_virstride * 3 / 2; + } break; + } + downscale_buf_size = MPP_ALIGN(downscale_buf_size, 16); + + down_scale_y_offset = MPP_ALIGN((mpp_frame_get_buf_size(frame) - downscale_buf_size), 16); + down_scale_uv_offset = down_scale_y_offset + down_scale_y_virstride; + + com->reg9.scale_down_en = 1; + com->reg9.av1_fgs_en = 0; + paras->reg71_scl_ref_hor_virstride = down_scale_hor >> 4; + paras->reg72_scl_ref_raster_uv_hor_virstride = down_scale_hor >> 4; + if ((fmt & MPP_FRAME_FMT_MASK) == MPP_FMT_YUV444SP) + paras->reg72_scl_ref_raster_uv_hor_virstride = down_scale_hor >> 3; + paras->reg73_scl_ref_virstride = down_scale_y_virstride >> 4; + if (mpp_frame_get_thumbnail_en(frame) == MPP_FRAME_THUMBNAIL_MIXED) { + mpp_dev_set_reg_offset(dev, 133, down_scale_y_offset); + mpp_meta_set_s32(meta, KEY_DEC_TBN_Y_OFFSET, down_scale_y_offset); + mpp_meta_set_s32(meta, KEY_DEC_TBN_UV_OFFSET, down_scale_uv_offset); + } +} + +#ifdef DUMP_VDPU384A_DATAS +RK_U32 dump_cur_frame = 0; +char dump_cur_dir[128]; +char dump_cur_fname_path[512]; + +MPP_RET flip_string(char *str) +{ + RK_U32 len = strlen(str); + RK_U32 i, j; + + /* empty string: nothing to flip (len - 1 would underflow RK_U32) */ + if (!len) + return MPP_OK; + + for (i = 0, j = len - 1; i <= j; i++, j--) { + // swapping characters + char c = str[i]; + str[i] = str[j]; + str[j] = c; + } + + return MPP_OK; +} + +MPP_RET dump_data_to_file(char *fname_path, void *data, RK_U32 data_bit_size, + RK_U32 line_bits, RK_U32 big_end) +{ + RK_U8 *buf_p = (RK_U8 *)data; + RK_U8 cur_data; + RK_U32 i; + RK_U32 loop_cnt; + FILE *dump_fp = NULL; + char line_tmp[256]; + RK_U32 str_idx = 0; + + /* validate alignment before opening the file so bad args do not leak the FILE handle */ + if ((data_bit_size % 4 != 0) || (line_bits % 8 != 0)) { + mpp_err_f("data_bit_size must be a multiple of 4 and line_bits a multiple of 8!\n"); + return MPP_NOK; + } + + dump_fp = fopen(fname_path, "w+"); + if (!dump_fp) { + mpp_err_f("open file: %s error!\n", fname_path); + return MPP_NOK; + } + + loop_cnt = data_bit_size / 8; + for (i = 0; i < loop_cnt; i++) { + cur_data = buf_p[i]; + + sprintf(&line_tmp[str_idx++], "%0x", cur_data & 0xf); + if ((i * 8 + 4) % line_bits == 0) { + line_tmp[str_idx++] = '\0'; + str_idx = 0; + if (!big_end) + flip_string(line_tmp); + fprintf(dump_fp, "%s\n", line_tmp); + } + sprintf(&line_tmp[str_idx++], "%0x", (cur_data >> 4) & 0xf); + if ((i * 8 + 8) % line_bits == 0) { + line_tmp[str_idx++] = '\0'; + str_idx = 0; + if (!big_end) + flip_string(line_tmp); + fprintf(dump_fp, "%s\n", line_tmp); + } + } + + // last line + if (data_bit_size % 4) { + cur_data = buf_p[i]; + sprintf(&line_tmp[str_idx++], "%0x", cur_data & 0xf); + if ((i * 8 + 8) % line_bits == 0) { + line_tmp[str_idx++] = '\0'; + str_idx = 0; + if (!big_end) + flip_string(line_tmp); + fprintf(dump_fp, "%s\n", line_tmp); + }
+ } + if (data_bit_size % line_bits) { + loop_cnt = (line_bits - (data_bit_size % line_bits)) / 4; + for (i = 0; i < loop_cnt; i++) + sprintf(&line_tmp[str_idx++], "%0x", 0); + line_tmp[str_idx++] = '\0'; + str_idx = 0; + if (!big_end) + flip_string(line_tmp); + fprintf(dump_fp, "%s\n", line_tmp); + } + + fclose(dump_fp); + + return MPP_OK; +} +#endif
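Note: the RCB helpers in vdpu384a_com.c compose as follows. This is a minimal usage sketch, not part of the patch: rcb_setup_sketch and its MppBufferGroup/MppDev/out_buf parameters are hypothetical glue, while the Vdpu384aRcbInfo type and the vdpu384a_* calls are the ones defined above.

#include "mpp_buffer.h"
#include "vdpu384a_com.h"

static MPP_RET rcb_setup_sketch(MppBufferGroup group, MppDev dev,
                                Vdpu384aRegCommonAddr *regs,
                                RK_S32 width, RK_S32 height,
                                MppBuffer *out_buf)
{
    Vdpu384aRcbInfo info[RCB_BUF_COUNT];
    /* size the eleven regions; each entry records reg_idx/offset/size */
    RK_S32 total = vdpu384a_get_rcb_buf_size(info, width, height);
    /* one allocation backs all regions */
    MPP_RET ret = mpp_buffer_get(group, out_buf, total);

    if (ret)
        return ret;

    /* program the shared fd plus per-region offsets into the register set */
    vdpu384a_setup_rcb(regs, dev, *out_buf, info);
    /* report region sizes to the kernel driver (priority order by default) */
    vdpu384a_set_rcbinfo(dev, info);

    return MPP_OK;
}

Region sizes scale linearly with one picture dimension: for a 4096-pixel-wide stream, RCB_FILTERD_ROW alone needs MPP_ALIGN(4096 * 90, RCB_ALLINE_SIZE), roughly 360 KB, which is why all regions share a single allocation addressed by fd plus offset.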
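Note: the tail-placement arithmetic in vdpu384a_setup_down_scale, worked through under assumed inputs (1920x1080, FBC, MPP_FMT_YUV420SP; illustrative values, not from the patch):

    down_scale_width       = 1920 >> 1          = 960
    down_scale_height      = 1080 >> 1          = 540
    down_scale_hor         = MPP_ALIGN(960, 16) = 960
    down_scale_ver         = MPP_ALIGN(540, 16) = 544
    down_scale_y_virstride = 960 * 544          = 522240 bytes
    downscale_buf_size     = 522240 * 3 / 2     = 783360 bytes (already 16-aligned)
    down_scale_y_offset    = MPP_ALIGN(buf_size - 783360, 16)
    down_scale_uv_offset   = down_scale_y_offset + 522240
    reg71 = 960 >> 4 = 60, reg72 = 60, reg73 = 522240 >> 4 = 32640

The scaled-down picture is placed at the end of the frame buffer, whose size already includes the thumbnail footprint per the comment in the code.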