From e96dc75dd1440a4ace38ca12793f9b41f1a4b15e Mon Sep 17 00:00:00 2001 From: Chandler Chen Date: Tue, 15 Nov 2022 14:03:04 +0800 Subject: [PATCH] [hal_rkdec] add vdpu382 support Signed-off-by: Chandler Chen Change-Id: Ifcb1d35ceb41c3ca90bbfe482025c2147bff268e --- mpp/hal/rkdec/CMakeLists.txt | 2 +- mpp/hal/rkdec/avs2d/CMakeLists.txt | 1 + mpp/hal/rkdec/avs2d/hal_avs2d_api.c | 23 +- mpp/hal/rkdec/avs2d/hal_avs2d_vdpu382.c | 1109 ++++++++++++++++++++ mpp/hal/rkdec/avs2d/hal_avs2d_vdpu382.h | 44 + mpp/hal/rkdec/h264d/CMakeLists.txt | 1 + mpp/hal/rkdec/h264d/hal_h264d_api.c | 12 +- mpp/hal/rkdec/h264d/hal_h264d_vdpu382.c | 1230 +++++++++++++++++++++++ mpp/hal/rkdec/h264d/hal_h264d_vdpu382.h | 40 + mpp/hal/rkdec/h265d/CMakeLists.txt | 1 + mpp/hal/rkdec/h265d/hal_h265d_api.c | 5 +- mpp/hal/rkdec/h265d/hal_h265d_vdpu382.c | 1182 ++++++++++++++++++++++ mpp/hal/rkdec/h265d/hal_h265d_vdpu382.h | 33 + mpp/hal/rkdec/inc/vdpu382.h | 22 + mpp/hal/rkdec/inc/vdpu382_avs2d.h | 152 +++ mpp/hal/rkdec/inc/vdpu382_com.h | 544 ++++++++++ mpp/hal/rkdec/inc/vdpu382_h264d.h | 269 +++++ mpp/hal/rkdec/inc/vdpu382_h265d.h | 208 ++++ mpp/hal/rkdec/inc/vdpu382_vp9d.h | 315 ++++++ mpp/hal/rkdec/vdpu382_com.c | 194 ++++ mpp/hal/rkdec/vp9d/CMakeLists.txt | 1 + mpp/hal/rkdec/vp9d/hal_vp9d_api.c | 6 +- mpp/hal/rkdec/vp9d/hal_vp9d_vdpu382.c | 1075 ++++++++++++++++++++ mpp/hal/rkdec/vp9d/hal_vp9d_vdpu382.h | 32 + 24 files changed, 6492 insertions(+), 9 deletions(-) create mode 100644 mpp/hal/rkdec/avs2d/hal_avs2d_vdpu382.c create mode 100644 mpp/hal/rkdec/avs2d/hal_avs2d_vdpu382.h create mode 100644 mpp/hal/rkdec/h264d/hal_h264d_vdpu382.c create mode 100644 mpp/hal/rkdec/h264d/hal_h264d_vdpu382.h create mode 100644 mpp/hal/rkdec/h265d/hal_h265d_vdpu382.c create mode 100644 mpp/hal/rkdec/h265d/hal_h265d_vdpu382.h create mode 100644 mpp/hal/rkdec/inc/vdpu382.h create mode 100644 mpp/hal/rkdec/inc/vdpu382_avs2d.h create mode 100644 mpp/hal/rkdec/inc/vdpu382_com.h create mode 100644 
mpp/hal/rkdec/inc/vdpu382_h264d.h create mode 100644 mpp/hal/rkdec/inc/vdpu382_h265d.h create mode 100644 mpp/hal/rkdec/inc/vdpu382_vp9d.h create mode 100644 mpp/hal/rkdec/vdpu382_com.c create mode 100644 mpp/hal/rkdec/vp9d/hal_vp9d_vdpu382.c create mode 100644 mpp/hal/rkdec/vp9d/hal_vp9d_vdpu382.h diff --git a/mpp/hal/rkdec/CMakeLists.txt b/mpp/hal/rkdec/CMakeLists.txt index 6e333b19..ea0fcd1b 100644 --- a/mpp/hal/rkdec/CMakeLists.txt +++ b/mpp/hal/rkdec/CMakeLists.txt @@ -1,7 +1,7 @@ # vim: syntax=cmake include_directories(inc) -add_library(vdpu34x_com STATIC vdpu34x_com.c) +add_library(vdpu34x_com STATIC vdpu34x_com.c vdpu382_com.c) if( HAVE_AVSD ) add_subdirectory(avsd) diff --git a/mpp/hal/rkdec/avs2d/CMakeLists.txt b/mpp/hal/rkdec/avs2d/CMakeLists.txt index 726d49be..b838f735 100644 --- a/mpp/hal/rkdec/avs2d/CMakeLists.txt +++ b/mpp/hal/rkdec/avs2d/CMakeLists.txt @@ -13,6 +13,7 @@ set(HAL_AVS2D_HDR # hal avs2 decoder sourse set(HAL_AVS2D_SRC hal_avs2d_rkv.c + hal_avs2d_vdpu382.c hal_avs2d_api.c ) diff --git a/mpp/hal/rkdec/avs2d/hal_avs2d_api.c b/mpp/hal/rkdec/avs2d/hal_avs2d_api.c index 83b5c79d..4e5a7cef 100644 --- a/mpp/hal/rkdec/avs2d/hal_avs2d_api.c +++ b/mpp/hal/rkdec/avs2d/hal_avs2d_api.c @@ -31,6 +31,7 @@ #include "hal_avs2d_api.h" #include "hal_avs2d_rkv.h" +#include "hal_avs2d_vdpu382.h" RK_U32 avs2d_hal_debug = 0; @@ -81,12 +82,24 @@ MPP_RET hal_avs2d_init(void *hal, MppHalCfg *cfg) p_hal = (Avs2dHalCtx_t *)hal; memset(p_hal, 0, sizeof(Avs2dHalCtx_t)); + + RK_U32 hw_id = mpp_get_client_hw_id(VPU_CLIENT_RKVDEC); + p_api = &p_hal->hal_api; - p_api->init = hal_avs2d_rkv_init; - p_api->deinit = hal_avs2d_rkv_deinit; - p_api->reg_gen = hal_avs2d_rkv_gen_regs; - p_api->start = hal_avs2d_rkv_start; - p_api->wait = hal_avs2d_rkv_wait; + if (hw_id == HWID_VDPU382) { + p_api->init = hal_avs2d_vdpu382_init; + p_api->deinit = hal_avs2d_vdpu382_deinit; + p_api->reg_gen = hal_avs2d_vdpu382_gen_regs; + p_api->start = hal_avs2d_vdpu382_start; + p_api->wait = 
hal_avs2d_vdpu382_wait; + } else { + p_api->init = hal_avs2d_rkv_init; + p_api->deinit = hal_avs2d_rkv_deinit; + p_api->reg_gen = hal_avs2d_rkv_gen_regs; + p_api->start = hal_avs2d_rkv_start; + p_api->wait = hal_avs2d_rkv_wait; + } + p_api->reset = NULL; p_api->flush = NULL; p_api->control = NULL; diff --git a/mpp/hal/rkdec/avs2d/hal_avs2d_vdpu382.c b/mpp/hal/rkdec/avs2d/hal_avs2d_vdpu382.c new file mode 100644 index 00000000..7ac57f14 --- /dev/null +++ b/mpp/hal/rkdec/avs2d/hal_avs2d_vdpu382.c @@ -0,0 +1,1109 @@ +/* + * Copyright 2022 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define MODULE_TAG "hal_avs2d_vdpu382" + +#include +#include + +#include "mpp_log.h" +#include "mpp_mem.h" +#include "mpp_common.h" +#include "mpp_debug.h" +#include "mpp_bitput.h" + +#include "avs2d_syntax.h" +#include "hal_avs2d_api.h" +#include "hal_avs2d_vdpu382.h" +#include "mpp_dec_cb_param.h" +#include "vdpu382_avs2d.h" + +#define VDPU382_FAST_REG_SET_CNT (3) +#define MAX_REF_NUM (8) +#define AVS2_RKV_SHPH_SIZE (1408 / 8) /* bytes */ +#define AVS2_RKV_SCALIST_SIZE (80 + 128) /* bytes */ +#define VDPU382_TOTAL_REG_CNT (278) + +#define AVS2_RKV_SHPH_ALIGNED_SIZE (MPP_ALIGN(AVS2_RKV_SHPH_SIZE, SZ_4K)) +#define AVS2_RKV_SCALIST_ALIGNED_SIZE (MPP_ALIGN(AVS2_RKV_SCALIST_SIZE, SZ_4K)) +#define AVS2_RKV_STREAM_INFO_SET_SIZE (AVS2_RKV_SHPH_ALIGNED_SIZE + \ + AVS2_RKV_SCALIST_ALIGNED_SIZE) +#define AVS2_ALL_TBL_BUF_SIZE(cnt) (AVS2_RKV_STREAM_INFO_SET_SIZE * (cnt)) +#define AVS2_SHPH_OFFSET(pos) (AVS2_RKV_STREAM_INFO_SET_SIZE * (pos)) +#define AVS2_SCALIST_OFFSET(pos) (AVS2_SHPH_OFFSET(pos) + AVS2_RKV_SHPH_ALIGNED_SIZE) + +#define COLMV_COMPRESS_EN (1) +#define COLMV_BLOCK_SIZE (16) +#define COLMV_BYTES (16) + +typedef struct avs2d_buf_t { + RK_U32 valid; + RK_U32 offset_shph; + RK_U32 offset_sclst; + Vdpu382Avs2dRegSet *regs; +} Avs2dVdpu382Buf_t; + +typedef struct avs2d_reg_ctx_t { + Avs2dVdpu382Buf_t reg_buf[VDPU382_FAST_REG_SET_CNT]; + + RK_U32 shph_offset; + RK_U32 sclst_offset; + + Vdpu382Avs2dRegSet *regs; + + RK_U8 shph_dat[AVS2_RKV_SHPH_SIZE]; + RK_U8 scalist_dat[AVS2_RKV_SCALIST_SIZE]; + + MppBuffer bufs; + RK_S32 bufs_fd; + void *bufs_ptr; + + MppBuffer rcb_buf[VDPU382_FAST_REG_SET_CNT]; + RK_S32 rcb_buf_size; + Vdpu382RcbInfo rcb_info[RCB_BUF_COUNT]; + RK_U32 reg_out[VDPU382_TOTAL_REG_CNT]; + +} Avs2dVdpu382RegCtx_t; + +static RK_U32 avs2d_ver_align(RK_U32 val) +{ + return MPP_ALIGN(val, 16); +} + +static RK_U32 avs2d_hor_align(RK_U32 val) +{ + + return MPP_ALIGN(val, 16); +} + +static RK_U32 avs2d_len_align(RK_U32 val) +{ + return (2 * 
MPP_ALIGN(val, 16)); +} + +static MPP_RET prepare_header(Avs2dHalCtx_t *p_hal, RK_U8 *data, RK_U32 len) +{ + RK_U32 i, j; + BitputCtx_t bp; + RK_U64 *bit_buf = (RK_U64 *)data; + Avs2dSyntax_t *syntax = &p_hal->syntax; + PicParams_Avs2d *pp = &syntax->pp; + AlfParams_Avs2d *alfp = &syntax->alfp; + RefParams_Avs2d *refp = &syntax->refp; + WqmParams_Avs2d *wqmp = &syntax->wqmp; + + memset(data, 0, len); + + mpp_set_bitput_ctx(&bp, bit_buf, len); + //!< sequence header syntax + mpp_put_bits(&bp, pp->chroma_format_idc, 2); + mpp_put_bits(&bp, pp->pic_width_in_luma_samples, 16); + mpp_put_bits(&bp, pp->pic_height_in_luma_samples, 16); + mpp_put_bits(&bp, pp->bit_depth_luma_minus8, 3); + mpp_put_bits(&bp, pp->bit_depth_chroma_minus8, 3); + mpp_put_bits(&bp, pp->lcu_size, 3); + mpp_put_bits(&bp, pp->progressive_sequence, 1); + mpp_put_bits(&bp, pp->field_coded_sequence, 1); + mpp_put_bits(&bp, pp->multi_hypothesis_skip_enable_flag, 1); + mpp_put_bits(&bp, pp->dual_hypothesis_prediction_enable_flag, 1); + mpp_put_bits(&bp, pp->weighted_skip_enable_flag, 1); + mpp_put_bits(&bp, pp->asymmetrc_motion_partitions_enable_flag, 1); + mpp_put_bits(&bp, pp->nonsquare_quadtree_transform_enable_flag, 1); + mpp_put_bits(&bp, pp->nonsquare_intra_prediction_enable_flag, 1); + mpp_put_bits(&bp, pp->secondary_transform_enable_flag, 1); + mpp_put_bits(&bp, pp->sample_adaptive_offset_enable_flag, 1); + mpp_put_bits(&bp, pp->adaptive_loop_filter_enable_flag, 1); + mpp_put_bits(&bp, pp->pmvr_enable_flag, 1); + mpp_put_bits(&bp, pp->cross_slice_loopfilter_enable_flag, 1); + //!< picture header syntax + mpp_put_bits(&bp, pp->picture_type, 3); + mpp_put_bits(&bp, refp->ref_pic_num, 3); + mpp_put_bits(&bp, pp->scene_reference_enable_flag, 1); + mpp_put_bits(&bp, pp->bottom_field_picture_flag, 1); + mpp_put_bits(&bp, pp->fixed_picture_qp, 1); + mpp_put_bits(&bp, pp->picture_qp, 7); + mpp_put_bits(&bp, pp->loop_filter_disable_flag, 1); + mpp_put_bits(&bp, pp->alpha_c_offset, 5); + mpp_put_bits(&bp, 
pp->beta_offset, 5); + //!< weight quant param + mpp_put_bits(&bp, wqmp->chroma_quant_param_delta_cb, 6); + mpp_put_bits(&bp, wqmp->chroma_quant_param_delta_cr, 6); + mpp_put_bits(&bp, wqmp->pic_weight_quant_enable_flag, 1); + //!< alf param + mpp_put_bits(&bp, alfp->enable_pic_alf_y, 1); + mpp_put_bits(&bp, alfp->enable_pic_alf_cb, 1); + mpp_put_bits(&bp, alfp->enable_pic_alf_cr, 1); + + if (alfp->enable_pic_alf_y) { + RK_U32 alf_filter_num = alfp->alf_filter_num_minus1 + 1; + mpp_put_bits(&bp, alfp->alf_filter_num_minus1, 4); + + for (i = 0; i < 16; i++) + mpp_put_bits(&bp, alfp->alf_coeff_idx_tab[i], 4); + + for (i = 0; i < alf_filter_num; i++) { + for (j = 0; j < 9; j++) { + mpp_put_bits(&bp, alfp->alf_coeff_y[i][j], 7); + } + } + } + + if (alfp->enable_pic_alf_cb) { + for (j = 0; j < 9; j++) + mpp_put_bits(&bp, alfp->alf_coeff_cb[j], 7); + } + + if (alfp->enable_pic_alf_cr) { + for (j = 0; j < 9; j++) + mpp_put_bits(&bp, alfp->alf_coeff_cr[j], 7); + } + + mpp_put_align(&bp, 128, 0); + + return MPP_OK; +} + +static MPP_RET prepare_scalist(Avs2dHalCtx_t *p_hal, RK_U8 *data, RK_U32 len) +{ + RK_U32 i, j; + RK_U32 size_id, block_size; + BitputCtx_t bp; + RK_U64 *bit_buf = (RK_U64 *)data; + Avs2dSyntax_t *syntax = &p_hal->syntax; + WqmParams_Avs2d *wqmp = &syntax->wqmp; + + if (!wqmp->pic_weight_quant_enable_flag) + return MPP_OK; + + memset(data, 0, len); + + mpp_set_bitput_ctx(&bp, bit_buf, len); + + for (size_id = 0; size_id < 2; size_id++) { + block_size = MPP_MIN(1 << (size_id + 2), 8); + for (i = 0; i < block_size; i++) { + for (j = 0 ; j < block_size; j++) + //!< row col reversed + mpp_put_bits(&bp, wqmp->wq_matrix[size_id][size_id * j + i], 8); + } + } + + return MPP_OK; +} + +static RK_S32 get_frame_fd(Avs2dHalCtx_t *p_hal, RK_S32 idx) +{ + RK_S32 ret_fd = 0; + MppBuffer mbuffer = NULL; + + mpp_buf_slot_get_prop(p_hal->frame_slots, idx, SLOT_BUFFER, &mbuffer); + ret_fd = mpp_buffer_get_fd(mbuffer); + + return ret_fd; +} + +static RK_S32 
get_packet_fd(Avs2dHalCtx_t *p_hal, RK_S32 idx) +{ + RK_S32 ret_fd = 0; + MppBuffer mbuffer = NULL; + + mpp_buf_slot_get_prop(p_hal->packet_slots, idx, SLOT_BUFFER, &mbuffer); + ret_fd = mpp_buffer_get_fd(mbuffer); + + return ret_fd; +} + +static MPP_RET init_common_regs(Vdpu382Avs2dRegSet *regs) +{ + Vdpu382RegCommon *common = ®s->common; + + common->reg009.dec_mode = 3; // AVS2 + common->reg015.rlc_mode = 0; + + common->reg011.buf_empty_en = 1; + + common->reg010.dec_e = 1; + + common->reg013.h26x_error_mode = 0; + common->reg021.inter_error_prc_mode = 0; + common->reg021.error_deb_en = 0; + common->reg021.error_intra_mode = 0; + + common->reg024.cabac_err_en_lowbits = 0xffffffdf; + common->reg025.cabac_err_en_highbits = 0x3dffffff; + + common->reg026.swreg_block_gating_e = 0xfffff; + common->reg026.reg_cfg_gating_en = 1; + common->reg032_timeout_threshold = 0x0fffffff; + + common->reg011.dec_clkgate_e = 1; + + common->reg013.stmerror_waitdecfifo_empty = 1; + common->reg012.colmv_compress_en = COLMV_COMPRESS_EN; + common->reg012.wr_ddr_align_en = 1; + common->reg012.info_collect_en = 1; + common->reg012.error_info_en = 0; + + return MPP_OK; +} + +//TODO calc rcb buffer size; +/* +static void avs2d_refine_rcb_size(Vdpu382RcbInfo *rcb_info, + Vdpu382Avs2dRegSet *hw_regs, + RK_S32 width, RK_S32 height, void *dxva) +{ + (void) rcb_info; + (void) hw_regs; + (void) width; + (void) height; + (void) dxva; + return; +} +*/ + +static void hal_avs2d_rcb_info_update(void *hal, Vdpu382Avs2dRegSet *hw_regs) +{ + MPP_RET ret = MPP_OK; + Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal; + Avs2dVdpu382RegCtx_t *reg_ctx = (Avs2dVdpu382RegCtx_t *)p_hal->reg_ctx; + RK_S32 width = p_hal->syntax.pp.pic_width_in_luma_samples; + RK_S32 height = p_hal->syntax.pp.pic_height_in_luma_samples; + RK_S32 i = 0; + RK_S32 loop = p_hal->fast_mode ? 
MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1; + + (void) hw_regs; + + reg_ctx->rcb_buf_size = vdpu382_get_rcb_buf_size(reg_ctx->rcb_info, width, height); + //avs2d_refine_rcb_size(reg_ctx->rcb_info, hw_regs, width, height, (void *)&p_hal->syntax); + + for (i = 0; i < loop; i++) { + MppBuffer rcb_buf = NULL; + + if (reg_ctx->rcb_buf[i]) { + mpp_buffer_put(reg_ctx->rcb_buf[i]); + reg_ctx->rcb_buf[i] = NULL; + } + + ret = mpp_buffer_get(p_hal->buf_group, &rcb_buf, reg_ctx->rcb_buf_size); + + if (ret) + mpp_err_f("AVS2D mpp_buffer_group_get failed\n"); + + reg_ctx->rcb_buf[i] = rcb_buf; + } +} + +static MPP_RET fill_registers(Avs2dHalCtx_t *p_hal, Vdpu382Avs2dRegSet *p_regs, HalTaskInfo *task) +{ + MPP_RET ret = MPP_OK; + RK_U32 i; + MppFrame mframe = NULL; + Avs2dSyntax_t *syntax = &p_hal->syntax; + PicParams_Avs2d *pp = &syntax->pp; + RefParams_Avs2d *refp = &syntax->refp; + HalDecTask *task_dec = &task->dec; + Vdpu382RegCommon *common = &p_regs->common; + RK_U32 is_fbc = 0; + HalBuf *mv_buf = NULL; + + mpp_buf_slot_get_prop(p_hal->frame_slots, task_dec->output, SLOT_FRAME_PTR, &mframe); + is_fbc = MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe)); + + //!< caculate the yuv_frame_size + { + RK_U32 hor_virstride = 0; + RK_U32 ver_virstride = 0; + RK_U32 y_virstride = 0; + + hor_virstride = mpp_frame_get_hor_stride(mframe); + ver_virstride = mpp_frame_get_ver_stride(mframe); + y_virstride = hor_virstride * ver_virstride; + AVS2D_HAL_TRACE("is_fbc %d y_virstride %d, hor_virstride %d, ver_virstride %d\n", is_fbc, y_virstride, hor_virstride, ver_virstride); + + if (is_fbc) { + RK_U32 pixel_width = MPP_ALIGN(mpp_frame_get_width(mframe), 64); + RK_U32 fbd_offset = MPP_ALIGN(pixel_width * (ver_virstride + 16) / 16, SZ_4K); + + common->reg012.fbc_e = 1; + common->reg018.y_hor_virstride = pixel_width / 16; + common->reg019.uv_hor_virstride = pixel_width / 16; + common->reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4; + } else { + common->reg012.fbc_e = 0; + 
common->reg018.y_hor_virstride = hor_virstride / 16; + common->reg019.uv_hor_virstride = hor_virstride / 16; + common->reg020_y_virstride.y_virstride = y_virstride / 16; + } + common->reg013.cur_pic_is_idr = (pp->picture_type == 0 || pp->picture_type == 4 || pp->picture_type == 5); + } + + // set current + { + RK_S32 fd = -1; + p_regs->avs2d_param.reg65_cur_top_poc = mpp_frame_get_poc(mframe); + p_regs->avs2d_param.reg66_cur_bot_poc = 0; + fd = get_frame_fd(p_hal, task_dec->output); + mpp_assert(fd >= 0); + p_regs->common_addr.reg130_decout_base = fd; + mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, task_dec->output); + p_regs->common_addr.reg131_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]); + AVS2D_HAL_TRACE("cur frame index %d, fd %d, colmv fd %d", task_dec->output, fd, p_regs->common_addr.reg131_colmv_cur_base); + } + + // set reference + { + RK_U64 ref_flag = 0; + RK_S32 valid_slot = -1; + RK_U32 *ref_low = (RK_U32 *)&p_regs->avs2d_param.reg99; + RK_U32 *ref_hight = (RK_U32 *)&p_regs->avs2d_param.reg100; + + AVS2D_HAL_TRACE("num of ref %d", refp->ref_pic_num); + + for (i = 0; i < refp->ref_pic_num; i++) { + if (task_dec->refer[i] < 0) + continue; + + valid_slot = i; + break; + } + + for (i = 0; i < MAX_REF_NUM; i++) { + if (i < refp->ref_pic_num) { + MppFrame frame_ref = NULL; + + RK_S32 slot_idx = task_dec->refer[i] < 0 ? 
valid_slot : task_dec->refer[i]; + + if (slot_idx < 0) { + AVS2D_HAL_TRACE("missing ref, could not found valid ref"); + return ret = MPP_ERR_UNKNOW; + } + + mpp_buf_slot_get_prop(p_hal->frame_slots, slot_idx, SLOT_FRAME_PTR, &frame_ref); + + if (frame_ref) { + RK_U32 frm_flag = 1 << 3; + + if (pp->bottom_field_picture_flag) + frm_flag |= 1 << 2; + + if (pp->field_coded_sequence) + frm_flag |= 1; + + ref_flag |= frm_flag << (i * 8); + + p_regs->avs2d_addr.ref_base[i] = get_frame_fd(p_hal, slot_idx); + mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, slot_idx); + p_regs->avs2d_addr.colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]); + + p_regs->avs2d_param.reg67_098_ref_poc[i] = mpp_frame_get_poc(frame_ref); + + AVS2D_HAL_TRACE("ref_base[%d] index=%d, fd = %d, colmv %d, poc %d", + i, slot_idx, p_regs->avs2d_addr.ref_base[i], + p_regs->avs2d_addr.colmv_base[i], p_regs->avs2d_param.reg67_098_ref_poc[i]); + } + } + } + + *ref_low = (RK_U32) (ref_flag & 0xffffffff); + *ref_hight = (RK_U32) ((ref_flag >> 32) & 0xffffffff); + + p_regs->common_addr.reg132_error_ref_base = p_regs->avs2d_addr.ref_base[0]; + } + + // set rlc + { + p_regs->common_addr.reg128_rlc_base = get_packet_fd(p_hal, task_dec->input); + AVS2D_HAL_TRACE("packet fd %d from slot %d", p_regs->common_addr.reg128_rlc_base, task_dec->input); + p_regs->common_addr.reg129_rlcwrite_base = p_regs->common_addr.reg128_rlc_base; + common->reg016_str_len = MPP_ALIGN(mpp_packet_get_length(task_dec->input_packet), 16) + 64; + } + + return ret; +} + +MPP_RET hal_avs2d_vdpu382_deinit(void *hal) +{ + MPP_RET ret = MPP_OK; + RK_U32 i, loop; + Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal; + Avs2dVdpu382RegCtx_t *reg_ctx = (Avs2dVdpu382RegCtx_t *)p_hal->reg_ctx; + + AVS2D_HAL_TRACE("In."); + + INP_CHECK(ret, NULL == reg_ctx); + + //!< malloc buffers + loop = p_hal->fast_mode ? 
MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1; + for (i = 0; i < loop; i++) { + if (reg_ctx->rcb_buf[i]) { + mpp_buffer_put(reg_ctx->rcb_buf[i]); + reg_ctx->rcb_buf[i] = NULL; + } + + MPP_FREE(reg_ctx->reg_buf[i].regs); + } + + if (reg_ctx->bufs) { + mpp_buffer_put(reg_ctx->bufs); + reg_ctx->bufs = NULL; + } + + if (p_hal->cmv_bufs) { + hal_bufs_deinit(p_hal->cmv_bufs); + p_hal->cmv_bufs = NULL; + } + + MPP_FREE(p_hal->reg_ctx); + +__RETURN: + AVS2D_HAL_TRACE("Out. ret %d", ret); + return ret; +} + +MPP_RET hal_avs2d_vdpu382_init(void *hal, MppHalCfg *cfg) +{ + MPP_RET ret = MPP_OK; + RK_U32 i, loop; + Avs2dVdpu382RegCtx_t *reg_ctx; + Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal; + + AVS2D_HAL_TRACE("In."); + + INP_CHECK(ret, NULL == p_hal); + + MEM_CHECK(ret, p_hal->reg_ctx = mpp_calloc_size(void, sizeof(Avs2dVdpu382RegCtx_t))); + reg_ctx = (Avs2dVdpu382RegCtx_t *)p_hal->reg_ctx; + + //!< malloc buffers + loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1; + FUN_CHECK(ret = mpp_buffer_get(p_hal->buf_group, ®_ctx->bufs, AVS2_ALL_TBL_BUF_SIZE(loop))); + reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs); + reg_ctx->bufs_ptr = mpp_buffer_get_ptr(reg_ctx->bufs); + + for (i = 0; i < loop; i++) { + reg_ctx->reg_buf[i].regs = mpp_calloc(Vdpu382Avs2dRegSet, 1); + init_common_regs(reg_ctx->reg_buf[i].regs); + reg_ctx->reg_buf[i].offset_shph = AVS2_SHPH_OFFSET(i); + reg_ctx->reg_buf[i].offset_sclst = AVS2_SCALIST_OFFSET(i); + } + + if (!p_hal->fast_mode) { + reg_ctx->regs = reg_ctx->reg_buf[0].regs; + reg_ctx->shph_offset = reg_ctx->reg_buf[0].offset_shph; + reg_ctx->sclst_offset = reg_ctx->reg_buf[0].offset_sclst; + } + + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, avs2d_hor_align); + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_VER_ALIGN, avs2d_ver_align); + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_LEN_ALIGN, avs2d_len_align); + + { + // report hw_info to parser + const MppSocInfo *info = mpp_get_soc_info(); + const void *hw_info = NULL; + + for (i = 
0; i < MPP_ARRAY_ELEMS(info->dec_caps); i++) { + if (info->dec_caps[i] && info->dec_caps[i]->type == VPU_CLIENT_RKVDEC) { + hw_info = info->dec_caps[i]; + break; + } + } + + mpp_assert(hw_info); + cfg->hw_info = hw_info; + } + +__RETURN: + AVS2D_HAL_TRACE("Out. ret %d", ret); + (void)cfg; + return ret; +__FAILED: + hal_avs2d_vdpu382_deinit(p_hal); + AVS2D_HAL_TRACE("Out. ret %d", ret); + return ret; +} + +static MPP_RET set_up_colmv_buf(void *hal) +{ + MPP_RET ret = MPP_OK; + Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal; + Avs2dSyntax_t *syntax = &p_hal->syntax; + PicParams_Avs2d *pp = &syntax->pp; + RK_U32 mv_size = 0; + + RK_U32 ctu_size = 1 << (p_hal->syntax.pp.lcu_size); + RK_U32 segment_w = 64 * COLMV_BLOCK_SIZE * COLMV_BLOCK_SIZE / ctu_size; + RK_U32 segment_h = ctu_size; + RK_U32 pic_w_align = MPP_ALIGN(pp->pic_width_in_luma_samples, segment_w); + RK_U32 pic_h_align = MPP_ALIGN(pp->pic_height_in_luma_samples, segment_h); + RK_U32 seg_cnt_w = pic_w_align / segment_w; + RK_U32 seg_cnt_h = pic_h_align / segment_h; + RK_U32 seg_head_line_size = MPP_ALIGN(seg_cnt_w, 16); + RK_U32 seg_head_size = seg_head_line_size * seg_cnt_h; + RK_U32 seg_payload_size = seg_cnt_w * seg_cnt_h * 64 * COLMV_BYTES; + + if (COLMV_COMPRESS_EN) + mv_size = seg_payload_size + seg_head_size; + else + mv_size = (MPP_ALIGN(p_hal->syntax.pp.pic_width_in_luma_samples, 64) * + MPP_ALIGN(p_hal->syntax.pp.pic_height_in_luma_samples, 64)) >> 5; + + // colmv frame size align to 128byte + if ((mv_size / 8) % 2 == 1) { + mv_size += 8; + } + + if (pp->field_coded_sequence) + mv_size *= 2; + AVS2D_HAL_TRACE("mv_size %d", mv_size); + + if (p_hal->cmv_bufs == NULL || p_hal->mv_size < mv_size) { + size_t size = mv_size; + + if (p_hal->cmv_bufs) { + hal_bufs_deinit(p_hal->cmv_bufs); + p_hal->cmv_bufs = NULL; + } + + hal_bufs_init(&p_hal->cmv_bufs); + if (p_hal->cmv_bufs == NULL) { + mpp_err_f("colmv bufs init fail"); + ret = MPP_ERR_INIT; + goto __RETURN; + } + + p_hal->mv_size = mv_size; + 
p_hal->mv_count = mpp_buf_slot_get_count(p_hal->frame_slots); + hal_bufs_setup(p_hal->cmv_bufs, p_hal->mv_count, 1, &size); + } + +__RETURN: + return ret; +} + +MPP_RET hal_avs2d_vdpu382_gen_regs(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_OK; + Avs2dVdpu382RegCtx_t *reg_ctx; + Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal; + Vdpu382Avs2dRegSet *regs = NULL; + + AVS2D_HAL_TRACE("In."); + + INP_CHECK(ret, NULL == p_hal); + + if (task->dec.flags.parse_err || task->dec.flags.ref_err) { + ret = MPP_NOK; + goto __RETURN; + } + + ret = set_up_colmv_buf(p_hal); + if (ret) + goto __RETURN; + + reg_ctx = (Avs2dVdpu382RegCtx_t *)p_hal->reg_ctx; + + if (p_hal->fast_mode) { + RK_U32 i = 0; + + for (i = 0; i < MPP_ARRAY_ELEMS(reg_ctx->reg_buf); i++) { + if (!reg_ctx->reg_buf[i].valid) { + task->dec.reg_index = i; + regs = reg_ctx->reg_buf[i].regs; + reg_ctx->shph_offset = reg_ctx->reg_buf[i].offset_shph; + reg_ctx->sclst_offset = reg_ctx->reg_buf[i].offset_sclst; + reg_ctx->regs = reg_ctx->reg_buf[i].regs; + reg_ctx->reg_buf[i].valid = 1; + break; + } + } + + mpp_assert(regs); + } + + regs = reg_ctx->regs; + + prepare_header(p_hal, reg_ctx->shph_dat, sizeof(reg_ctx->shph_dat)); + prepare_scalist(p_hal, reg_ctx->scalist_dat, sizeof(reg_ctx->scalist_dat)); + + ret = fill_registers(p_hal, regs, task); + + if (ret) + goto __RETURN; + + { + memcpy(reg_ctx->bufs_ptr + reg_ctx->shph_offset, reg_ctx->shph_dat, sizeof(reg_ctx->shph_dat)); + memcpy(reg_ctx->bufs_ptr + reg_ctx->sclst_offset, reg_ctx->scalist_dat, sizeof(reg_ctx->scalist_dat)); + regs->common.reg012.scanlist_addr_valid_en = 1; + + MppDevRegOffsetCfg trans_cfg; + trans_cfg.reg_idx = 161; + trans_cfg.offset = reg_ctx->shph_offset; + regs->avs2d_addr.head_base = reg_ctx->bufs_fd; + mpp_dev_ioctl(p_hal->dev, MPP_DEV_REG_OFFSET, &trans_cfg); + + regs->avs2d_param.reg105.head_len = AVS2_RKV_SHPH_SIZE / 16; + regs->avs2d_param.reg105.head_len -= (regs->avs2d_param.reg105.head_len > 0) ? 
1 : 0; + + trans_cfg.reg_idx = 180; + trans_cfg.offset = reg_ctx->sclst_offset; + regs->avs2d_addr.scanlist_addr = reg_ctx->bufs_fd; + mpp_dev_ioctl(p_hal->dev, MPP_DEV_REG_OFFSET, &trans_cfg); + } + + if (avs2d_hal_debug & AVS2D_HAL_DBG_IN) { + FILE *fp_shph = NULL; + char name[50]; + snprintf(name, sizeof(name), "/data/tmp/rkv_shph_%03d.bin", p_hal->frame_no); + fp_shph = fopen(name, "wb"); + fwrite(reg_ctx->bufs_ptr + reg_ctx->shph_offset, 1, sizeof(reg_ctx->shph_dat), fp_shph); + fclose(fp_shph); + } + + if (avs2d_hal_debug & AVS2D_HAL_DBG_IN) { + FILE *fp_scalist = NULL; + char name[50]; + snprintf(name, sizeof(name), "/data/tmp/rkv_scalist_%03d.bin", p_hal->frame_no); + fp_scalist = fopen(name, "wb"); + fwrite(reg_ctx->bufs_ptr + reg_ctx->sclst_offset, 1, sizeof(reg_ctx->scalist_dat), fp_scalist); + fclose(fp_scalist); + } + + // set rcb + { + hal_avs2d_rcb_info_update(p_hal, regs); + vdpu382_setup_rcb(®s->common_addr, p_hal->dev, p_hal->fast_mode ? + reg_ctx->rcb_buf[task->dec.reg_index] : reg_ctx->rcb_buf[0], + reg_ctx->rcb_info); + + } + + if (avs2d_hal_debug & AVS2D_HAL_DBG_IN) { + FILE *fp_rcb = NULL; + char name[50]; + void *base = NULL; + snprintf(name, sizeof(name), "/data/tmp/rkv_rcb_%03d.bin", p_hal->frame_no); + fp_rcb = fopen(name, "wb"); + base = mpp_buffer_get_ptr(reg_ctx->rcb_buf[0]); + fwrite(base, 1, reg_ctx->rcb_buf_size, fp_rcb); + fclose(fp_rcb); + + } + + vdpu382_setup_statistic(®s->common, ®s->statistic); + /* enable reference frame usage feedback */ + regs->statistic.reg265.perf_cnt0_sel = 42; + regs->statistic.reg266_perf_cnt0 = 0; + +__RETURN: + AVS2D_HAL_TRACE("Out. 
ret %d", ret); + return ret; +} + +static MPP_RET hal_avs2d_vdpu382_dump_reg_write(void *hal, Vdpu382Avs2dRegSet *regs) +{ + MPP_RET ret = MPP_OK; + Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal; + FILE *fp_reg = NULL; + RK_U32 i = 0; + char name[50]; + snprintf(name, sizeof(name), "/data/tmp/rkv_reg_write_%03d.txt", p_hal->frame_no); + fp_reg = fopen(name , "w+"); + + fprintf(fp_reg, "********Frame num %d\n", p_hal->frame_no); + for (i = 0; i < 8; i++) + fprintf(fp_reg, "Write reg[%03d] : 0x%08x\n", i, 0); + + for (i = 0; i < sizeof(Vdpu382RegCommon) / sizeof(RK_U32); i++) + fprintf(fp_reg, "Write reg[%03d] : 0x%08x\n", (RK_U32)(i + OFFSET_COMMON_REGS / sizeof(RK_U32)), + ((RK_U32 *)®s->common)[i]); + + for (i = 0; i < 63 - 32; i++) + fprintf(fp_reg, "Write reg[%03d] : 0x%08x\n", i + 33, 0); + + for (i = 0; i < sizeof(Vdpu382RegAvs2dParam) / sizeof(RK_U32); i++) + fprintf(fp_reg, "Write reg[%03d] : 0x%08x\n", (RK_U32)(i + OFFSET_CODEC_PARAMS_REGS / sizeof(RK_U32)), + ((RK_U32 *)®s->avs2d_param)[i]); + + for (i = 0; i < 127 - 112; i++) + fprintf(fp_reg, "Write reg[%03d] : 0x%08x\n", i + 113, 0); + + for (i = 0; i < sizeof(Vdpu382RegCommonAddr) / sizeof(RK_U32); i++) + fprintf(fp_reg, "Write reg[%03d] : 0x%08x\n", (RK_U32)(i + OFFSET_COMMON_ADDR_REGS / sizeof(RK_U32)), + ((RK_U32 *)®s->common_addr)[i]); + + for (i = 0; i < 159 - 142; i++) + fprintf(fp_reg, "Write reg[%03d] : 0x%08x\n", i + 143, 0); + + + for (i = 0; i < sizeof(Vdpu382RegAvs2dAddr) / sizeof(RK_U32); i++ ) + fprintf(fp_reg, "Write reg[%03d] : 0x%08x\n", (RK_U32)(i + OFFSET_CODEC_ADDR_REGS / sizeof(RK_U32)), + ((RK_U32 *)®s->avs2d_addr)[i]); + + for (i = 0; i < 223 - 197; i++) + fprintf(fp_reg, "Write reg[%03d] : 0x%08x\n", i + 198, 0); + + for (i = 0; i < sizeof(Vdpu382RegIrqStatus) / sizeof(RK_U32); i++ ) + fprintf(fp_reg, "Write reg[%03d] : 0x%08x\n", (RK_U32)(i + OFFSET_INTERRUPT_REGS / sizeof(RK_U32)), + ((RK_U32 *)®s->irq_status)[i]); + + for (i = 0; i < 255 - 237; i++) + fprintf(fp_reg, "Write 
reg[%03d] : 0x%08x\n", i + 238, 0); + + for (i = 0; i < sizeof(Vdpu382RegStatistic) / sizeof(RK_U32); i++ ) + fprintf(fp_reg, "Write reg[%03d] : 0x%08x\n", (RK_U32)(i + OFFSET_STATISTIC_REGS / sizeof(RK_U32)), + ((RK_U32 *)®s->statistic)[i]); + + fclose(fp_reg); + return ret; +} + +static MPP_RET hal_avs2d_vdpu382_dump_stream(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_OK; + Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal; + + FILE *fp_stream = NULL; + char name[50]; + MppBuffer buffer = NULL; + void *base = NULL; + mpp_buf_slot_get_prop(p_hal->packet_slots, task->dec.input, SLOT_BUFFER, &buffer); + base = mpp_buffer_get_ptr(buffer); + snprintf(name, sizeof(name), "/data/tmp/rkv_stream_in_%03d.bin", p_hal->frame_no); + fp_stream = fopen(name, "wb"); + fwrite(base, 1, mpp_packet_get_length(task->dec.input_packet), fp_stream); + fclose(fp_stream); + + return ret; +} + +MPP_RET hal_avs2d_vdpu382_start(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_OK; + Vdpu382Avs2dRegSet *regs = NULL; + Avs2dVdpu382RegCtx_t *reg_ctx; + MppDev dev = NULL; + Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal; + + AVS2D_HAL_TRACE("In."); + INP_CHECK(ret, NULL == p_hal); + + if (task->dec.flags.parse_err || task->dec.flags.ref_err) { + ret = MPP_NOK; + goto __RETURN; + } + + reg_ctx = (Avs2dVdpu382RegCtx_t *)p_hal->reg_ctx; + regs = p_hal->fast_mode ? 
reg_ctx->reg_buf[task->dec.reg_index].regs : reg_ctx->regs; + dev = p_hal->dev; + + p_hal->frame_no++; + + do { + MppDevRegWrCfg wr_cfg; + MppDevRegRdCfg rd_cfg; + + wr_cfg.reg = ®s->common; + wr_cfg.size = sizeof(regs->common); + wr_cfg.offset = OFFSET_COMMON_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = ®s->avs2d_param; + wr_cfg.size = sizeof(regs->avs2d_param); + wr_cfg.offset = OFFSET_CODEC_PARAMS_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = ®s->common_addr; + wr_cfg.size = sizeof(regs->common_addr); + wr_cfg.offset = OFFSET_COMMON_ADDR_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = ®s->avs2d_addr; + wr_cfg.size = sizeof(regs->avs2d_addr); + wr_cfg.offset = OFFSET_CODEC_ADDR_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = ®s->statistic; + wr_cfg.size = sizeof(regs->statistic); + wr_cfg.offset = OFFSET_STATISTIC_REGS; + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + rd_cfg.reg = ®s->irq_status; + rd_cfg.size = sizeof(regs->irq_status); + rd_cfg.offset = OFFSET_INTERRUPT_REGS; + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg); + + if (ret) { + mpp_err_f("set register read failed %d\n", ret); + break; + } + + rd_cfg.reg = ®s->avs2d_param; + rd_cfg.size = sizeof(regs->avs2d_param); + rd_cfg.offset = OFFSET_CODEC_PARAMS_REGS; + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg); + + if (ret) { + mpp_err_f("set register read failed %d\n", ret); + break; + } + + rd_cfg.reg = ®s->statistic; + rd_cfg.size = sizeof(regs->statistic); + 
rd_cfg.offset = OFFSET_STATISTIC_REGS; + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg); + + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + if (avs2d_hal_debug & AVS2D_HAL_DBG_REG) { + memset(reg_ctx->reg_out, 0, sizeof(reg_ctx->reg_out)); + rd_cfg.reg = reg_ctx->reg_out; + rd_cfg.size = sizeof(reg_ctx->reg_out); + rd_cfg.offset = 0; + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg); + } + + // rcb info for sram + { + RK_U32 i = 0; + MppDevRcbInfoCfg rcb_cfg; + Vdpu382RcbInfo rcb_info[RCB_BUF_COUNT]; + + memcpy(rcb_info, reg_ctx->rcb_info, sizeof(rcb_info)); + qsort(rcb_info, MPP_ARRAY_ELEMS(rcb_info), + sizeof(rcb_info[0]), vdpu382_compare_rcb_size); + + for (i = 0; i < MPP_ARRAY_ELEMS(rcb_info); i++) { + rcb_cfg.reg_idx = rcb_info[i].reg; + rcb_cfg.size = rcb_info[i].size; + + if (rcb_cfg.size > 0) { + mpp_dev_ioctl(dev, MPP_DEV_RCB_INFO, &rcb_cfg); + } else + break; + } + } + + if (avs2d_hal_debug & AVS2D_HAL_DBG_IN) + hal_avs2d_vdpu382_dump_stream(hal, task); + + if (avs2d_hal_debug & AVS2D_HAL_DBG_REG) + hal_avs2d_vdpu382_dump_reg_write(hal, regs); + + // send request to hardware + ret = mpp_dev_ioctl(dev, MPP_DEV_CMD_SEND, NULL); + if (ret) { + mpp_err_f("send cmd failed %d\n", ret); + break; + } + + } while (0); + +__RETURN: + AVS2D_HAL_TRACE("Out."); + return ret; +} + + +static RK_U8 fetch_data(RK_U32 fmt, RK_U8 *line, RK_U32 num) +{ + RK_U32 offset = 0; + RK_U32 value = 0; + + if (fmt == MPP_FMT_YUV420SP_10BIT) { + offset = (num * 2) & 7; + value = (line[num * 10 / 8] >> offset) | + (line[num * 10 / 8 + 1] << (8 - offset)); + + value = (value & 0x3ff) >> 2; + } else if (fmt == MPP_FMT_YUV420SP) { + value = line[num]; + } + + return value; +} + +static MPP_RET hal_avs2d_vdpu382_dump_yuv(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_OK; + Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal; + + MppFrameFormat fmt = MPP_FMT_YUV420SP; + RK_U32 vir_w = 0; + RK_U32 vir_h = 0; + RK_U32 i = 0; + RK_U32 j = 0; + FILE *fp_stream 
= NULL; + char name[50]; + MppBuffer buffer = NULL; + MppFrame frame; + void *base = NULL; + + ret = mpp_buf_slot_get_prop(p_hal->frame_slots, task->dec.output, SLOT_FRAME_PTR, &frame); + + if (ret != MPP_OK || frame == NULL) + mpp_log_f("failed to get frame slot %d", task->dec.output); + + ret = mpp_buf_slot_get_prop(p_hal->frame_slots, task->dec.output, SLOT_BUFFER, &buffer); + + if (ret != MPP_OK || buffer == NULL) + mpp_log_f("failed to get frame buffer slot %d", task->dec.output); + + AVS2D_HAL_TRACE("frame slot %d, fd %d\n", task->dec.output, mpp_buffer_get_fd(buffer)); + base = mpp_buffer_get_ptr(buffer); + vir_w = mpp_frame_get_hor_stride(frame); + vir_h = mpp_frame_get_ver_stride(frame); + fmt = mpp_frame_get_fmt(frame); + snprintf(name, sizeof(name), "/data/tmp/rkv_out_%dx%d_nv12_%03d.yuv", vir_w, vir_h, + p_hal->frame_no); + fp_stream = fopen(name, "wb"); + + if (fmt != MPP_FMT_YUV420SP_10BIT) { + fwrite(base, 1, vir_w * vir_h * 3 / 2, fp_stream); + } else { + RK_U8 tmp = 0; + for (i = 0; i < vir_h; i++) { + for (j = 0; j < vir_w; j++) { + tmp = fetch_data(fmt, base, j); + fwrite(&tmp, 1, 1, fp_stream); + } + base += vir_w; + } + + for (i = 0; i < vir_h / 2; i++) { + for (j = 0; j < vir_w; j++) { + tmp = fetch_data(fmt, base, j); + fwrite(&tmp, 1, 1, fp_stream); + } + base += vir_w; + } + } + fclose(fp_stream); + + return ret; +} + +MPP_RET hal_avs2d_vdpu382_wait(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_OK; + Avs2dHalCtx_t *p_hal = (Avs2dHalCtx_t *)hal; + Avs2dVdpu382RegCtx_t *reg_ctx; + Vdpu382Avs2dRegSet *p_regs; + + INP_CHECK(ret, NULL == p_hal); + reg_ctx = (Avs2dVdpu382RegCtx_t *)p_hal->reg_ctx; + p_regs = p_hal->fast_mode ? 
reg_ctx->reg_buf[task->dec.reg_index].regs : reg_ctx->regs; + + if (task->dec.flags.parse_err || task->dec.flags.ref_err) { + AVS2D_HAL_DBG(AVS2D_HAL_DBG_ERROR, "found task error.\n"); + ret = MPP_NOK; + goto __RETURN; + } else { + ret = mpp_dev_ioctl(p_hal->dev, MPP_DEV_CMD_POLL, NULL); + if (ret) + mpp_err_f("poll cmd failed %d\n", ret); + } + + if (avs2d_hal_debug & AVS2D_HAL_DBG_OUT) + hal_avs2d_vdpu382_dump_yuv(hal, task); + + if (avs2d_hal_debug & AVS2D_HAL_DBG_REG) { + FILE *fp_reg = NULL; + RK_U32 i = 0; + char name[50]; + snprintf(name, sizeof(name), "/data/tmp/rkv_reg_read_%03d.txt", p_hal->frame_no); + fp_reg = fopen(name , "w+"); + + for (i = 0; i < 278; i++) + fprintf(fp_reg, "%08x\n", reg_ctx->reg_out[i]); + + fclose(fp_reg); + } + + AVS2D_HAL_TRACE("read reg[224] 0x%08x\n", p_regs->irq_status.reg224); + + if (p_hal->dec_cb) { + DecCbHalDone param; + + param.task = (void *)&task->dec; + param.regs = (RK_U32 *)p_regs; + + if (p_regs->irq_status.reg224.dec_error_sta || + (!p_regs->irq_status.reg224.dec_rdy_sta) || + p_regs->irq_status.reg224.buf_empty_sta || + p_regs->irq_status.reg226.strmd_error_status || + p_regs->irq_status.reg227.colmv_error_ref_picidx || + p_regs->irq_status.reg225.strmd_detect_error_flag) + param.hard_err = 1; + else + param.hard_err = 0; + + task->dec.flags.ref_used = p_regs->statistic.reg266_perf_cnt0; + + if (task->dec.flags.ref_miss) { + RK_U32 ref_hw_usage = p_regs->statistic.reg266_perf_cnt0; + + AVS2D_HAL_TRACE("hal frame %d ref miss %x hard_err %d hw_usage %x", p_hal->frame_no, + task->dec.flags.ref_miss, param.hard_err, ref_hw_usage); + } + + AVS2D_HAL_TRACE("hal frame %d hard_err= %d", p_hal->frame_no, param.hard_err); + + mpp_callback(p_hal->dec_cb, ¶m); + } + + memset(&p_regs->irq_status.reg224, 0, sizeof(RK_U32)); + + if (p_hal->fast_mode) + reg_ctx->reg_buf[task->dec.reg_index].valid = 0; + +__RETURN: + AVS2D_HAL_TRACE("Out. 
ret %d", ret); + return ret; +} diff --git a/mpp/hal/rkdec/avs2d/hal_avs2d_vdpu382.h b/mpp/hal/rkdec/avs2d/hal_avs2d_vdpu382.h new file mode 100644 index 00000000..0cddfc2a --- /dev/null +++ b/mpp/hal/rkdec/avs2d/hal_avs2d_vdpu382.h @@ -0,0 +1,44 @@ +/* + * Copyright 2022 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __HAL_AVS2D_VDPU382_H__ +#define __HAL_AVS2D_VDPU382_H__ + +#include "mpp_device.h" + +#include "parser_api.h" +#include "hal_avs2d_api.h" +#include "hal_avs2d_global.h" +#include "avs2d_syntax.h" +#include "vdpu382.h" + +#define AVS2D_REGISTERS (278) + +#ifdef __cplusplus +extern "C" { +#endif + +MPP_RET hal_avs2d_vdpu382_init (void *hal, MppHalCfg *cfg); +MPP_RET hal_avs2d_vdpu382_deinit (void *hal); +MPP_RET hal_avs2d_vdpu382_gen_regs(void *hal, HalTaskInfo *task); +MPP_RET hal_avs2d_vdpu382_start (void *hal, HalTaskInfo *task); +MPP_RET hal_avs2d_vdpu382_wait (void *hal, HalTaskInfo *task); + +#ifdef __cplusplus +} +#endif + +#endif /*__HAL_AVS2D_VDPU382_H__*/ diff --git a/mpp/hal/rkdec/h264d/CMakeLists.txt b/mpp/hal/rkdec/h264d/CMakeLists.txt index e3ab2b4f..12118bde 100644 --- a/mpp/hal/rkdec/h264d/CMakeLists.txt +++ b/mpp/hal/rkdec/h264d/CMakeLists.txt @@ -4,6 +4,7 @@ set(HAL_H264D_SRC hal_h264d_api.c hal_h264d_vdpu34x.c + hal_h264d_vdpu382.c hal_h264d_rkv_reg.c hal_h264d_vdpu2.c hal_h264d_vdpu1.c diff --git a/mpp/hal/rkdec/h264d/hal_h264d_api.c b/mpp/hal/rkdec/h264d/hal_h264d_api.c index 
eaddb766..0c1d0ed6 100644 --- a/mpp/hal/rkdec/h264d/hal_h264d_api.c +++ b/mpp/hal/rkdec/h264d/hal_h264d_api.c @@ -38,6 +38,7 @@ #include "hal_h264d_rkv_reg.h" #include "hal_h264d_vdpu34x.h" +#include "hal_h264d_vdpu382.h" #include "hal_h264d_vdpu2.h" #include "hal_h264d_vdpu1.h" @@ -133,7 +134,16 @@ MPP_RET hal_h264d_init(void *hal, MppHalCfg *cfg) case VPU_CLIENT_RKVDEC : { RK_U32 hw_id = mpp_get_client_hw_id(client_type); - if (hw_id == HWID_VDPU34X || hw_id == HWID_VDPU38X) { + if (hw_id == HWID_VDPU382) { + p_api->init = vdpu382_h264d_init; + p_api->deinit = vdpu382_h264d_deinit; + p_api->reg_gen = vdpu382_h264d_gen_regs; + p_api->start = vdpu382_h264d_start; + p_api->wait = vdpu382_h264d_wait; + p_api->reset = vdpu382_h264d_reset; + p_api->flush = vdpu382_h264d_flush; + p_api->control = vdpu382_h264d_control; + } else if (hw_id == HWID_VDPU34X || hw_id == HWID_VDPU38X) { p_api->init = vdpu34x_h264d_init; p_api->deinit = vdpu34x_h264d_deinit; p_api->reg_gen = vdpu34x_h264d_gen_regs; diff --git a/mpp/hal/rkdec/h264d/hal_h264d_vdpu382.c b/mpp/hal/rkdec/h264d/hal_h264d_vdpu382.c new file mode 100644 index 00000000..d15211d0 --- /dev/null +++ b/mpp/hal/rkdec/h264d/hal_h264d_vdpu382.c @@ -0,0 +1,1230 @@ +/* + * Copyright 2022 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/*
 * SET_REF_INFO - store `value` in the per-reference bitfield
 * ref<index>_<field> of the parameter register group `regs`.
 *
 * regs  - lvalue expression holding the members reg99..reg102.
 * index - reference number, 0..15; any other value is ignored.
 * field - bitfield name suffix (pasted after "ref<index>_").
 * value - value to store.
 *
 * References are spread four per register: 0-3 live in reg99, 4-7 in
 * reg100, 8-11 in reg101 and 12-15 in reg102. The member name depends on
 * the index, so the selection has to be spelled out case by case.
 * `regs` and `value` are parenthesized so that any expression (for example
 * `*ptr` or a conditional) can be passed safely.
 */
#define SET_REF_INFO(regs, index, field, value)\
    do {\
        switch (index) {\
        case 0:  (regs).reg99.ref0_##field   = (value); break;\
        case 1:  (regs).reg99.ref1_##field   = (value); break;\
        case 2:  (regs).reg99.ref2_##field   = (value); break;\
        case 3:  (regs).reg99.ref3_##field   = (value); break;\
        case 4:  (regs).reg100.ref4_##field  = (value); break;\
        case 5:  (regs).reg100.ref5_##field  = (value); break;\
        case 6:  (regs).reg100.ref6_##field  = (value); break;\
        case 7:  (regs).reg100.ref7_##field  = (value); break;\
        case 8:  (regs).reg101.ref8_##field  = (value); break;\
        case 9:  (regs).reg101.ref9_##field  = (value); break;\
        case 10: (regs).reg101.ref10_##field = (value); break;\
        case 11: (regs).reg101.ref11_##field = (value); break;\
        case 12: (regs).reg102.ref12_##field = (value); break;\
        case 13: (regs).reg102.ref13_##field = (value); break;\
        case 14: (regs).reg102.ref14_##field = (value); break;\
        case 15: (regs).reg102.ref15_##field = (value); break;\
        default: break;\
        }\
    } while (0)
/* Number of register sets / per-task buffer slots kept in the context. */
#define VDPU382_FAST_REG_SET_CNT    3

/* One register set together with an "in use" marker. */
typedef struct h264d_rkv_buf_t {
    RK_U32              valid;  /* NOTE(review): presumably non-zero while the set is owned by a task - confirm */
    Vdpu382H264dRegSet  *regs;
} H264dRkvBuf_t;

/*
 * Private state of the VDPU382 H.264 HAL.
 *
 * Only part of the usage is visible from this declaration; comments marked
 * NOTE(review) are assumptions that should be checked against the code
 * that fills the structure.
 */
typedef struct Vdpu382H264dRegCtx_t {
    /* CPU-side staging areas for the syntax tables */
    RK_U8               spspps[48];
    RK_U8               rps[VDPU382_RPS_SIZE];
    RK_U8               sclst[VDPU382_SCALING_LIST_SIZE];

    /* shared info buffer: handle, file descriptor and CPU mapping */
    MppBuffer           bufs;
    RK_S32              bufs_fd;    /* written to the cabactbl_base register */
    void                *bufs_ptr;
    /* offsets of the individual tables inside the shared buffer */
    RK_U32              offset_cabac;   /* applied as register offset for the cabac table */
    RK_U32              offset_errinfo;
    RK_U32              offset_spspps[VDPU382_FAST_REG_SET_CNT];
    RK_U32              offset_rps[VDPU382_FAST_REG_SET_CNT];
    RK_U32              offset_sclst[VDPU382_FAST_REG_SET_CNT];

    /* one register set per slot */
    H264dRkvBuf_t       reg_buf[VDPU382_FAST_REG_SET_CNT];

    /* NOTE(review): presumably the offsets selected for the current task - confirm */
    RK_U32              spspps_offset;
    RK_U32              rps_offset;
    RK_U32              sclst_offset;

    RK_S32              width;
    RK_S32              height;
    /* rcb buffers info */
    RK_U32              bit_depth;
    RK_U32              mbaff;
    RK_U32              chroma_format_idc;

    RK_S32              rcb_buf_size;
    Vdpu382RcbInfo      rcb_info[RCB_BUF_COUNT];
    MppBuffer           rcb_buf[VDPU382_FAST_REG_SET_CNT];

    /* NOTE(review): presumably the register set currently being programmed - confirm */
    Vdpu382H264dRegSet  *regs;
} Vdpu382H264dRegCtx;
+ 0x5ef64cfd, 0x45043605, 0x580051fd, 0x4afb43f9, 0x50fb4afc, 0x3a0148f9, + 0x3f002900, 0x3f003f00, 0x560453f7, 0x48f96100, 0x3e03290d, 0x4efc2d00, + 0x7ee560fd, 0x65e762e4, 0x52e443e9, 0x53f05eec, 0x5beb6eea, 0x5df366ee, + 0x5cf97fe3, 0x60f959fb, 0x2efd6cf3, 0x39ff41ff, 0x4afd5df7, 0x57f85cf7, + 0x36057ee9, 0x3b063c06, 0x30ff4506, 0x45fc4400, 0x55fe58f8, 0x4bff4efa, + 0x36024df9, 0x44fd3205, 0x2a063201, 0x3f0151fc, 0x430046fc, 0x4cfe3902, + 0x4004230b, 0x230b3d01, 0x180c1912, 0x240d1d0d, 0x49f95df6, 0x2e0d49fe, + 0x64f93109, 0x35023509, 0x3dfe3505, 0x38003800, 0x3cfb3ff3, 0x39043eff, + 0x390445fa, 0x3304270e, 0x4003440d, 0x3f093d01, 0x27103207, 0x34042c05, + 0x3cfb300b, 0x3b003bff, 0x2c052116, 0x4eff2b0e, 0x45093c00, 0x28021c0b, + 0x31002c03, 0x2c022e00, 0x2f003302, 0x3e022704, 0x36002e06, 0x3a023603, + 0x33063f04, 0x35073906, 0x37063406, 0x240e2d0b, 0x52ff3508, 0x4efd3707, + 0x1f162e0f, 0x071954ff, 0x031cf91e, 0x0020041c, 0x061eff22, 0x0920061e, + 0x1b1a131f, 0x14251e1a, 0x4611221c, 0x3b054301, 0x1e104309, 0x23122012, + 0x1f181d16, 0x2b122617, 0x3f0b2914, 0x40093b09, 0x59fe5eff, 0x4cfa6cf7, + 0x2d002cfe, 0x40fd3400, 0x46fc3bfe, 0x52f84bfc, 0x4df766ef, 0x2a001803, + 0x37003000, 0x47f93bfa, 0x57f553f4, 0x3a0177e2, 0x24ff1dfd, 0x2b022601, + 0x3a0037fa, 0x4afd4000, 0x46005af6, 0x1f051dfc, 0x3b012a07, 0x48fd3afe, + 0x61f551fd, 0x05083a00, 0x120e0e0a, 0x28021b0d, 0x46fd3a00, 0x55f84ffa, + 0x6af30000, 0x57f66af0, 0x6eee72eb, 0x6eea62f2, 0x67ee6aeb, 0x6ce96beb, + 0x60f670e6, 0x5bfb5ff4, 0x5eea5df7, 0x430956fb, 0x55f650fc, 0x3c0746ff, + 0x3d053a09, 0x320f320c, 0x36113112, 0x2e07290a, 0x310733ff, 0x29093408, + 0x37022f06, 0x2c0a290d, 0x35053206, 0x3f04310d, 0x45fe4006, 0x46063bfe, + 0x1f092c0a, 0x35032b0c, 0x260a220e, 0x280d34fd, 0x2c072011, 0x320d2607, + 0x2b1a390a, 0x0e0b0b0e, 0x0b120b09, 0xfe170915, 0xf120f120, 0xe927eb22, + 0xe129df2a, 0xf426e42e, 0xe82d1d15, 0xe630d335, 0xed2bd541, 0x091ef627, + 0x1b141a12, 0x52f23900, 0x61ed4bfb, 0x001b7ddd, 0xfc1f001c, 0x0822061b, + 
0x16180a1e, 0x20161321, 0x29151f1a, 0x2f172c1a, 0x470e4110, 0x3f063c08, + 0x18154111, 0x171a1417, 0x171c201b, 0x2817181c, 0x1d1c2018, 0x39132a17, + 0x3d163516, 0x280c560b, 0x3b0e330b, 0x47f94ffc, 0x46f745fb, 0x44f642f8, + 0x45f449ed, 0x43f146f0, 0x46ed3eec, 0x41ea42f0, 0xfe093fec, 0xf721f71a, + 0xfe29f927, 0x0931032d, 0x3b241b2d, 0x23f942fa, 0x2df82af9, 0x38f430fb, + 0x3efb3cfa, 0x4cf842f8, 0x51fa55fb, 0x51f94df6, 0x49ee50ef, 0x53f64afc, + 0x43f747f7, 0x42f83dff, 0x3b0042f2, 0xf3153b02, 0xf927f221, 0x0233fe2e, + 0x113d063c, 0x3e2a2237, 0x00000000, 0x00000000, 0x3602f114, 0xf1144a03, + 0x4a033602, 0x68e97fe4, 0x36ff35fa, 0x19163307, 0x00100022, 0x290409fe, + 0x410276e3, 0x4ff347fa, 0x32093405, 0x360a46fd, 0x1613221a, 0x02390028, + 0x451a2429, 0x65f17fd3, 0x47fa4cfc, 0x34054ff3, 0x5af34506, 0x2b083400, + 0x52fb45fe, 0x3b0260f6, 0x57fd4b02, 0x380164fd, 0x55fa4afd, 0x51fd3b00, + 0x5ffb56f9, 0x4dff42ff, 0x56fe4601, 0x3d0048fb, 0x3f002900, 0x3f003f00, + 0x560453f7, 0x48f96100, 0x3e03290d, 0x33070f0d, 0x7fd95002, 0x60ef5bee, + 0x62dd51e6, 0x61e966e8, 0x63e877e5, 0x66ee6eeb, 0x50007fdc, 0x5ef959fb, + 0x27005cfc, 0x54f14100, 0x49fe7fdd, 0x5bf768f4, 0x37037fe1, 0x37073807, + 0x35fd3d08, 0x4af94400, 0x67f358f7, 0x59f75bf3, 0x4cf85cf2, 0x6ee957f4, + 0x4ef669e8, 0x63ef70ec, 0x7fba7fb2, 0x7fd27fce, 0x4efb42fc, 0x48f847fc, + 0x37ff3b02, 0x4bfa46f9, 0x77de59f8, 0x14204bfd, 0x7fd4161e, 0x3dfb3600, + 0x3cff3a00, 0x43f83dfd, 0x4af254e7, 0x340541fb, 0x3d003902, 0x46f545f7, + 0x47fc3712, 0x3d073a00, 0x19122909, 0x2b052009, 0x2c002f09, 0x2e023300, + 0x42fc2613, 0x2a0c260f, 0x59002209, 0x1c0a2d04, 0xf5211f0a, 0x0f12d534, + 0xea23001c, 0x0022e726, 0xf420ee27, 0x0000a266, 0xfc21f138, 0xfb250a1d, + 0xf727e333, 0xc645de34, 0xfb2cc143, 0xe3370720, 0x00000120, 0xe721241b, + 0xe424e222, 0xe526e426, 0xf023ee22, 0xf820f222, 0x0023fa25, 0x121c0a1e, + 0x291d191a, 0x48024b00, 0x230e4d08, 0x23111f12, 0x2d111e15, 0x2d122a14, + 0x36101a1b, 0x38104207, 0x430a490b, 0x70e974f6, 0x3df947f1, 0x42fb3500, + 
0x50f74df5, 0x57f654f7, 0x65eb7fde, 0x35fb27fd, 0x4bf53df9, 0x5bef4df1, + 0x6fe76be7, 0x4cf57ae4, 0x34f62cf6, 0x3af739f6, 0x45f948f0, 0x4afb45fc, + 0x420256f7, 0x200122f7, 0x34051f0b, 0x43fe37fe, 0x59f84900, 0x04073403, + 0x0811080a, 0x25031310, 0x49fb3dff, 0x4efc46ff, 0x7eeb0000, 0x6eec7ce9, + 0x7ce77ee6, 0x79e569ef, 0x66ef75e5, 0x74e575e6, 0x5ff67adf, 0x5ff864f2, + 0x72e46fef, 0x50fe59fa, 0x55f752fc, 0x48ff51f8, 0x43014005, 0x45003809, + 0x45074501, 0x43fa45f9, 0x40fe4df0, 0x43fa3d02, 0x390240fd, 0x42fd41fd, + 0x33093e00, 0x47fe42ff, 0x46ff4bfe, 0x3c0e48f7, 0x2f002510, 0x250b2312, + 0x290a290c, 0x290c3002, 0x3b00290d, 0x28133203, 0x32124203, 0xfa12fa13, + 0xf41a000e, 0xe721f01f, 0xe425ea21, 0xe22ae227, 0xdc2dd62f, 0xef29de31, + 0xb9450920, 0xc042c13f, 0xd936b64d, 0xf629dd34, 0xff280024, 0x1a1c0e1e, + 0x370c2517, 0xdf25410b, 0xdb28dc27, 0xdf2ee226, 0xe828e22a, 0xf426e331, + 0xfd26f628, 0x141ffb2e, 0x2c191e1d, 0x310b300c, 0x16162d1a, 0x151b1617, + 0x1c1a1421, 0x221b181e, 0x27192a12, 0x460c3212, 0x470e3615, 0x2019530b, + 0x36153115, 0x51fa55fb, 0x51f94df6, 0x49ee50ef, 0x53f64afc, 0x43f747f7, + 0x42f83dff, 0x3b0042f2, 0xf6113b02, 0xf72af320, 0x0035fb31, 0x0a440340, + 0x392f1b42, 0x180047fb, 0x2afe24ff, 0x39f734fe, 0x41fc3ffa, 0x52f943fc, + 0x4cfd51fd, 0x4efa48f9, 0x44f248f4, 0x4cfa46fd, 0x3efb42fb, 0x3dfc3900, + 0x36013cf7, 0xf6113a02, 0xf72af320, 0x0035fb31, 0x0a440340, 0x392f1b42, + 0x00000000, 0x00000000, 0x3602f114, 0xf1144a03, 0x4a033602, 0x68e97fe4, + 0x36ff35fa, 0x101d3307, 0x000e0019, 0x3efd33f6, 0x101a63e5, 0x66e855fc, + 0x39063905, 0x390e49ef, 0x0a142814, 0x0036001d, 0x610c2a25, 0x75ea7fe0, + 0x55fc4afe, 0x390566e8, 0x58f25dfa, 0x37042cfa, 0x67f159f5, 0x391374eb, + 0x54043a14, 0x3f016006, 0x6af355fb, 0x4b063f05, 0x65ff5afd, 0x4ffc3703, + 0x61f44bfe, 0x3c0132f9, 0x3f002900, 0x3f003f00, 0x560453f7, 0x48f96100, + 0x3e03290d, 0x58f72207, 0x7fdc7fec, 0x5ff25bef, 0x56e754e7, 0x5bef59f4, + 0x4cf27fe1, 0x5af367ee, 0x500b7fdb, 0x54024c05, 0x37fa4e05, 0x53f23d04, + 
0x4ffb7fdb, 0x5bf568f5, 0x41007fe2, 0x48004ffe, 0x38fa5cfc, 0x47f84403, + 0x56fc62f3, 0x52fb58f4, 0x43fc48fd, 0x59f048f8, 0x3bff45f7, 0x39044205, + 0x47fe47fc, 0x4aff3a02, 0x45ff2cfc, 0x33f93e00, 0x2afa2ffc, 0x35fa29fd, + 0x4ef74c08, 0x340953f5, 0x5afb4300, 0x48f14301, 0x50f84bfb, 0x40eb53eb, + 0x40e71ff3, 0x4b095ee3, 0x4af83f11, 0x1bfe23fb, 0x41035b0d, 0x4d0845f9, + 0x3e0342f6, 0x51ec44fd, 0x07011e00, 0x4aeb17fd, 0x7ce94210, 0xee2c2511, + 0x7feade32, 0x2a002704, 0x1d0b2207, 0x25061f08, 0x28032a07, 0x2b0d2108, + 0x2f04240d, 0x3a023703, 0x2c083c06, 0x2a0e2c0b, 0x38043007, 0x250d3404, + 0x3a133109, 0x2d0c300a, 0x21144500, 0xee233f08, 0xfd1ce721, 0x001b0a18, + 0xd434f222, 0x1113e827, 0x1d24191f, 0x0f222118, 0x4916141e, 0x1f132214, + 0x10132c1b, 0x240f240f, 0x15191c15, 0x0c1f141e, 0x2a18101b, 0x380e5d00, + 0x261a390f, 0x73e87fe8, 0x3ef752ea, 0x3b003500, 0x59f355f2, 0x5cf55ef3, + 0x64eb7fe3, 0x43f439f2, 0x4df647f5, 0x58f055eb, 0x62f168e9, 0x52f67fdb, + 0x3df830f8, 0x46f942f8, 0x4ff64bf2, 0x5cf453f7, 0x4ffc6cee, 0x4bf045ea, + 0x3a013afe, 0x53f74ef3, 0x63f351fc, 0x26fa51f3, 0x3afa3ef3, 0x49f03bfe, + 0x56f34cf6, 0x57f653f7, 0x7fea0000, 0x78e77fe7, 0x72ed7fe5, 0x76e775e9, + 0x71e875e6, 0x78e176e4, 0x5ef67cdb, 0x63f666f1, 0x7fce6af3, 0x39115cfb, + 0x5ef356fb, 0x4dfe5bf4, 0x49ff4700, 0x51f94004, 0x390f4005, 0x44004301, + 0x440143f6, 0x40024d00, 0x4efb4400, 0x3b053707, 0x360e4102, 0x3c052c0f, + 0x4cfe4602, 0x460c56ee, 0x46f44005, 0x3805370b, 0x41024500, 0x36054afa, + 0x4cfa3607, 0x4dfe52f5, 0x2a194dfe, 0xf710f311, 0xeb1bf411, 0xd829e225, + 0xd130d72a, 0xd82ee027, 0xd72ecd34, 0xed2bd934, 0xc93d0b20, 0xce3ed238, + 0xec2dbd51, 0x0f1cfe23, 0x01270122, 0x2614111e, 0x360f2d12, 0xf0244f00, + 0xef25f225, 0x0f220120, 0x19180f1d, 0x101f1622, 0x1c1f1223, 0x1c242921, + 0x3e152f1b, 0x1a131f12, 0x17181824, 0x1e18101b, 0x29161d1f, 0x3c102a16, + 0x3c0e340f, 0x7bf04e03, 0x38163515, 0x21153d19, 0x3d113213, 0x4af84efd, + 0x48f648f7, 0x47f44bee, 0x46fb3ff5, 0x48f24bef, 0x35f843f0, 0x34f73bf2, + 
0xfe0944f5, 0xfc1ff61e, 0x0721ff21, 0x17250c1f, 0x4014261f, 0x25f947f7, + 0x31f52cf8, 0x3bf438f6, 0x43f73ff8, 0x4ff644fa, 0x4af84efd, 0x48f648f7, + 0x47f44bee, 0x46fb3ff5, 0x48f24bef, 0x35f843f0, 0x34f73bf2, 0xfe0944f5, + 0xfc1ff61e, 0x0721ff21, 0x17250c1f, 0x4014261f, 0x00000000, 0x00000000, + 0x3602f114, 0xf1144a03, 0x4a033602, 0x68e97fe4, 0x36ff35fa, 0x00003307, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x3f002900, 0x3f003f00, 0x560453f7, 0x48f96100, 0x3e03290d, 0x37010b00, + 0x7fef4500, 0x520066f3, 0x6beb4af9, 0x7fe17fe5, 0x5fee7fe8, 0x72eb7fe5, + 0x7bef7fe2, 0x7af073f4, 0x3ff473f5, 0x54f144fe, 0x46fd68f3, 0x5af65df8, + 0x4aff7fe2, 0x5bf961fa, 0x38fc7fec, 0x4cf952fb, 0x5df97dea, 0x4dfd57f5, + 0x3ffc47fb, 0x54f444fc, 0x41f93ef9, 0x38053d08, 0x400142fe, 0x4efe3d00, + 0x34073201, 0x2c00230a, 0x2d01260b, 0x2c052e00, 0x3301111f, 0x131c3207, + 0x3e0e2110, 0x64f16cf3, 0x5bf365f3, 0x58f65ef4, 0x56f654f0, 0x57f353f9, + 0x46015eed, 0x4afb4800, 0x66f83b12, 0x5f0064f1, 0x48024bfc, 0x47fd4bf5, + 0x45f32e0f, 0x41003e00, 0x48f12515, 0x36103909, 0x480c3e00, 0x090f0018, + 0x120d1908, 0x130d090f, 0x120c250a, 0x21141d06, 0x2d041e0f, 0x3e003a01, + 0x260c3d07, 0x270f2d0b, 0x2c0d2a0b, 0x290c2d10, 0x221e310a, 0x370a2a12, + 0x2e113311, 0xed1a5900, 0xef1aef16, 0xec1ce71e, 0xe525e921, 0xe428e921, + 0xf521ef26, 0xfa29f128, 0x11290126, 0x031bfa1e, 0xf025161a, 0xf826fc23, + 0x0325fd26, 0x002a0526, 0x16271023, 0x251b300e, 0x440c3c15, 0x47fd6102, + 0x32fb2afa, 0x3efe36fd, 0x3f013a00, 0x4aff48fe, 0x43fb5bf7, 0x27fd1bfb, + 0x2e002cfe, 0x44f840f0, 0x4dfa4ef6, 0x5cf456f6, 0x3cf637f1, 0x41fc3efa, + 0x4cf849f4, 0x58f750f9, 0x61f56eef, 0x4ff554ec, 0x4afc49fa, 0x60f356f3, + 0x75ed61f5, 0x21fb4ef8, 0x35fe30fc, 0x47f33efd, 0x56f44ff6, 0x61f25af3, + 
0x5dfa0000, 0x4ff854fa, 0x47ff4200, 0x3cfe3e00, 0x4bfb3bfe, 0x3afc3efd, + 0x4fff42f7, 0x44034700, 0x3ef92c0a, 0x280e240f, 0x1d0c1b10, 0x24142c01, + 0x2a052012, 0x3e0a3001, 0x40092e11, 0x61f568f4, 0x58f960f0, 0x55f955f8, + 0x58f355f7, 0x4dfd4204, 0x4cfa4cfd, 0x4cff3a0a, 0x63f953ff, 0x5f025ff2, + 0x4afb4c00, 0x4bf54600, 0x41004401, 0x3e0349f2, 0x44ff3e04, 0x370b4bf3, + 0x460c4005, 0x1306060f, 0x0e0c1007, 0x0b0d0d12, 0x100f0f0d, 0x170d170c, + 0x1a0e140f, 0x28112c0e, 0x11182f11, 0x16191515, 0x1d161b1f, 0x320e2313, + 0x3f07390a, 0x52fc4dfe, 0x45095efd, 0xdd246df4, 0xe620de24, 0xe02ce225, + 0xf122ee22, 0xf921f128, 0x0021fb23, 0x0d210226, 0x3a0d2317, 0x001afd1d, + 0xf91f1e16, 0xfd22f123, 0xff240322, 0x0b200522, 0x0c220523, 0x1d1e0b27, + 0x271d1a22, 0x151f4213, 0x32191f1f, 0x70ec78ef, 0x55f572ee, 0x59f25cf1, + 0x51f147e6, 0x440050f2, 0x38e846f2, 0x32e844e9, 0xf3174af5, 0xf128f31a, + 0x032cf231, 0x222c062d, 0x52133621, 0x17ff4bfd, 0x2b012201, 0x37fe3600, + 0x40013d00, 0x5cf74400, 0x61f36af2, 0x5af45af1, 0x49f658ee, 0x56f24ff7, + 0x46f649f6, 0x42fb45f6, 0x3afb40f7, 0xf6153b02, 0xf81cf518, 0x031dff1c, + 0x1423091d, 0x430e241d, 0x00000000, 0x00000000 +}; + +static RK_U32 rkv_ver_align(RK_U32 val) +{ + return MPP_ALIGN(val, 16); +} + +static RK_U32 rkv_hor_align(RK_U32 val) +{ + return MPP_ALIGN(val, 16); +} + +static RK_U32 rkv_hor_align_256_odds(RK_U32 val) +{ + return (MPP_ALIGN(val, 256) | 256); +} + +static RK_U32 rkv_len_align(RK_U32 val) +{ + return (2 * MPP_ALIGN(val, 16)); +} + +static RK_U32 rkv_len_align_422(RK_U32 val) +{ + return ((5 * MPP_ALIGN(val, 16)) / 2); +} + +static MPP_RET prepare_spspps(H264dHalCtx_t *p_hal, RK_U64 *data, RK_U32 len) +{ + RK_S32 i = 0; + RK_S32 is_long_term = 0, voidx = 0; + DXVA_PicParams_H264_MVC *pp = p_hal->pp; + RK_U32 tmp = 0; + BitputCtx_t bp; + + mpp_set_bitput_ctx(&bp, data, len); + + if (!p_hal->fast_mode && !pp->spspps_update) { + bp.index = VDPU382_SPS_PPS_LEN >> 3; + bp.bitpos = (VDPU382_SPS_PPS_LEN & 0x7) << 3; + } else { + 
//!< sps syntax + mpp_put_bits(&bp, -1, 13); //!< sps_id 4bit && profile_idc 8bit && constraint_set3_flag 1bit + mpp_put_bits(&bp, pp->chroma_format_idc, 2); + mpp_put_bits(&bp, pp->bit_depth_luma_minus8, 3); + mpp_put_bits(&bp, pp->bit_depth_chroma_minus8, 3); + mpp_put_bits(&bp, 0, 1); //!< qpprime_y_zero_transform_bypass_flag + mpp_put_bits(&bp, pp->log2_max_frame_num_minus4, 4); + mpp_put_bits(&bp, pp->num_ref_frames, 5); + mpp_put_bits(&bp, pp->pic_order_cnt_type, 2); + mpp_put_bits(&bp, pp->log2_max_pic_order_cnt_lsb_minus4, 4); + mpp_put_bits(&bp, pp->delta_pic_order_always_zero_flag, 1); + mpp_put_bits(&bp, (pp->wFrameWidthInMbsMinus1 + 1), 12); + mpp_put_bits(&bp, (pp->wFrameHeightInMbsMinus1 + 1), 12); + mpp_put_bits(&bp, pp->frame_mbs_only_flag, 1); + mpp_put_bits(&bp, pp->MbaffFrameFlag, 1); + mpp_put_bits(&bp, pp->direct_8x8_inference_flag, 1); + + mpp_put_bits(&bp, 1, 1); //!< mvc_extension_enable + mpp_put_bits(&bp, (pp->num_views_minus1 + 1), 2); + mpp_put_bits(&bp, pp->view_id[0], 10); + mpp_put_bits(&bp, pp->view_id[1], 10); + mpp_put_bits(&bp, pp->num_anchor_refs_l0[0], 1); + if (pp->num_anchor_refs_l0[0]) { + mpp_put_bits(&bp, pp->anchor_ref_l0[0][0], 10); + } else { + mpp_put_bits(&bp, 0, 10); + } + mpp_put_bits(&bp, pp->num_anchor_refs_l1[0], 1); + if (pp->num_anchor_refs_l1[0]) { + mpp_put_bits(&bp, pp->anchor_ref_l1[0][0], 10); + } else { + mpp_put_bits(&bp, 0, 10); //!< anchor_ref_l1 + } + mpp_put_bits(&bp, pp->num_non_anchor_refs_l0[0], 1); + if (pp->num_non_anchor_refs_l0[0]) { + mpp_put_bits(&bp, pp->non_anchor_ref_l0[0][0], 10); + } else { + mpp_put_bits(&bp, 0, 10); //!< non_anchor_ref_l0 + } + mpp_put_bits(&bp, pp->num_non_anchor_refs_l1[0], 1); + if (pp->num_non_anchor_refs_l1[0]) { + mpp_put_bits(&bp, pp->non_anchor_ref_l1[0][0], 10); + } else { + mpp_put_bits(&bp, 0, 10);//!< non_anchor_ref_l1 + } + mpp_put_align(&bp, 128, 0); + //!< pps syntax + mpp_put_bits(&bp, -1, 13); //!< pps_id 8bit && sps_id 5bit + mpp_put_bits(&bp, 
pp->entropy_coding_mode_flag, 1); + mpp_put_bits(&bp, pp->pic_order_present_flag, 1); + mpp_put_bits(&bp, pp->num_ref_idx_l0_active_minus1, 5); + mpp_put_bits(&bp, pp->num_ref_idx_l1_active_minus1, 5); + mpp_put_bits(&bp, pp->weighted_pred_flag, 1); + mpp_put_bits(&bp, pp->weighted_bipred_idc, 2); + mpp_put_bits(&bp, pp->pic_init_qp_minus26, 7); + mpp_put_bits(&bp, pp->pic_init_qs_minus26, 6); + mpp_put_bits(&bp, pp->chroma_qp_index_offset, 5); + mpp_put_bits(&bp, pp->deblocking_filter_control_present_flag, 1); + mpp_put_bits(&bp, pp->constrained_intra_pred_flag, 1); + mpp_put_bits(&bp, pp->redundant_pic_cnt_present_flag, 1); + mpp_put_bits(&bp, pp->transform_8x8_mode_flag, 1); + mpp_put_bits(&bp, pp->second_chroma_qp_index_offset, 5); + mpp_put_bits(&bp, pp->scaleing_list_enable_flag, 1); + mpp_put_bits(&bp, 0, 32);// scanlist buffer has another addr + } + + //!< set dpb + for (i = 0; i < 16; i++) { + is_long_term = (pp->RefFrameList[i].bPicEntry != 0xff) ? pp->RefFrameList[i].AssociatedFlag : 0; + tmp |= (RK_U32)(is_long_term & 0x1) << i; + } + for (i = 0; i < 16; i++) { + voidx = (pp->RefFrameList[i].bPicEntry != 0xff) ? 
pp->RefPicLayerIdList[i] : 0; + tmp |= (RK_U32)(voidx & 0x1) << (i + 16); + } + mpp_put_bits(&bp, tmp, 32); + mpp_put_align(&bp, 64, 0); + + return MPP_OK; +} + +static MPP_RET prepare_framerps(H264dHalCtx_t *p_hal, RK_U64 *data, RK_U32 len) +{ + RK_S32 i = 0, j = 0; + RK_S32 dpb_idx = 0, voidx = 0; + RK_S32 dpb_valid = 0, bottom_flag = 0; + RK_U32 max_frame_num = 0; + RK_U16 frame_num_wrap = 0; + RK_U32 tmp = 0; + + BitputCtx_t bp; + DXVA_PicParams_H264_MVC *pp = p_hal->pp; + + mpp_set_bitput_ctx(&bp, data, len); + mpp_put_align(&bp, 128, 0); + max_frame_num = 1 << (pp->log2_max_frame_num_minus4 + 4); + for (i = 0; i < 16; i++) { + if ((pp->NonExistingFrameFlags >> i) & 0x01) { + frame_num_wrap = 0; + } else { + if (pp->RefFrameList[i].AssociatedFlag) { + frame_num_wrap = pp->FrameNumList[i]; + } else { + frame_num_wrap = (pp->FrameNumList[i] > pp->frame_num) ? + (pp->FrameNumList[i] - max_frame_num) : pp->FrameNumList[i]; + } + } + + mpp_put_bits(&bp, frame_num_wrap, 16); + } + + mpp_put_bits(&bp, 0, 16);//!< NULL + tmp = 0; + for (i = 0; i < 16; i++) { + tmp |= (RK_U32)pp->RefPicLayerIdList[i] << i; + } + mpp_put_bits(&bp, tmp, 16); + + for (i = 0; i < 32; i++) { + tmp = 0; + dpb_valid = (p_hal->slice_long[0].RefPicList[0][i].bPicEntry == 0xff) ? 0 : 1; + dpb_idx = dpb_valid ? p_hal->slice_long[0].RefPicList[0][i].Index7Bits : 0; + bottom_flag = dpb_valid ? p_hal->slice_long[0].RefPicList[0][i].AssociatedFlag : 0; + voidx = dpb_valid ? pp->RefPicLayerIdList[dpb_idx] : 0; + tmp |= (RK_U32)(dpb_idx | (dpb_valid << 4)) & 0x1f; + tmp |= (RK_U32)(bottom_flag & 0x1) << 5; + tmp |= (RK_U32)(voidx & 0x1) << 6; + mpp_put_bits(&bp, tmp, 7); + } + for (j = 1; j < 3; j++) { + for (i = 0; i < 32; i++) { + tmp = 0; + dpb_valid = (p_hal->slice_long[0].RefPicList[j][i].bPicEntry == 0xff) ? 0 : 1; + dpb_idx = dpb_valid ? p_hal->slice_long[0].RefPicList[j][i].Index7Bits : 0; + bottom_flag = dpb_valid ? 
p_hal->slice_long[0].RefPicList[j][i].AssociatedFlag : 0; + voidx = dpb_valid ? pp->RefPicLayerIdList[dpb_idx] : 0; + tmp |= (RK_U32)(dpb_idx | (dpb_valid << 4)) & 0x1f; + tmp |= (RK_U32)(bottom_flag & 0x1) << 5; + tmp |= (RK_U32)(voidx & 0x1) << 6; + mpp_put_bits(&bp, tmp, 7); + } + } + mpp_put_align(&bp, 128, 0); + + return MPP_OK; +} + +static MPP_RET prepare_scanlist(H264dHalCtx_t *p_hal, RK_U8 *data, RK_U32 len) +{ + RK_U32 i = 0, j = 0, n = 0; + + if (p_hal->pp->scaleing_list_enable_flag) { + for (i = 0; i < 6; i++) { //!< 4x4, 6 lists + for (j = 0; j < 16; j++) { + data[n++] = p_hal->qm->bScalingLists4x4[i][j]; + } + } + for (i = 0; i < 2; i++) { //!< 8x8, 2 lists + for (j = 0; j < 64; j++) { + data[n++] = p_hal->qm->bScalingLists8x8[i][j]; + } + } + } + mpp_assert(n <= len); + + return MPP_OK; +} + +static MPP_RET set_registers(H264dHalCtx_t *p_hal, Vdpu382H264dRegSet *regs, HalTaskInfo *task) +{ + DXVA_PicParams_H264_MVC *pp = p_hal->pp; + Vdpu382RegCommon *common = ®s->common; + HalBuf *mv_buf = NULL; + + // memset(regs, 0, sizeof(Vdpu382H264dRegSet)); + memset(®s->h264d_highpoc, 0, sizeof(regs->h264d_highpoc)); + common->reg016_str_len = p_hal->strm_len; + common->reg013.cur_pic_is_idr = p_hal->slice_long->idr_flag; + common->reg012.colmv_compress_en = (pp->frame_mbs_only_flag) ? 
1 : 0; + //!< caculate the yuv_frame_size + { + MppFrame mframe = NULL; + RK_U32 hor_virstride = 0; + RK_U32 ver_virstride = 0; + RK_U32 y_virstride = 0; + + mpp_buf_slot_get_prop(p_hal->frame_slots, pp->CurrPic.Index7Bits, SLOT_FRAME_PTR, &mframe); + hor_virstride = mpp_frame_get_hor_stride(mframe); + ver_virstride = mpp_frame_get_ver_stride(mframe); + y_virstride = hor_virstride * ver_virstride; + + if (MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe))) { + RK_U32 pixel_width = MPP_ALIGN(mpp_frame_get_width(mframe), 64); + RK_U32 fbd_offset = MPP_ALIGN(pixel_width * (ver_virstride + 16) / 16, SZ_4K); + + common->reg012.fbc_e = 1; + common->reg018.y_hor_virstride = pixel_width / 16; + common->reg019.uv_hor_virstride = pixel_width / 16; + common->reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4; + } else { + common->reg012.fbc_e = 0; + common->reg018.y_hor_virstride = hor_virstride / 16; + common->reg019.uv_hor_virstride = hor_virstride / 16; + common->reg020_y_virstride.y_virstride = y_virstride / 16; + } + } + //!< set current + { + MppBuffer mbuffer = NULL; + RK_S32 fd = -1; + + regs->h264d_param.reg65.cur_top_poc = pp->CurrFieldOrderCnt[0]; + regs->h264d_param.reg66.cur_bot_poc = pp->CurrFieldOrderCnt[1]; + mpp_buf_slot_get_prop(p_hal->frame_slots, pp->CurrPic.Index7Bits, SLOT_BUFFER, &mbuffer); + fd = mpp_buffer_get_fd(mbuffer); + regs->common_addr.reg130_decout_base = fd; + + //colmv_cur_base + mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, pp->CurrPic.Index7Bits); + regs->common_addr.reg131_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]); + regs->common_addr.reg132_error_ref_base = fd; + if (pp->field_pic_flag) + regs->h264d_highpoc.reg204.cur_poc_highbit = 1 << pp->CurrPic.AssociatedFlag; // top:1 bot:2 + else + regs->h264d_highpoc.reg204.cur_poc_highbit = 0; // frame + } + //!< set reference + { + RK_S32 i = 0; + RK_S32 ref_index = -1; + RK_S32 near_index = -1; + MppBuffer mbuffer = NULL; + RK_U32 min_frame_num = 0; + MppFrame mframe = NULL; + + for 
(i = 0; i < 15; i++) { + RK_U32 field_flag = (pp->RefPicFiledFlags >> i) & 0x01; + RK_U32 top_used = (pp->UsedForReferenceFlags >> (2 * i + 0)) & 0x01; + RK_U32 bot_used = (pp->UsedForReferenceFlags >> (2 * i + 1)) & 0x01; + + regs->h264d_param.reg67_98_ref_poc[2 * i] = pp->FieldOrderCntList[i][0]; + regs->h264d_param.reg67_98_ref_poc[2 * i + 1] = pp->FieldOrderCntList[i][1]; + SET_REF_INFO(regs->h264d_param, i, field, field_flag); + SET_REF_INFO(regs->h264d_param, i, topfield_used, top_used); + SET_REF_INFO(regs->h264d_param, i, botfield_used, bot_used); + SET_REF_INFO(regs->h264d_param, i, colmv_use_flag, (pp->RefPicColmvUsedFlags >> i) & 0x01); + + if (pp->RefFrameList[i].bPicEntry != 0xff) { + ref_index = pp->RefFrameList[i].Index7Bits; + near_index = pp->RefFrameList[i].Index7Bits; + } else { + ref_index = (near_index < 0) ? pp->CurrPic.Index7Bits : near_index; + } + /* mark 3 to differ from current frame */ + if (ref_index == pp->CurrPic.Index7Bits) { + SET_POC_HIGNBIT_INFO(regs->h264d_highpoc, 2 * i, poc_highbit, 3); + SET_POC_HIGNBIT_INFO(regs->h264d_highpoc, 2 * i + 1, poc_highbit, 3); + } + mpp_buf_slot_get_prop(p_hal->frame_slots, ref_index, SLOT_BUFFER, &mbuffer); + mpp_buf_slot_get_prop(p_hal->frame_slots, ref_index, SLOT_FRAME_PTR, &mframe); + + if (pp->FrameNumList[i] < pp->frame_num && + pp->FrameNumList[i] > min_frame_num && + (!mpp_frame_get_errinfo(mframe))) { + min_frame_num = pp->FrameNumList[i]; + regs->common_addr.reg132_error_ref_base = mpp_buffer_get_fd(mbuffer); + common->reg021.error_intra_mode = 0; + } + + RK_S32 fd = mpp_buffer_get_fd(mbuffer); + regs->h264d_addr.ref_base[i] = fd; + mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, ref_index); + regs->h264d_addr.colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]); + + } + regs->h264d_param.reg67_98_ref_poc[30] = pp->FieldOrderCntList[15][0]; + regs->h264d_param.reg67_98_ref_poc[31] = pp->FieldOrderCntList[15][1]; + regs->h264d_param.reg102.ref15_field = (pp->RefPicFiledFlags >> 15) & 0x01; + 
regs->h264d_param.reg102.ref15_topfield_used = (pp->UsedForReferenceFlags >> 30) & 0x01; + regs->h264d_param.reg102.ref15_botfield_used = (pp->UsedForReferenceFlags >> 31) & 0x01; + regs->h264d_param.reg102.ref15_colmv_use_flag = (pp->RefPicColmvUsedFlags >> 15) & 0x01; + + if (pp->RefFrameList[15].bPicEntry != 0xff) { + ref_index = pp->RefFrameList[15].Index7Bits; + } else { + ref_index = (near_index < 0) ? pp->CurrPic.Index7Bits : near_index; + } + /* mark 3 to differ from current frame */ + if (ref_index == pp->CurrPic.Index7Bits) { + regs->h264d_highpoc.reg203.ref30_poc_highbit = 3; + regs->h264d_highpoc.reg203.ref31_poc_highbit = 3; + } + mpp_buf_slot_get_prop(p_hal->frame_slots, ref_index, SLOT_BUFFER, &mbuffer); + RK_S32 fd = mpp_buffer_get_fd(mbuffer); + regs->h264d_addr.ref_base[15] = fd; + mv_buf = hal_bufs_get_buf(p_hal->cmv_bufs, ref_index); + regs->h264d_addr.colmv_base[15] = mpp_buffer_get_fd(mv_buf->buf[0]); + } + { + MppBuffer mbuffer = NULL; + Vdpu382H264dRegCtx *reg_ctx = (Vdpu382H264dRegCtx *)p_hal->reg_ctx; + + mpp_buf_slot_get_prop(p_hal->packet_slots, task->dec.input, SLOT_BUFFER, &mbuffer); + regs->common_addr.reg128_rlc_base = mpp_buffer_get_fd(mbuffer); + regs->common_addr.reg129_rlcwrite_base = regs->common_addr.reg128_rlc_base; + + regs->h264d_addr.cabactbl_base = reg_ctx->bufs_fd; + mpp_dev_set_reg_offset(p_hal->dev, 197, reg_ctx->offset_cabac); + } + + return MPP_OK; +} + +static MPP_RET init_common_regs(Vdpu382H264dRegSet *regs) +{ + Vdpu382RegCommon *common = ®s->common; + Vdpu382H264dHighPoc_t *highpoc = ®s->h264d_highpoc; + + common->reg009.dec_mode = 1; //!< h264 + common->reg015.rlc_mode = 0; + + common->reg011.buf_empty_en = 1; + + common->reg010.dec_e = 1; + common->reg017.slice_num = 0x3fff; + + common->reg013.h26x_error_mode = 1; + common->reg013.strmd_zero_rm_en = 1; + + common->reg021.error_deb_en = 1; + common->reg021.inter_error_prc_mode = 0; + common->reg021.error_intra_mode = 1; + + common->reg024.cabac_err_en_lowbits = 
0xffffffff; + common->reg025.cabac_err_en_highbits = 0x3ff3ffff; + common->reg026.swreg_block_gating_e = 0xfffff; + common->reg026.reg_cfg_gating_en = 1; + common->reg032_timeout_threshold = 0x3ffff; + + common->reg011.dec_clkgate_e = 1; + + //highpoc_t205 + memset(&highpoc->reg205, 0, sizeof(RK_U32)); + + + return MPP_OK; +} + +MPP_RET vdpu382_h264d_init(void *hal, MppHalCfg *cfg) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + + INP_CHECK(ret, NULL == p_hal); + + MEM_CHECK(ret, p_hal->reg_ctx = mpp_calloc_size(void, sizeof(Vdpu382H264dRegCtx))); + Vdpu382H264dRegCtx *reg_ctx = (Vdpu382H264dRegCtx *)p_hal->reg_ctx; + RK_U32 max_cnt = p_hal->fast_mode ? VDPU382_FAST_REG_SET_CNT : 1; + RK_U32 i = 0; + + //!< malloc buffers + FUN_CHECK(ret = mpp_buffer_get(p_hal->buf_group, ®_ctx->bufs, + VDPU382_INFO_BUFFER_SIZE(max_cnt))); + reg_ctx->bufs_fd = mpp_buffer_get_fd(reg_ctx->bufs); + reg_ctx->bufs_ptr = mpp_buffer_get_ptr(reg_ctx->bufs); + reg_ctx->offset_cabac = VDPU382_CABAC_TAB_OFFSET; + reg_ctx->offset_errinfo = VDPU382_ERROR_INFO_OFFSET; + for (i = 0; i < max_cnt; i++) { + reg_ctx->reg_buf[i].regs = mpp_calloc(Vdpu382H264dRegSet, 1); + init_common_regs(reg_ctx->reg_buf[i].regs); + reg_ctx->offset_spspps[i] = VDPU382_SPSPPS_OFFSET(i); + reg_ctx->offset_rps[i] = VDPU382_RPS_OFFSET(i); + reg_ctx->offset_sclst[i] = VDPU382_SCALING_LIST_OFFSET(i); + } + + if (!p_hal->fast_mode) { + reg_ctx->regs = reg_ctx->reg_buf[0].regs; + reg_ctx->spspps_offset = reg_ctx->offset_spspps[0]; + reg_ctx->rps_offset = reg_ctx->offset_rps[0]; + reg_ctx->sclst_offset = reg_ctx->offset_sclst[0]; + } + + //!< copy cabac table bytes + memcpy((char *)reg_ctx->bufs_ptr + reg_ctx->offset_cabac, + (void *)rkv_cabac_table_v382, sizeof(rkv_cabac_table_v382)); + + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, rkv_hor_align); + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_VER_ALIGN, rkv_ver_align); + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_LEN_ALIGN, 
rkv_len_align); + + { + // report hw_info to parser + const MppSocInfo *info = mpp_get_soc_info(); + const void *hw_info = NULL; + + for (i = 0; i < MPP_ARRAY_ELEMS(info->dec_caps); i++) { + if (info->dec_caps[i] && info->dec_caps[i]->type == VPU_CLIENT_RKVDEC) { + hw_info = info->dec_caps[i]; + break; + } + } + + mpp_assert(hw_info); + cfg->hw_info = hw_info; + } + +__RETURN: + return MPP_OK; +__FAILED: + vdpu382_h264d_deinit(hal); + + return ret; +} + +MPP_RET vdpu382_h264d_deinit(void *hal) +{ + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + Vdpu382H264dRegCtx *reg_ctx = (Vdpu382H264dRegCtx *)p_hal->reg_ctx; + + RK_U32 i = 0; + RK_U32 loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->reg_buf) : 1; + + mpp_buffer_put(reg_ctx->bufs); + + for (i = 0; i < loop; i++) + MPP_FREE(reg_ctx->reg_buf[i].regs); + + loop = p_hal->fast_mode ? MPP_ARRAY_ELEMS(reg_ctx->rcb_buf) : 1; + for (i = 0; i < loop; i++) { + if (reg_ctx->rcb_buf[i]) { + mpp_buffer_put(reg_ctx->rcb_buf[i]); + reg_ctx->rcb_buf[i] = NULL; + } + } + + if (p_hal->cmv_bufs) { + hal_bufs_deinit(p_hal->cmv_bufs); + p_hal->cmv_bufs = NULL; + } + + MPP_FREE(p_hal->reg_ctx); + + return MPP_OK; +} + +static void h264d_refine_rcb_size(H264dHalCtx_t *p_hal, Vdpu382RcbInfo *rcb_info, + Vdpu382H264dRegSet *regs, + RK_S32 width, RK_S32 height) +{ + RK_U32 rcb_bits = 0; + RK_U32 mbaff = p_hal->pp->MbaffFrameFlag; + RK_U32 bit_depth = p_hal->pp->bit_depth_luma_minus8 + 8; + RK_U32 chroma_format_idc = p_hal->pp->chroma_format_idc; + + width = MPP_ALIGN(width, H264_CTU_SIZE); + height = MPP_ALIGN(height, H264_CTU_SIZE); + /* RCB_STRMD_ROW */ + if (width > 4096) + rcb_bits = ((width + 15) / 16) * 154 * (mbaff ? 
2 : 1); + else + rcb_bits = 0; + rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits); + /* RCB_TRANSD_ROW */ + if (width > 8192) + rcb_bits = ((width - 8192 + 3) / 4) * 2; + else + rcb_bits = 0; + rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits); + /* RCB_TRANSD_COL */ + if (height > 8192) + rcb_bits = ((height - 8192 + 3) / 4) * 2; + else + rcb_bits = 0; + rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits); + /* RCB_INTER_ROW */ + rcb_bits = width * 42; + rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits); + /* RCB_INTER_COL */ + rcb_info[RCB_INTER_COL].size = 0; + /* RCB_INTRA_ROW */ + rcb_bits = width * 44; + rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits); + /* RCB_DBLK_ROW */ + rcb_bits = width * (2 + (mbaff ? 12 : 6) * bit_depth); + rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits); + /* RCB_SAO_ROW */ + rcb_info[RCB_SAO_ROW].size = 0; + /* RCB_FBC_ROW */ + if (regs->common.reg012.fbc_e) { + rcb_bits = (chroma_format_idc > 1) ? (2 * width * bit_depth) : 0; + } else + rcb_bits = 0; + rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits); + /* RCB_FILT_COL */ + rcb_info[RCB_FILT_COL].size = 0; +} + +static void hal_h264d_rcb_info_update(void *hal, Vdpu382H264dRegSet *regs) +{ + H264dHalCtx_t *p_hal = (H264dHalCtx_t*)hal; + RK_U32 mbaff = p_hal->pp->MbaffFrameFlag; + RK_U32 bit_depth = p_hal->pp->bit_depth_luma_minus8 + 8; + RK_U32 chroma_format_idc = p_hal->pp->chroma_format_idc; + Vdpu382H264dRegCtx *ctx = (Vdpu382H264dRegCtx *)p_hal->reg_ctx; + RK_S32 width = MPP_ALIGN((p_hal->pp->wFrameWidthInMbsMinus1 + 1) << 4, 64); + RK_S32 height = MPP_ALIGN((p_hal->pp->wFrameHeightInMbsMinus1 + 1) << 4, 64); + + if ( ctx->bit_depth != bit_depth || + ctx->chroma_format_idc != chroma_format_idc || + ctx->mbaff != mbaff || + ctx->width != width || + ctx->height != height) { + RK_U32 i; + RK_U32 loop = p_hal->fast_mode ? 
MPP_ARRAY_ELEMS(ctx->reg_buf) : 1; + + ctx->rcb_buf_size = vdpu382_get_rcb_buf_size(ctx->rcb_info, width, height); + h264d_refine_rcb_size(hal, ctx->rcb_info, regs, width, height); + for (i = 0; i < loop; i++) { + MppBuffer rcb_buf = ctx->rcb_buf[i]; + + if (rcb_buf) { + mpp_buffer_put(rcb_buf); + ctx->rcb_buf[i] = NULL; + } + mpp_buffer_get(p_hal->buf_group, &rcb_buf, ctx->rcb_buf_size); + ctx->rcb_buf[i] = rcb_buf; + } + ctx->bit_depth = bit_depth; + ctx->width = width; + ctx->height = height; + ctx->mbaff = mbaff; + ctx->chroma_format_idc = chroma_format_idc; + } +} + +MPP_RET vdpu382_h264d_gen_regs(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + RK_S32 width = MPP_ALIGN((p_hal->pp->wFrameWidthInMbsMinus1 + 1) << 4, 64); + RK_S32 height = MPP_ALIGN((p_hal->pp->wFrameHeightInMbsMinus1 + 1) << 4, 64); + Vdpu382H264dRegCtx *ctx = (Vdpu382H264dRegCtx *)p_hal->reg_ctx; + Vdpu382H264dRegSet *regs = ctx->regs; + RK_S32 mv_size = width * height / 2; + INP_CHECK(ret, NULL == p_hal); + + if (task->dec.flags.parse_err || + task->dec.flags.ref_err) { + goto __RETURN; + } + + /* if is field mode is enabled enlarge colmv buffer and disable colmv compression */ + if (!p_hal->pp->frame_mbs_only_flag) + mv_size *= 2; + + if (p_hal->cmv_bufs == NULL || p_hal->mv_size < mv_size) { + size_t size = mv_size; + + if (p_hal->cmv_bufs) { + hal_bufs_deinit(p_hal->cmv_bufs); + p_hal->cmv_bufs = NULL; + } + + hal_bufs_init(&p_hal->cmv_bufs); + if (p_hal->cmv_bufs == NULL) { + mpp_err_f("colmv bufs init fail"); + goto __RETURN; + } + p_hal->mv_size = mv_size; + p_hal->mv_count = mpp_buf_slot_get_count(p_hal->frame_slots); + hal_bufs_setup(p_hal->cmv_bufs, p_hal->mv_count, 1, &size); + } + + if (p_hal->fast_mode) { + RK_U32 i = 0; + for (i = 0; i < MPP_ARRAY_ELEMS(ctx->reg_buf); i++) { + if (!ctx->reg_buf[i].valid) { + task->dec.reg_index = i; + regs = ctx->reg_buf[i].regs; + + ctx->spspps_offset = ctx->offset_spspps[i]; + 
ctx->rps_offset = ctx->offset_rps[i]; + ctx->sclst_offset = ctx->offset_sclst[i]; + ctx->reg_buf[i].valid = 1; + break; + } + } + } + prepare_spspps(p_hal, (RK_U64 *)&ctx->spspps, sizeof(ctx->spspps)); + prepare_framerps(p_hal, (RK_U64 *)&ctx->rps, sizeof(ctx->rps)); + prepare_scanlist(p_hal, ctx->sclst, sizeof(ctx->sclst)); + set_registers(p_hal, regs, task); + + //!< copy datas + RK_U32 i = 0; + if (!p_hal->fast_mode && !p_hal->pp->spspps_update) { + RK_U32 offset = 0; + RK_U32 len = VDPU382_SPS_PPS_LEN; //!< sps+pps data length + for (i = 0; i < 256; i++) { + offset = ctx->spspps_offset + (sizeof(ctx->spspps) * i) + len; + memcpy((char *)ctx->bufs_ptr + offset, (char *)ctx->spspps + len, sizeof(ctx->spspps) - len); + } + } else { + RK_U32 offset = 0; + for (i = 0; i < 256; i++) { + offset = ctx->spspps_offset + (sizeof(ctx->spspps) * i); + memcpy((char *)ctx->bufs_ptr + offset, (void *)ctx->spspps, sizeof(ctx->spspps)); + } + } + + regs->h264d_addr.pps_base = ctx->bufs_fd; + MppDevRegOffsetCfg trans_cfg; + trans_cfg.reg_idx = 161; + trans_cfg.offset = ctx->spspps_offset; + mpp_dev_ioctl(p_hal->dev, MPP_DEV_REG_OFFSET, &trans_cfg); + + memcpy((char *)ctx->bufs_ptr + ctx->rps_offset, (void *)ctx->rps, sizeof(ctx->rps)); + regs->h264d_addr.rps_base = ctx->bufs_fd; + trans_cfg.reg_idx = 163; + trans_cfg.offset = ctx->rps_offset; + mpp_dev_ioctl(p_hal->dev, MPP_DEV_REG_OFFSET, &trans_cfg); + + regs->common.reg012.scanlist_addr_valid_en = 1; + if (p_hal->pp->scaleing_list_enable_flag) { + memcpy((char *)ctx->bufs_ptr + ctx->sclst_offset, (void *)ctx->sclst, sizeof(ctx->sclst)); + regs->h264d_addr.scanlist_addr = ctx->bufs_fd; + trans_cfg.reg_idx = 180; + trans_cfg.offset = ctx->sclst_offset; + mpp_dev_ioctl(p_hal->dev, MPP_DEV_REG_OFFSET, &trans_cfg); + } else { + regs->h264d_addr.scanlist_addr = 0; + } + + hal_h264d_rcb_info_update(p_hal, regs); + vdpu382_setup_rcb(®s->common_addr, p_hal->dev, p_hal->fast_mode ? 
+ ctx->rcb_buf[task->dec.reg_index] : ctx->rcb_buf[0], + ctx->rcb_info); + vdpu382_setup_statistic(®s->common, ®s->statistic); + +__RETURN: + return ret = MPP_OK; +} + +MPP_RET vdpu382_h264d_start(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + INP_CHECK(ret, NULL == p_hal); + + if (task->dec.flags.parse_err || + task->dec.flags.ref_err) { + goto __RETURN; + } + + Vdpu382H264dRegCtx *reg_ctx = (Vdpu382H264dRegCtx *)p_hal->reg_ctx; + Vdpu382H264dRegSet *regs = p_hal->fast_mode ? + reg_ctx->reg_buf[task->dec.reg_index].regs : + reg_ctx->regs; + MppDev dev = p_hal->dev; + + do { + MppDevRegWrCfg wr_cfg; + MppDevRegRdCfg rd_cfg; + + wr_cfg.reg = ®s->common; + wr_cfg.size = sizeof(regs->common); + wr_cfg.offset = OFFSET_COMMON_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = ®s->h264d_param; + wr_cfg.size = sizeof(regs->h264d_param); + wr_cfg.offset = OFFSET_CODEC_PARAMS_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = ®s->common_addr; + wr_cfg.size = sizeof(regs->common_addr); + wr_cfg.offset = OFFSET_COMMON_ADDR_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = ®s->h264d_addr; + wr_cfg.size = sizeof(regs->h264d_addr); + wr_cfg.offset = OFFSET_CODEC_ADDR_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = ®s->h264d_highpoc; + wr_cfg.size = sizeof(regs->h264d_highpoc); + wr_cfg.offset = OFFSET_POC_HIGHBIT_REGS; + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = ®s->statistic; + wr_cfg.size = 
sizeof(regs->statistic); + wr_cfg.offset = OFFSET_STATISTIC_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + rd_cfg.reg = ®s->irq_status; + rd_cfg.size = sizeof(regs->irq_status); + rd_cfg.offset = OFFSET_INTERRUPT_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg); + if (ret) { + mpp_err_f("set register read failed %d\n", ret); + break; + } + /* rcb info for sram */ + { + RK_U32 i = 0; + MppDevRcbInfoCfg rcb_cfg; + Vdpu382RcbInfo rcb_info[RCB_BUF_COUNT]; + + memcpy(rcb_info, reg_ctx->rcb_info, sizeof(rcb_info)); + qsort(rcb_info, MPP_ARRAY_ELEMS(rcb_info), + sizeof(rcb_info[0]), vdpu382_compare_rcb_size); + + for (i = 0; i < MPP_ARRAY_ELEMS(rcb_info); i++) { + rcb_cfg.reg_idx = rcb_info[i].reg; + rcb_cfg.size = rcb_info[i].size; + if (rcb_cfg.size > 0) { + mpp_dev_ioctl(dev, MPP_DEV_RCB_INFO, &rcb_cfg); + } else + break; + } + } + /* send request to hardware */ + ret = mpp_dev_ioctl(dev, MPP_DEV_CMD_SEND, NULL); + if (ret) { + mpp_err_f("send cmd failed %d\n", ret); + break; + } + } while (0); + +__RETURN: + return ret = MPP_OK; +} + +MPP_RET vdpu382_h264d_wait(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + + INP_CHECK(ret, NULL == p_hal); + Vdpu382H264dRegCtx *reg_ctx = (Vdpu382H264dRegCtx *)p_hal->reg_ctx; + Vdpu382H264dRegSet *p_regs = p_hal->fast_mode ? 
+ reg_ctx->reg_buf[task->dec.reg_index].regs : + reg_ctx->regs; + + if (task->dec.flags.parse_err || + task->dec.flags.ref_err) { + goto __SKIP_HARD; + } + + ret = mpp_dev_ioctl(p_hal->dev, MPP_DEV_CMD_POLL, NULL); + if (ret) + mpp_err_f("poll cmd failed %d\n", ret); + +__SKIP_HARD: + if (p_hal->dec_cb) { + DecCbHalDone param; + + param.task = (void *)&task->dec; + param.regs = (RK_U32 *)p_regs; + + if (p_regs->irq_status.reg224.dec_error_sta || + (!p_regs->irq_status.reg224.dec_rdy_sta) || + p_regs->irq_status.reg224.buf_empty_sta || + p_regs->irq_status.reg226.strmd_error_status || + p_regs->irq_status.reg227.colmv_error_ref_picidx) + param.hard_err = 1; + else + param.hard_err = 0; + + mpp_callback(p_hal->dec_cb, ¶m); + } + memset(&p_regs->irq_status.reg224, 0, sizeof(RK_U32)); + if (p_hal->fast_mode) { + reg_ctx->reg_buf[task->dec.reg_index].valid = 0; + } + + (void)task; +__RETURN: + return ret = MPP_OK; +} + +MPP_RET vdpu382_h264d_reset(void *hal) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + + INP_CHECK(ret, NULL == p_hal); + + +__RETURN: + return ret = MPP_OK; +} + +MPP_RET vdpu382_h264d_flush(void *hal) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + + INP_CHECK(ret, NULL == p_hal); + +__RETURN: + return ret = MPP_OK; +} + +MPP_RET vdpu382_h264d_control(void *hal, MpiCmd cmd_type, void *param) +{ + MPP_RET ret = MPP_ERR_UNKNOW; + H264dHalCtx_t *p_hal = (H264dHalCtx_t *)hal; + + INP_CHECK(ret, NULL == p_hal); + + switch ((MpiCmd)cmd_type) { + case MPP_DEC_SET_FRAME_INFO: { + MppFrameFormat fmt = mpp_frame_get_fmt((MppFrame)param); + RK_U32 imgwidth = mpp_frame_get_width((MppFrame)param); + RK_U32 imgheight = mpp_frame_get_height((MppFrame)param); + + mpp_log("control info: fmt %d, w %d, h %d\n", fmt, imgwidth, imgheight); + if (fmt == MPP_FMT_YUV422SP) { + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_LEN_ALIGN, rkv_len_align_422); + } + if (MPP_FRAME_FMT_IS_FBC(fmt)) { + 
vdpu382_afbc_align_calc(p_hal->frame_slots, (MppFrame)param, 16); + } else if (imgwidth > 1920 || imgheight > 1088) { + mpp_slots_set_prop(p_hal->frame_slots, SLOTS_HOR_ALIGN, rkv_hor_align_256_odds); + } + break; + } + case MPP_DEC_SET_OUTPUT_FORMAT: { + + } break; + default: + break; + } + +__RETURN: + return ret = MPP_OK; +} diff --git a/mpp/hal/rkdec/h264d/hal_h264d_vdpu382.h b/mpp/hal/rkdec/h264d/hal_h264d_vdpu382.h new file mode 100644 index 00000000..b437b6b7 --- /dev/null +++ b/mpp/hal/rkdec/h264d/hal_h264d_vdpu382.h @@ -0,0 +1,40 @@ +/* + * Copyright 2022 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __HAL_H264D_VDPU382_H__ +#define __HAL_H264D_VDPU382_H__ + +#include "mpp_hal.h" +#include "vdpu382.h" + +#ifdef __cplusplus +extern "C" { +#endif + +MPP_RET vdpu382_h264d_init (void *hal, MppHalCfg *cfg); +MPP_RET vdpu382_h264d_deinit (void *hal); +MPP_RET vdpu382_h264d_gen_regs(void *hal, HalTaskInfo *task); +MPP_RET vdpu382_h264d_start (void *hal, HalTaskInfo *task); +MPP_RET vdpu382_h264d_wait (void *hal, HalTaskInfo *task); +MPP_RET vdpu382_h264d_reset (void *hal); +MPP_RET vdpu382_h264d_flush (void *hal); +MPP_RET vdpu382_h264d_control (void *hal, MpiCmd cmd_type, void *param); + +#ifdef __cplusplus +} +#endif + +#endif /* __HAL_H264D_VDPU382_H__ */ \ No newline at end of file diff --git a/mpp/hal/rkdec/h265d/CMakeLists.txt b/mpp/hal/rkdec/h265d/CMakeLists.txt index f5882521..0e5d38ce 100644 --- a/mpp/hal/rkdec/h265d/CMakeLists.txt +++ b/mpp/hal/rkdec/h265d/CMakeLists.txt @@ -6,6 +6,7 @@ set(HAL_H265D_SRC hal_h265d_com.c hal_h265d_rkv.c hal_h265d_vdpu34x.c + hal_h265d_vdpu382.c ) add_library(${HAL_H265D} STATIC ${HAL_H265D_SRC}) diff --git a/mpp/hal/rkdec/h265d/hal_h265d_api.c b/mpp/hal/rkdec/h265d/hal_h265d_api.c index 47930ba5..22381e64 100644 --- a/mpp/hal/rkdec/h265d/hal_h265d_api.c +++ b/mpp/hal/rkdec/h265d/hal_h265d_api.c @@ -27,6 +27,7 @@ #include "hal_h265d_api.h" #include "hal_h265d_rkv.h" #include "hal_h265d_vdpu34x.h" +#include "hal_h265d_vdpu382.h" RK_U32 hal_h265d_debug = 0; @@ -58,7 +59,9 @@ MPP_RET hal_h265d_init(void *ctx, MppHalCfg *cfg) p->is_v34x = (hw_id == HWID_VDPU34X || hw_id == HWID_VDPU38X); p->client_type = client_type; - if (p->is_v34x) + if (hw_id == HWID_VDPU382) + p->api = &hal_h265d_vdpu382; + else if (p->is_v34x) p->api = &hal_h265d_vdpu34x; else p->api = &hal_h265d_rkv; diff --git a/mpp/hal/rkdec/h265d/hal_h265d_vdpu382.c b/mpp/hal/rkdec/h265d/hal_h265d_vdpu382.c new file mode 100644 index 00000000..99ea1a3c --- /dev/null +++ b/mpp/hal/rkdec/h265d/hal_h265d_vdpu382.c @@ -0,0 +1,1182 @@ +/* + * Copyright 2022 
Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define MODULE_TAG "hal_h265d_vdpu382" + +#include +#include + +#include "mpp_env.h" +#include "mpp_mem.h" +#include "mpp_bitread.h" +#include "mpp_bitput.h" + +#include "h265d_syntax.h" +#include "hal_h265d_debug.h" +#include "hal_h265d_ctx.h" +#include "hal_h265d_com.h" +#include "hal_h265d_vdpu382.h" +#include "vdpu382_h265d.h" + +/* #define dump */ +#ifdef dump +static FILE *fp = NULL; +#endif + +#define HW_RPS +#define PPS_SIZE (112 * 64)//(96x64) + +#define SET_REF_VALID(regs, index, value)\ + do{ \ + switch(index){\ + case 0: regs.reg99.hevc_ref_valid_0 = value; break;\ + case 1: regs.reg99.hevc_ref_valid_1 = value; break;\ + case 2: regs.reg99.hevc_ref_valid_2 = value; break;\ + case 3: regs.reg99.hevc_ref_valid_3 = value; break;\ + case 4: regs.reg99.hevc_ref_valid_4 = value; break;\ + case 5: regs.reg99.hevc_ref_valid_5 = value; break;\ + case 6: regs.reg99.hevc_ref_valid_6 = value; break;\ + case 7: regs.reg99.hevc_ref_valid_7 = value; break;\ + case 8: regs.reg99.hevc_ref_valid_8 = value; break;\ + case 9: regs.reg99.hevc_ref_valid_9 = value; break;\ + case 10: regs.reg99.hevc_ref_valid_10 = value; break;\ + case 11: regs.reg99.hevc_ref_valid_11 = value; break;\ + case 12: regs.reg99.hevc_ref_valid_12 = value; break;\ + case 13: regs.reg99.hevc_ref_valid_13 = value; break;\ + case 14: regs.reg99.hevc_ref_valid_14 = value; break;\ + default: break;}\ + }while(0) 
+
+#define FMT 4
+#define CTU 3
+
+/* ratio pair (a, b) looked up per [ctu size][chroma format] */
+typedef struct {
+    RK_U32 a;
+    RK_U32 b;
+} FilterdColBufRatio;
+
+static const FilterdColBufRatio filterd_fbc_on[CTU][FMT] = {
+    /* 400    420      422      444 */
+    {{0, 0}, {27, 15}, {36, 15}, {52, 15}}, //ctu 16
+    {{0, 0}, {27, 8},  {36, 8},  {52, 8}},  //ctu 32
+    {{0, 0}, {27, 5},  {36, 5},  {52, 5}}   //ctu 64
+};
+
+static const FilterdColBufRatio filterd_fbc_off[CTU][FMT] = {
+    /* 400    420      422      444 */
+    {{0, 0}, {9, 31}, {12, 39}, {12, 39}}, //ctu 16
+    {{0, 0}, {9, 25}, {12, 33}, {12, 33}}, //ctu 32
+    {{0, 0}, {9, 21}, {12, 29}, {12, 29}}  //ctu 64
+};
+
+/*
+ * Layout of the shared info buffer: the CABAC table first, followed by one
+ * (spspps, rps, scaling list) group per register set. Macro arguments are
+ * parenthesised so expressions can be passed safely.
+ */
+#define CABAC_TAB_ALIGEND_SIZE          (MPP_ALIGN(27456, SZ_4K))
+#define SPSPPS_ALIGNED_SIZE             (MPP_ALIGN(112 * 64, SZ_4K))
+#define RPS_ALIGEND_SIZE                (MPP_ALIGN(400 * 8, SZ_4K))
+#define SCALIST_ALIGNED_SIZE            (MPP_ALIGN(81 * 1360, SZ_4K))
+#define INFO_BUFFER_SIZE                (SPSPPS_ALIGNED_SIZE + RPS_ALIGEND_SIZE + SCALIST_ALIGNED_SIZE)
+#define ALL_BUFFER_SIZE(cnt)            (CABAC_TAB_ALIGEND_SIZE + INFO_BUFFER_SIZE * (cnt))
+
+#define CABAC_TAB_OFFSET                (0)
+#define SPSPPS_OFFSET(pos)              (CABAC_TAB_OFFSET + CABAC_TAB_ALIGEND_SIZE + (INFO_BUFFER_SIZE * (pos)))
+#define RPS_OFFSET(pos)                 (SPSPPS_OFFSET(pos) + SPSPPS_ALIGNED_SIZE)
+#define SCALIST_OFFSET(pos)             (RPS_OFFSET(pos) + RPS_ALIGEND_SIZE)
+
+/* defined below; used by the failure path of hal_h265d_vdpu382_init() */
+static MPP_RET hal_h265d_vdpu382_deinit(void *hal);
+
+/*
+ * Create the VDPU382 H.265 HAL context.
+ *
+ * Allocates the scaling-list work areas, the pps / rps staging buffers, the
+ * shared info buffer and one register set per task slot (MAX_GEN_REG in fast
+ * mode, otherwise 1), writes the CABAC table to the start of the info buffer
+ * and reports the RKVDEC capability entry through cfg->hw_info.
+ *
+ * Returns MPP_OK on success. On any failure everything allocated so far is
+ * released through hal_h265d_vdpu382_deinit() and the error code is returned.
+ */
+static MPP_RET hal_h265d_vdpu382_init(void *hal, MppHalCfg *cfg)
+{
+    RK_S32 ret = 0;
+    HalH265dCtx *reg_cxt = (HalH265dCtx *)hal;
+
+    mpp_slots_set_prop(reg_cxt->slots, SLOTS_HOR_ALIGN, hevc_hor_align);
+    mpp_slots_set_prop(reg_cxt->slots, SLOTS_VER_ALIGN, hevc_ver_align);
+
+    reg_cxt->scaling_qm = mpp_calloc(DXVA_Qmatrix_HEVC, 1);
+    if (reg_cxt->scaling_qm == NULL) {
+        mpp_err("scaling_org alloc fail");
+        ret = MPP_ERR_MALLOC;
+        goto __FAILED;
+    }
+
+    reg_cxt->scaling_rk = mpp_calloc(scalingFactor_t, 1);
+    reg_cxt->pps_buf = mpp_calloc(RK_U64, 15);
+    reg_cxt->sw_rps_buf = mpp_calloc(RK_U64, 400);
+
+    if (reg_cxt->scaling_rk == NULL) {
+        mpp_err("scaling_rk alloc fail");
+        ret = MPP_ERR_MALLOC;
+        goto __FAILED;
+    }
+
+    if (reg_cxt->pps_buf == NULL || reg_cxt->sw_rps_buf == NULL) {
+        mpp_err("pps/rps buf alloc fail");
+        ret = MPP_ERR_MALLOC;
+        goto __FAILED;
+    }
+
+    if (reg_cxt->group == NULL) {
+        ret = mpp_buffer_group_get_internal(&reg_cxt->group, MPP_BUFFER_TYPE_ION);
+        if (ret) {
+            mpp_err("h265d mpp_buffer_group_get failed\n");
+            goto __FAILED;
+        }
+    }
+
+    {
+        RK_U32 i = 0;
+        RK_U32 max_cnt = reg_cxt->fast_mode ? MAX_GEN_REG : 1;
+
+        //!< malloc buffers
+        ret = mpp_buffer_get(reg_cxt->group, &reg_cxt->bufs, ALL_BUFFER_SIZE(max_cnt));
+        if (ret) {
+            mpp_err("h265d mpp_buffer_get failed\n");
+            goto __FAILED;
+        }
+
+        reg_cxt->bufs_fd = mpp_buffer_get_fd(reg_cxt->bufs);
+        reg_cxt->offset_cabac = CABAC_TAB_OFFSET;
+        for (i = 0; i < max_cnt; i++) {
+            reg_cxt->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu382H265dRegSet));
+            if (reg_cxt->g_buf[i].hw_regs == NULL) {
+                mpp_err("hw_regs alloc fail");
+                ret = MPP_ERR_MALLOC;
+                goto __FAILED;
+            }
+            reg_cxt->offset_spspps[i] = SPSPPS_OFFSET(i);
+            reg_cxt->offset_rps[i] = RPS_OFFSET(i);
+            reg_cxt->offset_sclst[i] = SCALIST_OFFSET(i);
+        }
+    }
+
+    /* without fast mode there is a single register set, select it once */
+    if (!reg_cxt->fast_mode) {
+        reg_cxt->hw_regs = reg_cxt->g_buf[0].hw_regs;
+        reg_cxt->spspps_offset = reg_cxt->offset_spspps[0];
+        reg_cxt->rps_offset = reg_cxt->offset_rps[0];
+        reg_cxt->sclst_offset = reg_cxt->offset_sclst[0];
+    }
+
+    ret = mpp_buffer_write(reg_cxt->bufs, 0, (void*)cabac_table, sizeof(cabac_table));
+    if (ret) {
+        mpp_err("h265d write cabac_table data failed\n");
+        goto __FAILED;
+    }
+
+    {
+        // report hw_info to parser
+        const MppSocInfo *info = mpp_get_soc_info();
+        const void *hw_info = NULL;
+        RK_U32 i;
+
+        for (i = 0; i < MPP_ARRAY_ELEMS(info->dec_caps); i++) {
+            if (info->dec_caps[i] && info->dec_caps[i]->type == VPU_CLIENT_RKVDEC) {
+                hw_info = info->dec_caps[i];
+                break;
+            }
+        }
+
+        mpp_assert(hw_info);
+        cfg->hw_info = hw_info;
+    }
+
+#ifdef dump
+    fp = fopen("/data/hal.bin", "wb");
+#endif
+    return MPP_OK;
+
+__FAILED:
+    /* deinit NULLs every handle it frees, so a later deinit stays harmless */
+    hal_h265d_vdpu382_deinit(hal);
+    return ret;
+}
+
+/*
+ * Release everything owned by the H.265 HAL context: the info buffer, the
+ * RCB buffers, the buffer group, the register sets, the scaling-list and
+ * pps / rps staging areas and the colmv buffer pool. Every handle is reset
+ * to NULL after release. Always returns MPP_OK.
+ */
+static MPP_RET hal_h265d_vdpu382_deinit(void *hal)
+{
+    HalH265dCtx *reg_cxt = (HalH265dCtx *)hal;
+    /* keep the two counts apart: each loop must use the size of its own array */
+    RK_U32 reg_cnt = reg_cxt->fast_mode ? MPP_ARRAY_ELEMS(reg_cxt->g_buf) : 1;
+    RK_U32 rcb_cnt = reg_cxt->fast_mode ? MPP_ARRAY_ELEMS(reg_cxt->rcb_buf) : 1;
+    RK_U32 i;
+
+    if (reg_cxt->bufs) {
+        mpp_buffer_put(reg_cxt->bufs);
+        reg_cxt->bufs = NULL;
+    }
+
+    for (i = 0; i < rcb_cnt; i++) {
+        if (reg_cxt->rcb_buf[i]) {
+            mpp_buffer_put(reg_cxt->rcb_buf[i]);
+            reg_cxt->rcb_buf[i] = NULL;
+        }
+    }
+
+    if (reg_cxt->group) {
+        mpp_buffer_group_put(reg_cxt->group);
+        reg_cxt->group = NULL;
+    }
+
+    for (i = 0; i < reg_cnt; i++)
+        MPP_FREE(reg_cxt->g_buf[i].hw_regs);
+
+    MPP_FREE(reg_cxt->scaling_qm);
+    MPP_FREE(reg_cxt->scaling_rk);
+    MPP_FREE(reg_cxt->pps_buf);
+    MPP_FREE(reg_cxt->sw_rps_buf);
+
+    if (reg_cxt->cmv_bufs) {
+        hal_bufs_deinit(reg_cxt->cmv_bufs);
+        reg_cxt->cmv_bufs = NULL;
+    }
+
+    return MPP_OK;
+}
+
+static RK_S32 hal_h265d_v382_output_pps_packet(void *hal, void *dxva)
+{
+    RK_S32 fifo_len = 14;//12
+    RK_S32 i, j;
+    RK_U32 addr;
+    RK_U32 log2_min_cb_size;
+    RK_S32 width, height;
+    HalH265dCtx *reg_cxt = ( HalH265dCtx *)hal;
+    Vdpu382H265dRegSet *hw_reg = (Vdpu382H265dRegSet*)(reg_cxt->hw_regs);
+    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
+    BitputCtx_t bp;
+
+    if (NULL == reg_cxt || dxva_cxt == NULL) {
+        mpp_err("%s:%s:%d reg_cxt or dxva_cxt is NULL",
+                __FILE__, __FUNCTION__, __LINE__);
+        return MPP_ERR_NULL_PTR;
+    }
+    void *pps_ptr = mpp_buffer_get_ptr(reg_cxt->bufs) + reg_cxt->spspps_offset;
+    if (dxva_cxt->pp.ps_update_flag) {
+        RK_U64 *pps_packet = reg_cxt->pps_buf;
+        if (NULL == pps_ptr) {
+            mpp_err("pps_data get ptr error");
+            return MPP_ERR_NOMEM;
+        }
+
+        for (i = 0; i < 14; i++) pps_packet[i] = 0;
+
+        mpp_set_bitput_ctx(&bp, pps_packet, fifo_len);
+
+        // SPS
+        mpp_put_bits(&bp, dxva_cxt->pp.vps_id                            , 4);
+        mpp_put_bits(&bp, dxva_cxt->pp.sps_id                            , 4);
+        mpp_put_bits(&bp, dxva_cxt->pp.chroma_format_idc                 , 2);
+
+        log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
+        width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
+        height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
+
+        mpp_put_bits(&bp, width                                          , 16);
+        mpp_put_bits(&bp, height                                         , 16);
+        mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_luma_minus8
+ 8 , 4); + mpp_put_bits(&bp, dxva_cxt->pp.bit_depth_chroma_minus8 + 8 , 4); + mpp_put_bits(&bp, dxva_cxt->pp.log2_max_pic_order_cnt_lsb_minus4 + 4 , 5); + mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size , 2); //log2_maxa_coding_block_depth + mpp_put_bits(&bp, dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3 , 3); + mpp_put_bits(&bp, dxva_cxt->pp.log2_min_transform_block_size_minus2 + 2 , 3); + ///<-zrh comment ^ 63 bit above + mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_transform_block_size , 2); + mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_inter , 3); + mpp_put_bits(&bp, dxva_cxt->pp.max_transform_hierarchy_depth_intra , 3); + mpp_put_bits(&bp, dxva_cxt->pp.scaling_list_enabled_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.amp_enabled_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.sample_adaptive_offset_enabled_flag , 1); + ///<-zrh comment ^ 68 bit above + mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_luma_minus1 + 1) : 0 , 4); + mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? (dxva_cxt->pp.pcm_sample_bit_depth_chroma_minus1 + 1) : 0 , 4); + mpp_put_bits(&bp, dxva_cxt->pp.pcm_loop_filter_disabled_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.log2_diff_max_min_pcm_luma_coding_block_size , 3); + mpp_put_bits(&bp, dxva_cxt->pp.pcm_enabled_flag ? 
(dxva_cxt->pp.log2_min_pcm_luma_coding_block_size_minus3 + 3) : 0, 3); + + mpp_put_bits(&bp, dxva_cxt->pp.num_short_term_ref_pic_sets , 7); + mpp_put_bits(&bp, dxva_cxt->pp.long_term_ref_pics_present_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.num_long_term_ref_pics_sps , 6); + mpp_put_bits(&bp, dxva_cxt->pp.sps_temporal_mvp_enabled_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.strong_intra_smoothing_enabled_flag , 1); + ///<-zrh comment ^ 100 bit above + + mpp_put_bits(&bp, 0 , 7 ); //49bits + //yandong change + mpp_put_bits(&bp, dxva_cxt->pp.sps_max_dec_pic_buffering_minus1, 4); + mpp_put_bits(&bp, 0, 3); + mpp_put_align(&bp , 32, 0xf); //128 + // PPS + mpp_put_bits(&bp, dxva_cxt->pp.pps_id , 6 ); + mpp_put_bits(&bp, dxva_cxt->pp.sps_id , 4 ); + mpp_put_bits(&bp, dxva_cxt->pp.dependent_slice_segments_enabled_flag , 1 ); + mpp_put_bits(&bp, dxva_cxt->pp.output_flag_present_flag , 1 ); + mpp_put_bits(&bp, dxva_cxt->pp.num_extra_slice_header_bits , 13); + mpp_put_bits(&bp, dxva_cxt->pp.sign_data_hiding_enabled_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.cabac_init_present_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l0_default_active_minus1 + 1 , 4);//31 bits + mpp_put_bits(&bp, dxva_cxt->pp.num_ref_idx_l1_default_active_minus1 + 1 , 4); + mpp_put_bits(&bp, dxva_cxt->pp.init_qp_minus26 , 7); + mpp_put_bits(&bp, dxva_cxt->pp.constrained_intra_pred_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.transform_skip_enabled_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.cu_qp_delta_enabled_flag , 1); //164 + mpp_put_bits(&bp, log2_min_cb_size + + dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size - + dxva_cxt->pp.diff_cu_qp_delta_depth , 3); + + h265h_dbg(H265H_DBG_PPS, "log2_min_cb_size %d %d %d \n", log2_min_cb_size, + dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size, dxva_cxt->pp.diff_cu_qp_delta_depth ); + + mpp_put_bits(&bp, dxva_cxt->pp.pps_cb_qp_offset , 5); + mpp_put_bits(&bp, dxva_cxt->pp.pps_cr_qp_offset , 5); + mpp_put_bits(&bp, 
dxva_cxt->pp.pps_slice_chroma_qp_offsets_present_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.weighted_pred_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.weighted_bipred_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.transquant_bypass_enabled_flag , 1 ); + mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag , 1 ); + mpp_put_bits(&bp, dxva_cxt->pp.entropy_coding_sync_enabled_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.pps_loop_filter_across_slices_enabled_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.loop_filter_across_tiles_enabled_flag , 1); //185 + mpp_put_bits(&bp, dxva_cxt->pp.deblocking_filter_override_enabled_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.pps_deblocking_filter_disabled_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.pps_beta_offset_div2 , 4); + mpp_put_bits(&bp, dxva_cxt->pp.pps_tc_offset_div2 , 4); + mpp_put_bits(&bp, dxva_cxt->pp.lists_modification_present_flag , 1); + mpp_put_bits(&bp, dxva_cxt->pp.log2_parallel_merge_level_minus2 + 2 , 3); + mpp_put_bits(&bp, dxva_cxt->pp.slice_segment_header_extension_present_flag , 1); + mpp_put_bits(&bp, 0 , 3); + mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? dxva_cxt->pp.num_tile_columns_minus1 + 1 : 0, 5); + mpp_put_bits(&bp, dxva_cxt->pp.tiles_enabled_flag ? 
dxva_cxt->pp.num_tile_rows_minus1 + 1 : 0 , 5 ); + mpp_put_bits(&bp, 0, 4);//2 //mSps_Pps[i]->mMode + mpp_put_align(&bp, 64, 0xf); + { + /// tiles info begin + RK_U16 column_width[20]; + RK_U16 row_height[22]; + + memset(column_width, 0, sizeof(column_width)); + memset(row_height, 0, sizeof(row_height)); + + if (dxva_cxt->pp.tiles_enabled_flag) { + + if (dxva_cxt->pp.uniform_spacing_flag == 0) { + RK_S32 maxcuwidth = dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size; + RK_S32 ctu_width_in_pic = (width + + (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ; + RK_S32 ctu_height_in_pic = (height + + (1 << maxcuwidth) - 1) / (1 << maxcuwidth) ; + RK_S32 sum = 0; + for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1; i++) { + column_width[i] = dxva_cxt->pp.column_width_minus1[i] + 1; + sum += column_width[i] ; + } + column_width[i] = ctu_width_in_pic - sum; + + sum = 0; + for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1; i++) { + row_height[i] = dxva_cxt->pp.row_height_minus1[i] + 1; + sum += row_height[i]; + } + row_height[i] = ctu_height_in_pic - sum; + } // end of (pps->uniform_spacing_flag == 0) + else { + + RK_S32 pic_in_cts_width = (width + + (1 << (log2_min_cb_size + + dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1) + / (1 << (log2_min_cb_size + + dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)); + RK_S32 pic_in_cts_height = (height + + (1 << (log2_min_cb_size + + dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)) - 1) + / (1 << (log2_min_cb_size + + dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size)); + + for (i = 0; i < dxva_cxt->pp.num_tile_columns_minus1 + 1; i++) + column_width[i] = ((i + 1) * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1) - + (i * pic_in_cts_width) / (dxva_cxt->pp.num_tile_columns_minus1 + 1); + + for (i = 0; i < dxva_cxt->pp.num_tile_rows_minus1 + 1; i++) + row_height[i] = ((i + 1) * pic_in_cts_height) / (dxva_cxt->pp.num_tile_rows_minus1 + 1) - + (i * pic_in_cts_height) / 
(dxva_cxt->pp.num_tile_rows_minus1 + 1);
+                }
+            } // pps->tiles_enabled_flag
+            else {
+                RK_S32 MaxCUWidth = (1 << (dxva_cxt->pp.log2_diff_max_min_luma_coding_block_size + log2_min_cb_size));
+                column_width[0] = (width + MaxCUWidth - 1) / MaxCUWidth;
+                row_height[0] = (height + MaxCUWidth - 1) / MaxCUWidth;
+            }
+
+            for (j = 0; j < 20; j++) {
+                if (column_width[j] > 0)
+                    column_width[j]--;
+                mpp_put_bits(&bp, column_width[j], 12);
+            }
+
+            for (j = 0; j < 22; j++) {
+                if (row_height[j] > 0)
+                    row_height[j]--;
+                mpp_put_bits(&bp, row_height[j], 12);
+            }
+        }
+
+        mpp_put_bits(&bp, 0, 32);
+        mpp_put_bits(&bp, 0, 70);
+        mpp_put_align(&bp, 64, 0xf);//128
+    }
+
+    if (dxva_cxt->pp.scaling_list_enabled_flag) {
+        MppDevRegOffsetCfg trans_cfg;
+        RK_U8 *ptr_scaling = (RK_U8 *)mpp_buffer_get_ptr(reg_cxt->bufs) + reg_cxt->sclst_offset;
+
+        if (dxva_cxt->pp.scaling_list_data_present_flag) {
+            addr = (dxva_cxt->pp.pps_id + 16) * 1360;
+        } else if (dxva_cxt->pp.scaling_list_enabled_flag) {
+            addr = dxva_cxt->pp.sps_id * 1360;
+        } else {
+            addr = 80 * 1360;
+        }
+
+        hal_h265d_output_scalinglist_packet(hal, ptr_scaling + addr, dxva);
+
+        hw_reg->h265d_addr.reg180_scanlist_addr = reg_cxt->bufs_fd;
+        hw_reg->common.reg012.scanlist_addr_valid_en = 1;
+
+        /* need to config addr */
+        trans_cfg.reg_idx = 180;
+        trans_cfg.offset = addr + reg_cxt->sclst_offset;
+        mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_REG_OFFSET, &trans_cfg);
+    }
+
+    for (i = 0; i < 64; i++)
+        memcpy(pps_ptr + i * 112, reg_cxt->pps_buf, 112);
+#ifdef dump
+    fwrite(pps_ptr, 1, 80 * 64, fp);
+    RK_U32 *tmp = (RK_U32 *)pps_ptr;
+    for (i = 0; i < 112 / 4; i++) {
+        mpp_log("pps[%3d] = 0x%08x\n", i, tmp[i]);
+    }
+#endif
+    return 0;
+}
+
+/*
+ * Recompute the .size field of every RCB_* entry in rcb_info[] for the
+ * current picture.
+ *
+ * rcb_info : table indexed by the RCB_* ids; only .size is written here.
+ * hw_regs  : register set; common.reg012.fbc_e selects the FBC variants of
+ *            the RCB_FBC_ROW and RCB_FILT_COL formulas.
+ * width    : picture width, rounded up to the CTU size before use.
+ * height   : picture height, rounded up to the CTU size before use.
+ * dxva     : h265d_dxva2_picture_context_t holding the picture parameters.
+ *
+ * Each size is computed into rcb_bits and stored through MPP_RCB_BYTES().
+ * ext_align_size (num_tile_columns_minus1 * 64 * 8) is added to every
+ * non-zero entry.
+ *
+ * NOTE(review): in the RCB_FBC_ROW formula "(chroma_fmt_idc - 1)" is RK_U32
+ * arithmetic and wraps when chroma_fmt_idc is 0 (4:0:0) - confirm whether
+ * monochrome + FBC can reach this path.
+ */
+static void h265d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
+                                  Vdpu382H265dRegSet *hw_regs,
+                                  RK_S32 width, RK_S32 height, void *dxva)
+{
+    RK_U32 rcb_bits = 0;
+    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
+    DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
+    RK_U32 chroma_fmt_idc = pp->chroma_format_idc; /* 0: 4:0:0, 1: 4:2:0, 2: 4:2:2, 3: 4:4:4 */
+    RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
+    RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
+    RK_U32 tile_col_cut_num = pp->num_tile_columns_minus1;
+    RK_U32 ext_align_size = tile_col_cut_num * 64 * 8;
+
+    width = MPP_ALIGN(width, ctu_size);
+    height = MPP_ALIGN(height, ctu_size);
+    /* RCB_STRMD_ROW: only needed for pictures wider than 8192 */
+    if (width > 8192) {
+        RK_U32 factor = ctu_size / 16;
+        rcb_bits = (MPP_ALIGN(width, ctu_size) + factor - 1) * factor * 24 + ext_align_size;
+    } else
+        rcb_bits = 0;
+    rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_TRANSD_ROW: only the part of the width beyond 8192 counts */
+    if (width > 8192)
+        rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
+    else
+        rcb_bits = 0;
+    rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_TRANSD_COL: only the part of the height beyond 8192 counts */
+    if (height > 8192)
+        rcb_bits = (MPP_ALIGN(height - 8192, 4) << 1) + ext_align_size;
+    else
+        rcb_bits = 0;
+    rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_INTER_ROW */
+    rcb_bits = width * 22 + ext_align_size;
+    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_INTER_COL */
+    rcb_bits = height * 22 + ext_align_size;
+    rcb_info[RCB_INTER_COL].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_INTRA_ROW */
+    rcb_bits = width * 48 + ext_align_size;
+    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_DBLK_ROW: per-pixel cost depends on chroma format and CTU size */
+    if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
+        if (ctu_size == 32)
+            rcb_bits = width * ( 4 + 6 * bit_depth);
+        else
+            rcb_bits = width * ( 2 + 6 * bit_depth);
+    } else {
+        if (ctu_size == 32)
+            rcb_bits = width * ( 4 + 8 * bit_depth);
+        else
+            rcb_bits = width * ( 2 + 8 * bit_depth);
+    }
+    rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 256 : 192)) + ext_align_size;
+    rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_SAO_ROW */
+    if (chroma_fmt_idc == 1 || chroma_fmt_idc == 2) {
+        rcb_bits = width * (128 / ctu_size + 2 * bit_depth);
+    } else {
+        rcb_bits = width * (128 / ctu_size + 3 * bit_depth);
+    }
+    rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 160 : 128)) + ext_align_size;
+    rcb_info[RCB_SAO_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_FBC_ROW: only used when FBC output is enabled */
+    if (hw_regs->common.reg012.fbc_e) {
+        rcb_bits = width * (chroma_fmt_idc - 1) * 2 * bit_depth;
+        rcb_bits += (tile_col_cut_num * (bit_depth == 8 ? 128 : 64)) + ext_align_size;
+    } else
+        rcb_bits = 0;
+    rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_FILT_COL: coefficients come from the filterd_fbc_on/off tables */
+    if (hw_regs->common.reg012.fbc_e) {
+        RK_U32 ctu_idx = ctu_size >> 5;
+        RK_U32 a = filterd_fbc_on[chroma_fmt_idc][ctu_idx].a;
+        RK_U32 b = filterd_fbc_on[chroma_fmt_idc][ctu_idx].b;
+
+        rcb_bits = height * (a * bit_depth + b);
+    } else {
+        RK_U32 ctu_idx = ctu_size >> 5;
+        RK_U32 a = filterd_fbc_off[chroma_fmt_idc][ctu_idx].a;
+        RK_U32 b = filterd_fbc_off[chroma_fmt_idc][ctu_idx].b;
+
+        rcb_bits = height * (a * bit_depth + b + (bit_depth == 10 ? 192 * ctu_size >> 4 : 0));
+    }
+    rcb_bits += ext_align_size;
+    rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
+}
+
+/*
+ * Rebuild the RCB size table and reallocate the RCB buffers when the stream
+ * geometry changes.
+ *
+ * The values cached in the HAL context (num_row_tiles, bit_depth,
+ * chroma_fmt_idc, ctu_size, width, height) are compared with the current
+ * picture. On any difference the total buffer size is taken from
+ * vdpu382_get_rcb_buf_size(), the per-entry sizes are refined by
+ * h265d_refine_rcb_size(), every RCB buffer (one per g_buf entry in fast
+ * mode, otherwise a single one) is released and reallocated, and the cache
+ * is refreshed.
+ *
+ * hal      : HalH265dCtx of this decoder instance.
+ * dxva     : h265d_dxva2_picture_context_t of the current picture.
+ * hw_regs  : register set, forwarded to h265d_refine_rcb_size().
+ * width    : picture width used for sizing.
+ * height   : picture height used for sizing.
+ *
+ * NOTE(review): the change key tracks num_tile_rows_minus1, while the refined
+ * sizes depend on num_tile_columns_minus1 and reg012.fbc_e - confirm that a
+ * change of only those inputs cannot leave stale sizes in rcb_info.
+ */
+static void hal_h265d_rcb_info_update(void *hal,  void *dxva,
+                                      Vdpu382H265dRegSet *hw_regs,
+                                      RK_S32 width, RK_S32 height)
+{
+    HalH265dCtx *reg_cxt = ( HalH265dCtx *)hal;
+    h265d_dxva2_picture_context_t *dxva_cxt = (h265d_dxva2_picture_context_t*)dxva;
+    DXVA_PicParams_HEVC *pp = &dxva_cxt->pp;
+    RK_U32 chroma_fmt_idc = pp->chroma_format_idc; /* 0: 4:0:0, 1: 4:2:0, 2: 4:2:2, 3: 4:4:4 */
+    RK_U8 bit_depth = MPP_MAX(pp->bit_depth_luma_minus8, pp->bit_depth_chroma_minus8) + 8;
+    RK_U8 ctu_size = 1 << (pp->log2_diff_max_min_luma_coding_block_size + pp->log2_min_luma_coding_block_size_minus3 + 3);
+    RK_U32 num_tiles = pp->num_tile_rows_minus1 + 1;
+
+    if (reg_cxt->num_row_tiles != num_tiles ||
+        reg_cxt->bit_depth != bit_depth ||
+        reg_cxt->chroma_fmt_idc != chroma_fmt_idc ||
+        reg_cxt->ctu_size != ctu_size ||
+        reg_cxt->width != width ||
+        reg_cxt->height != height) {
+        RK_U32 i = 0;
+        RK_U32 loop = reg_cxt->fast_mode ? MPP_ARRAY_ELEMS(reg_cxt->g_buf) : 1;
+
+        reg_cxt->rcb_buf_size = vdpu382_get_rcb_buf_size((Vdpu382RcbInfo*)reg_cxt->rcb_info, width, height);
+        h265d_refine_rcb_size((Vdpu382RcbInfo*)reg_cxt->rcb_info, hw_regs, width, height, dxva_cxt);
+
+        for (i = 0; i < loop; i++) {
+            /*
+             * Start from NULL: the result of mpp_buffer_get() is not checked,
+             * so an indeterminate handle must never reach the context if the
+             * allocation leaves rcb_buf untouched.
+             */
+            MppBuffer rcb_buf = NULL;
+
+            if (reg_cxt->rcb_buf[i]) {
+                mpp_buffer_put(reg_cxt->rcb_buf[i]);
+                reg_cxt->rcb_buf[i] = NULL;
+            }
+            mpp_buffer_get(reg_cxt->group, &rcb_buf, reg_cxt->rcb_buf_size);
+            reg_cxt->rcb_buf[i] = rcb_buf;
+        }
+
+        reg_cxt->num_row_tiles  = num_tiles;
+        reg_cxt->bit_depth      = bit_depth;
+        reg_cxt->chroma_fmt_idc = chroma_fmt_idc;
+        reg_cxt->ctu_size       = ctu_size;
+        reg_cxt->width          = width;
+        reg_cxt->height         = height;
+    }
+}
+
+/*
+ * Store value into regs.reg200.ref<index>_<field> for index 0..7 or
+ * regs.reg201.ref<index>_<field> for index 8..15; any other index is ignored.
+ */
+#define SET_POC_HIGNBIT_INFO(regs, index, field, value)\
+    do{ \
+        switch(index){\
+        case 0: regs.reg200.ref0_##field = value; break;\
+        case 1: regs.reg200.ref1_##field = value; break;\
+        case 2: regs.reg200.ref2_##field = value; break;\
+        case 3: regs.reg200.ref3_##field = value; break;\
+        case 4: regs.reg200.ref4_##field = value; break;\
+        case 5: regs.reg200.ref5_##field = value; break;\
+        case 6: regs.reg200.ref6_##field = value; break;\
+        case 7: regs.reg200.ref7_##field = value; break;\
+        case 8: regs.reg201.ref8_##field = value; break;\
+        case 9: regs.reg201.ref9_##field = value; break;\
+        case 10: regs.reg201.ref10_##field = value; break;\
+        case 11: regs.reg201.ref11_##field = value; break;\
+        case 12: regs.reg201.ref12_##field = value; break;\
+        case 13: regs.reg201.ref13_##field = value; break;\
+        case 14: regs.reg201.ref14_##field = value; break;\
+        case 15: regs.reg201.ref15_##field = value; break;\
+        default: break;}\
+    }while(0)
+
+#define pocdistance(a, b) (((a) > (b)) ?
((a) - (b)) : ((b) - (a)))
+#define MAX_INT 2147483647
+
+/*
+ * Build the VDPU382 register set and the side-band buffers (PPS, RPS,
+ * scaling list, RCB) for one HEVC picture.
+ *
+ * hal : HalH265dCtx of this decoder instance.
+ * syn : task info; syn->dec.syntax.data is the h265d_dxva2_picture_context_t
+ *       of the picture. In fast mode syn->dec.reg_index receives the index of
+ *       the g_buf slot reserved for this task.
+ *
+ * Returns MPP_OK immediately when the task is already flagged parse_err or
+ * ref_err. Returns MPP_OK with syn->dec.flags.ref_err set when a non-intra
+ * picture has no error-free reference to fall back on. Returns 0 when the
+ * output buffer fd is 0. Returns MPP_ERR_NOMEM when no fast-mode slot is free
+ * or the shared buffer cannot be mapped, and MPP_ERR_NULL_PTR when the syntax
+ * data, the register set or the colmv buffer pool is missing.
+ */
+static MPP_RET hal_h265d_vdpu382_gen_regs(void *hal,  HalTaskInfo *syn)
+{
+    RK_S32 i = 0;
+    RK_S32 log2_min_cb_size;
+    RK_S32 width, height;
+    RK_S32 stride_y, stride_uv, virstrid_y;
+    Vdpu382H265dRegSet *hw_regs;
+    RK_S32 ret = MPP_SUCCESS;
+    MppBuffer streambuf = NULL;
+    RK_S32 aglin_offset = 0;
+    RK_S32 valid_ref = -1;
+    MppBuffer framebuf = NULL;
+    RK_U32 sw_ref_valid = 0;
+    HalBuf *mv_buf = NULL;
+    RK_S32 fd = -1;
+    RK_U32 mv_size = 0;
+    RK_S32 distance = MAX_INT;
+    h265d_dxva2_picture_context_t *dxva_cxt =
+        (h265d_dxva2_picture_context_t *)syn->dec.syntax.data;
+    HalH265dCtx *reg_cxt = ( HalH265dCtx *)hal;
+    void *rps_ptr = NULL;
+    RK_U32 stream_buf_size = 0;
+
+    if (syn->dec.flags.parse_err ||
+        syn->dec.flags.ref_err) {
+        h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
+        return MPP_OK;
+    }
+
+    /* fast mode: reserve the first free register slot and its buffer offsets */
+    if (reg_cxt->fast_mode) {
+        for (i = 0; i < MAX_GEN_REG; i++) {
+            if (!reg_cxt->g_buf[i].use_flag) {
+                syn->dec.reg_index = i;
+
+                reg_cxt->spspps_offset = reg_cxt->offset_spspps[i];
+                reg_cxt->rps_offset = reg_cxt->offset_rps[i];
+                reg_cxt->sclst_offset = reg_cxt->offset_sclst[i];
+
+                reg_cxt->hw_regs = reg_cxt->g_buf[i].hw_regs;
+                reg_cxt->g_buf[i].use_flag = 1;
+                break;
+            }
+        }
+        if (i == MAX_GEN_REG) {
+            mpp_err("hevc rps buf all used");
+            return MPP_ERR_NOMEM;
+        }
+    }
+
+    /*
+     * Validate the mapped base before applying the offset: once rps_offset
+     * has been added a failed mapping can no longer be told from a valid
+     * pointer. The offset is applied on a byte pointer rather than relying
+     * on void-pointer arithmetic.
+     */
+    rps_ptr = mpp_buffer_get_ptr(reg_cxt->bufs);
+    if (NULL == rps_ptr) {
+        mpp_err("rps_data get ptr error");
+        return MPP_ERR_NOMEM;
+    }
+    rps_ptr = (RK_U8 *)rps_ptr + reg_cxt->rps_offset;
+
+    if (syn->dec.syntax.data == NULL) {
+        mpp_err("%s:%s:%d dxva is NULL", __FILE__, __FUNCTION__, __LINE__);
+        return MPP_ERR_NULL_PTR;
+    }
+
+    /* the register set must exist before it is cleared and filled */
+    hw_regs = (Vdpu382H265dRegSet*)reg_cxt->hw_regs;
+    if (NULL == hw_regs) {
+        return MPP_ERR_NULL_PTR;
+    }
+    memset(hw_regs, 0, sizeof(Vdpu382H265dRegSet));
+
+    /* output pps */
+    hal_h265d_v382_output_pps_packet(hal, syn->dec.syntax.data);
+
+    log2_min_cb_size = dxva_cxt->pp.log2_min_luma_coding_block_size_minus3 + 3;
+
+    width = (dxva_cxt->pp.PicWidthInMinCbsY << log2_min_cb_size);
+    height = (dxva_cxt->pp.PicHeightInMinCbsY << log2_min_cb_size);
+    mv_size = (MPP_ALIGN(width, 64) * MPP_ALIGN(height, 64)) >> 3;
+    /* (re)create the colmv buffer pool when missing or too small */
+    if (reg_cxt->cmv_bufs == NULL || reg_cxt->mv_size < mv_size) {
+        size_t size = mv_size;
+
+        if (reg_cxt->cmv_bufs) {
+            hal_bufs_deinit(reg_cxt->cmv_bufs);
+            reg_cxt->cmv_bufs = NULL;
+        }
+
+        hal_bufs_init(&reg_cxt->cmv_bufs);
+        if (reg_cxt->cmv_bufs == NULL) {
+            mpp_err_f("colmv bufs init fail");
+            return MPP_ERR_NULL_PTR;
+        }
+
+        reg_cxt->mv_size = mv_size;
+        reg_cxt->mv_count = mpp_buf_slot_get_count(reg_cxt->slots);
+        hal_bufs_setup(reg_cxt->cmv_bufs, reg_cxt->mv_count, 1, &size);
+    }
+
+    {
+        MppFrame mframe = NULL;
+        RK_U32 ver_virstride;
+
+        mpp_buf_slot_get_prop(reg_cxt->slots, dxva_cxt->pp.CurrPic.Index7Bits,
+                              SLOT_FRAME_PTR, &mframe);
+        stride_y = mpp_frame_get_hor_stride(mframe);
+        ver_virstride = mpp_frame_get_ver_stride(mframe);
+        stride_uv = stride_y;
+        virstrid_y = ver_virstride * stride_y;
+        hw_regs->common.reg013.h26x_error_mode = 1;
+        hw_regs->common.reg021.error_deb_en = 1;
+        hw_regs->common.reg021.inter_error_prc_mode = 0;
+        hw_regs->common.reg021.error_intra_mode = 1;
+
+        hw_regs->common.reg017.slice_num = dxva_cxt->slice_count;
+        hw_regs->h265d_param.reg64.h26x_rps_mode = 0;
+        hw_regs->h265d_param.reg64.h26x_frame_orslice = 0;
+        hw_regs->h265d_param.reg64.h26x_stream_mode = 0;
+
+        /* strides are programmed in units of 16 (>> 4) */
+        if (MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe))) {
+            RK_U32 pixel_width = MPP_ALIGN(mpp_frame_get_width(mframe), 64);
+            RK_U32 fbd_offset = MPP_ALIGN(pixel_width * (MPP_ALIGN(ver_virstride, 64) + 16) / 16,
+                                          SZ_4K);
+
+            hw_regs->common.reg012.fbc_e = 1;
+            hw_regs->common.reg018.y_hor_virstride = pixel_width >> 4;
+            hw_regs->common.reg019.uv_hor_virstride = pixel_width >> 4;
+            hw_regs->common.reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4;
+        } else {
+            hw_regs->common.reg012.fbc_e = 0;
+            hw_regs->common.reg018.y_hor_virstride = stride_y >> 4;
+            hw_regs->common.reg019.uv_hor_virstride = stride_uv >> 4;
+            hw_regs->common.reg020_y_virstride.y_virstride = virstrid_y >> 4;
+        }
+    }
+    mpp_buf_slot_get_prop(reg_cxt->slots, dxva_cxt->pp.CurrPic.Index7Bits,
+                          SLOT_BUFFER, &framebuf);
+    hw_regs->common_addr.reg130_decout_base = mpp_buffer_get_fd(framebuf); //just index need map
+    /* an output base of zero means this frame may be in error: return directly */
+
+    if (hw_regs->common_addr.reg130_decout_base == 0) {
+        return 0;
+    }
+    fd = mpp_buffer_get_fd(framebuf);
+    hw_regs->common_addr.reg130_decout_base = fd;
+    mv_buf = hal_bufs_get_buf(reg_cxt->cmv_bufs, dxva_cxt->pp.CurrPic.Index7Bits);
+    hw_regs->common_addr.reg131_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]);
+
+    hw_regs->h265d_param.reg65.cur_top_poc = dxva_cxt->pp.CurrPicOrderCntVal;
+
+    mpp_buf_slot_get_prop(reg_cxt->packet_slots, syn->dec.input, SLOT_BUFFER,
+                          &streambuf);
+    if ( dxva_cxt->bitstream == NULL) {
+        dxva_cxt->bitstream = mpp_buffer_get_ptr(streambuf);
+    }
+#ifdef HW_RPS
+    hw_regs->h265d_param.reg103.ref_pic_layer_same_with_cur = 0xffff;
+    hal_h265d_slice_hw_rps(syn->dec.syntax.data, rps_ptr, reg_cxt->sw_rps_buf, reg_cxt->fast_mode);
+#else
+    hw_regs->sw_sysctrl.sw_h26x_rps_mode = 1;
+    hal_h265d_slice_output_rps(syn->dec.syntax.data, rps_ptr);
+#endif
+
+    MppDevRegOffsetCfg trans_cfg;
+    /* cabac table */
+    hw_regs->h265d_addr.reg197_cabactbl_base    = reg_cxt->bufs_fd;
+    /* pps */
+    hw_regs->h265d_addr.reg161_pps_base         = reg_cxt->bufs_fd;
+    hw_regs->h265d_addr.reg163_rps_base         = reg_cxt->bufs_fd;
+
+    hw_regs->common_addr.reg128_rlc_base        = mpp_buffer_get_fd(streambuf);
+    hw_regs->common_addr.reg129_rlcwrite_base   = mpp_buffer_get_fd(streambuf);
+    stream_buf_size                             = mpp_buffer_get_size(streambuf);
+    /* stream length: round up to 16, add 64, then clamp to the buffer size */
+    hw_regs->common.reg016_str_len              = ((dxva_cxt->bitstream_size + 15)
+                                                   & (~15)) + 64;
+    hw_regs->common.reg016_str_len = stream_buf_size > hw_regs->common.reg016_str_len ?
+                                     hw_regs->common.reg016_str_len : stream_buf_size;
+
+    /* zero the padding between the real stream end and the programmed length */
+    aglin_offset =  hw_regs->common.reg016_str_len - dxva_cxt->bitstream_size;
+    if (aglin_offset > 0) {
+        memset((void *)(dxva_cxt->bitstream + dxva_cxt->bitstream_size), 0,
+               aglin_offset);
+    }
+    hw_regs->common.reg010.dec_e                = 1;
+    hw_regs->common.reg012.wr_ddr_align_en      = dxva_cxt->pp.tiles_enabled_flag
+                                                  ? 0 : 1;
+    hw_regs->common.reg012.colmv_compress_en    = 1;
+
+    hw_regs->common.reg024.cabac_err_en_lowbits = 0xffffdfff;
+    hw_regs->common.reg025.cabac_err_en_highbits = 0x3ffbf9ff;
+
+    hw_regs->common.reg011.dec_clkgate_e    = 1;
+    hw_regs->common.reg026.swreg_block_gating_e = 0xfffff;
+    hw_regs->common.reg026.reg_cfg_gating_en = 1;
+    hw_regs->common.reg032_timeout_threshold = 0x3ffff;
+
+    /* until a better candidate is found the current picture is the error reference */
+    valid_ref = hw_regs->common_addr.reg130_decout_base;
+    reg_cxt->error_index = dxva_cxt->pp.CurrPic.Index7Bits;
+    hw_regs->common_addr.reg132_error_ref_base = valid_ref;
+
+    memset(&hw_regs->highpoc.reg205, 0, sizeof(RK_U32));
+
+    /*
+     * First pass: program every valid reference and pick, as error
+     * reference, the error-free one whose POC is closest to the current POC.
+     */
+    for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
+        if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
+            dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
+
+            MppFrame mframe = NULL;
+            hw_regs->h265d_param.reg67_82_ref_poc[i] = dxva_cxt->pp.PicOrderCntValList[i];
+            mpp_buf_slot_get_prop(reg_cxt->slots,
+                                  dxva_cxt->pp.RefPicList[i].Index7Bits,
+                                  SLOT_BUFFER, &framebuf);
+            mpp_buf_slot_get_prop(reg_cxt->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
+                                  SLOT_FRAME_PTR, &mframe);
+            if (framebuf != NULL) {
+                hw_regs->h265d_addr.reg164_179_ref_base[i] = mpp_buffer_get_fd(framebuf);
+                valid_ref = hw_regs->h265d_addr.reg164_179_ref_base[i];
+                // mpp_log("cur poc %d, ref poc %d", dxva_cxt->pp.current_poc, dxva_cxt->pp.PicOrderCntValList[i]);
+                if ((pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc) < distance)
+                    && (!mpp_frame_get_errinfo(mframe))) {
+                    distance = pocdistance(dxva_cxt->pp.PicOrderCntValList[i], dxva_cxt->pp.current_poc);
+                    hw_regs->common_addr.reg132_error_ref_base = hw_regs->h265d_addr.reg164_179_ref_base[i];
+                    reg_cxt->error_index = dxva_cxt->pp.RefPicList[i].Index7Bits;
+                    hw_regs->common.reg021.error_intra_mode = 0;
+
+                }
+            } else {
+                hw_regs->h265d_addr.reg164_179_ref_base[i] = valid_ref;
+            }
+
+            mv_buf = hal_bufs_get_buf(reg_cxt->cmv_bufs, dxva_cxt->pp.RefPicList[i].Index7Bits);
+            hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
+
+            sw_ref_valid |=   (1 << i);
+            SET_REF_VALID(hw_regs->h265d_param, i, 1);
+        }
+    }
+
+    /* no error-free reference for a non-intra picture: flag the task and skip it */
+    if ((reg_cxt->error_index == dxva_cxt->pp.CurrPic.Index7Bits) && !dxva_cxt->pp.IntraPicFlag) {
+        // mpp_err("current frm may be err, should skip process");
+        syn->dec.flags.ref_err = 1;
+        return MPP_OK;
+    }
+
+    /*
+     * Second pass: redirect missing or errored references, and every unused
+     * entry, to the error reference picked above.
+     */
+    for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(dxva_cxt->pp.RefPicList); i++) {
+
+        if (dxva_cxt->pp.RefPicList[i].bPicEntry != 0xff &&
+            dxva_cxt->pp.RefPicList[i].bPicEntry != 0x7f) {
+            MppFrame mframe = NULL;
+
+            mpp_buf_slot_get_prop(reg_cxt->slots,
+                                  dxva_cxt->pp.RefPicList[i].Index7Bits,
+                                  SLOT_BUFFER, &framebuf);
+
+            mpp_buf_slot_get_prop(reg_cxt->slots, dxva_cxt->pp.RefPicList[i].Index7Bits,
+                                  SLOT_FRAME_PTR, &mframe);
+
+            if (framebuf == NULL || mpp_frame_get_errinfo(mframe)) {
+                mv_buf = hal_bufs_get_buf(reg_cxt->cmv_bufs, reg_cxt->error_index);
+                hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
+                hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
+            }
+        } else {
+            mv_buf = hal_bufs_get_buf(reg_cxt->cmv_bufs, reg_cxt->error_index);
+            hw_regs->h265d_addr.reg164_179_ref_base[i] = hw_regs->common_addr.reg132_error_ref_base;
+            hw_regs->h265d_addr.reg181_196_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]);
+            /* mark 3 to differ from current frame */
+            if (reg_cxt->error_index == dxva_cxt->pp.CurrPic.Index7Bits)
+                SET_POC_HIGNBIT_INFO(hw_regs->highpoc, i, poc_highbit, 3);
+        }
+    }
+
+    /* pps: offset of this task's SPS/PPS packet inside the shared buffer */
+    trans_cfg.reg_idx = 161;
+    trans_cfg.offset = reg_cxt->spspps_offset;
+    mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_REG_OFFSET, &trans_cfg);
+    /* rps */
+    trans_cfg.reg_idx = 163;
+    trans_cfg.offset = reg_cxt->rps_offset;
+    mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_REG_OFFSET, &trans_cfg);
+
+    hw_regs->common.reg013.cur_pic_is_idr = dxva_cxt->pp.IdrPicFlag;//p_hal->slice_long->idr_flag;
+
+    hw_regs->common.reg011.buf_empty_en = 1;
+
+    hal_h265d_rcb_info_update(hal, dxva_cxt, hw_regs, width, height);
+    vdpu382_setup_rcb(&hw_regs->common_addr, reg_cxt->dev, reg_cxt->fast_mode ?
+                      reg_cxt->rcb_buf[syn->dec.reg_index] : reg_cxt->rcb_buf[0],
+                      (Vdpu382RcbInfo*)reg_cxt->rcb_info);
+    vdpu382_setup_statistic(&hw_regs->common, &hw_regs->statistic);
+
+    return ret;
+}
+
+/*
+ * Hand the register set prepared for this task to the device: each register
+ * group is queued with its own MPP_DEV_REG_WR ioctl, the irq status group is
+ * queued for read-back, and the task is sent with MPP_DEV_CMD_SEND.
+ * Tasks flagged parse_err/ref_err are skipped and return MPP_OK.
+ */
+static MPP_RET hal_h265d_vdpu382_start(void *hal, HalTaskInfo *task)
+{
+    MPP_RET ret = MPP_OK;
+    RK_U8* p = NULL;
+    Vdpu382H265dRegSet *hw_regs = NULL;
+    HalH265dCtx *reg_cxt = (HalH265dCtx *)hal;
+    RK_S32 index =  task->dec.reg_index;
+
+    RK_U32 i;
+
+    if (task->dec.flags.parse_err ||
+        task->dec.flags.ref_err) {
+        h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
+        return MPP_OK;
+    }
+
+    if (reg_cxt->fast_mode) {
+        p = (RK_U8*)reg_cxt->g_buf[index].hw_regs;
+        hw_regs = ( Vdpu382H265dRegSet *)reg_cxt->g_buf[index].hw_regs;
+    } else {
+        p = (RK_U8*)reg_cxt->hw_regs;
+        hw_regs = ( Vdpu382H265dRegSet *)reg_cxt->hw_regs;
+    }
+
+    if (hw_regs == NULL) {
+        mpp_err("hal_h265d_start hw_regs is NULL");
+        return MPP_ERR_NULL_PTR;
+    }
+    for (i = 0; i < 68; i++) {
+        h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
+                  i, *((RK_U32*)p));
+        //mpp_log("RK_HEVC_DEC: regs[%02d]=%08X\n", i, *((RK_U32*)p));
+        p += 4;
+    }
+
+    do {
+        MppDevRegWrCfg wr_cfg;
+        MppDevRegRdCfg rd_cfg;
+
+        wr_cfg.reg = &hw_regs->common;
+        wr_cfg.size = sizeof(hw_regs->common);
+        wr_cfg.offset = OFFSET_COMMON_REGS;
+
+        ret = mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_REG_WR, &wr_cfg);
+        if (ret) {
+            mpp_err_f("set register write failed %d\n", ret);
+            break;
+        }
+
+        wr_cfg.reg = &hw_regs->h265d_param;
+        wr_cfg.size
= sizeof(hw_regs->h265d_param);
+        wr_cfg.offset = OFFSET_CODEC_PARAMS_REGS;
+
+        ret = mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_REG_WR, &wr_cfg);
+        if (ret) {
+            mpp_err_f("set register write failed %d\n", ret);
+            break;
+        }
+
+        wr_cfg.reg = &hw_regs->common_addr;
+        wr_cfg.size = sizeof(hw_regs->common_addr);
+        wr_cfg.offset = OFFSET_COMMON_ADDR_REGS;
+
+        ret = mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_REG_WR, &wr_cfg);
+        if (ret) {
+            mpp_err_f("set register write failed %d\n", ret);
+            break;
+        }
+
+        wr_cfg.reg = &hw_regs->h265d_addr;
+        wr_cfg.size = sizeof(hw_regs->h265d_addr);
+        wr_cfg.offset = OFFSET_CODEC_ADDR_REGS;
+
+        ret = mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_REG_WR, &wr_cfg);
+        if (ret) {
+            mpp_err_f("set register write failed %d\n", ret);
+            break;
+        }
+
+        wr_cfg.reg = &hw_regs->statistic;
+        wr_cfg.size = sizeof(hw_regs->statistic);
+        wr_cfg.offset = OFFSET_STATISTIC_REGS;
+
+        ret = mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_REG_WR, &wr_cfg);
+        if (ret) {
+            mpp_err_f("set register write failed %d\n", ret);
+            break;
+        }
+
+        wr_cfg.reg = &hw_regs->highpoc;
+        wr_cfg.size = sizeof(hw_regs->highpoc);
+        wr_cfg.offset = OFFSET_POC_HIGHBIT_REGS;
+
+        ret = mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_REG_WR, &wr_cfg);
+        if (ret) {
+            mpp_err_f("set register write failed %d\n", ret);
+            break;
+        }
+
+        rd_cfg.reg = &hw_regs->irq_status;
+        rd_cfg.size = sizeof(hw_regs->irq_status);
+        rd_cfg.offset = OFFSET_INTERRUPT_REGS;
+
+        ret = mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_REG_RD, &rd_cfg);
+        if (ret) {
+            mpp_err_f("set register read failed %d\n", ret);
+            break;
+        }
+        /* rcb info for sram */
+        {
+            MppDevRcbInfoCfg rcb_cfg;
+            Vdpu382RcbInfo rcb_info[RCB_BUF_COUNT];
+
+            /* work on a sorted copy; entries are sent until the first empty one */
+            memcpy(rcb_info, reg_cxt->rcb_info, sizeof(rcb_info));
+            qsort(rcb_info, MPP_ARRAY_ELEMS(rcb_info),
+                  sizeof(rcb_info[0]), vdpu382_compare_rcb_size);
+
+            for (i = 0; i < MPP_ARRAY_ELEMS(rcb_info); i++) {
+                rcb_cfg.reg_idx = rcb_info[i].reg;
+                rcb_cfg.size = rcb_info[i].size;
+                if (rcb_cfg.size > 0)
+                    mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_RCB_INFO, &rcb_cfg);
+                else
+                    break;
+            }
+        }
+        ret = mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_CMD_SEND, NULL);
+        if (ret) {
+            mpp_err_f("send cmd failed %d\n", ret);
+            break;
+        }
+    } while (0);
+
+    return ret;
+}
+
+
+/*
+ * Wait for the hardware to finish a task and propagate decode errors.
+ *
+ * Tasks already flagged parse_err/ref_err skip the poll. A task counts as
+ * failed when one of those flags is set, or when the read-back irq status
+ * reports dec_error_sta, buf_empty_sta or dec_bus_sta, or lacks dec_rdy_sta.
+ * On failure, normal mode reports through dec_cb, while fast mode marks the
+ * output frame with errinfo and latches fast_mode_err_found. While that
+ * latch is set, a successfully decoded frame that references an errored
+ * frame is marked with errinfo as well.
+ * In fast mode the g_buf slot at task->dec.reg_index is released on exit.
+ *
+ * Returns the result of the MPP_DEV_CMD_POLL ioctl, or MPP_OK when the poll
+ * was skipped.
+ */
+static MPP_RET hal_h265d_vdpu382_wait(void *hal, HalTaskInfo *task)
+{
+    MPP_RET ret = MPP_OK;
+    RK_S32 index =  task->dec.reg_index;
+    HalH265dCtx *reg_cxt = (HalH265dCtx *)hal;
+    RK_U8* p = NULL;
+    Vdpu382H265dRegSet *hw_regs = NULL;
+    RK_S32 i;
+
+    if (task->dec.flags.parse_err ||
+        task->dec.flags.ref_err) {
+        h265h_dbg(H265H_DBG_TASK_ERR, "%s found task error\n", __FUNCTION__);
+        goto ERR_PROC;
+    }
+
+    if (reg_cxt->fast_mode) {
+        hw_regs = ( Vdpu382H265dRegSet *)reg_cxt->g_buf[index].hw_regs;
+    } else {
+        hw_regs = ( Vdpu382H265dRegSet *)reg_cxt->hw_regs;
+    }
+
+    p = (RK_U8*)hw_regs;
+
+    ret = mpp_dev_ioctl(reg_cxt->dev, MPP_DEV_CMD_POLL, NULL);
+    if (ret)
+        mpp_err_f("poll cmd failed %d\n", ret);
+
+ERR_PROC:
+    /*
+     * hw_regs is still NULL when we arrive through the goto above; the two
+     * task flags are tested first, so the irq status is never read then.
+     */
+    if (task->dec.flags.parse_err ||
+        task->dec.flags.ref_err ||
+        hw_regs->irq_status.reg224.dec_error_sta ||
+        hw_regs->irq_status.reg224.buf_empty_sta ||
+        hw_regs->irq_status.reg224.dec_bus_sta ||
+        !hw_regs->irq_status.reg224.dec_rdy_sta) {
+        if (!reg_cxt->fast_mode) {
+            if (reg_cxt->dec_cb)
+                mpp_callback(reg_cxt->dec_cb, &task->dec);
+        } else {
+            MppFrame mframe = NULL;
+            mpp_buf_slot_get_prop(reg_cxt->slots, task->dec.output,
+                                  SLOT_FRAME_PTR, &mframe);
+            if (mframe) {
+                reg_cxt->fast_mode_err_found = 1;
+                mpp_frame_set_errinfo(mframe, 1);
+            }
+        }
+    } else {
+        if (reg_cxt->fast_mode && reg_cxt->fast_mode_err_found) {
+            for (i = 0; i < (RK_S32)MPP_ARRAY_ELEMS(task->dec.refer); i++) {
+                if (task->dec.refer[i] >= 0) {
+                    MppFrame frame_ref = NULL;
+
+                    mpp_buf_slot_get_prop(reg_cxt->slots, task->dec.refer[i],
+                                          SLOT_FRAME_PTR, &frame_ref);
+                    h265h_dbg(H265H_DBG_FAST_ERR, "refer[%d] %d frame %p\n",
+                              i, task->dec.refer[i], frame_ref);
+                    if (frame_ref && mpp_frame_get_errinfo(frame_ref)) {
+                        MppFrame frame_out = NULL;
+                        mpp_buf_slot_get_prop(reg_cxt->slots, task->dec.output,
+                                              SLOT_FRAME_PTR, &frame_out);
+                        mpp_frame_set_errinfo(frame_out, 1);
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    /*
+     * Debug dump of registers 1 and 45. Skipped when no register set was
+     * resolved (error path): p is NULL there and must be neither advanced
+     * nor dereferenced.
+     */
+    for (i = 0; p != NULL && i < 68; i++) {
+        if (i == 1) {
+            h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
+                      i, *((RK_U32*)p));
+        }
+
+        if (i == 45) {
+            h265h_dbg(H265H_DBG_REG, "RK_HEVC_DEC: regs[%02d]=%08X\n",
+                      i, *((RK_U32*)p));
+        }
+        p += 4;
+    }
+
+    if (reg_cxt->fast_mode) {
+        reg_cxt->g_buf[index].use_flag = 0;
+    }
+
+    return ret;
+}
+
+/* Reset hook: clears the latched fast-mode error state. Always returns MPP_OK. */
+static MPP_RET hal_h265d_vdpu382_reset(void *hal)
+{
+    MPP_RET ret = MPP_OK;
+    HalH265dCtx *p_hal = (HalH265dCtx *)hal;
+    p_hal->fast_mode_err_found = 0;
+    (void)hal;
+    return ret;
+}
+
+/* Flush hook: nothing to do for this backend. Always returns MPP_OK. */
+static MPP_RET hal_h265d_vdpu382_flush(void *hal)
+{
+    MPP_RET ret = MPP_OK;
+
+    (void)hal;
+    return ret;
+}
+
+/*
+ * Control hook. Only MPP_DEC_SET_FRAME_INFO has an effect: for FBC frame
+ * formats vdpu382_afbc_align_calc() is applied to the frame with an argument
+ * of 16. All other commands are accepted and ignored. Always returns MPP_OK.
+ */
+static MPP_RET hal_h265d_vdpu382_control(void *hal, MpiCmd cmd_type, void *param)
+{
+    MPP_RET ret = MPP_OK;
+    HalH265dCtx *p_hal = (HalH265dCtx *)hal;
+
+    (void)hal;
+    switch ((MpiCmd)cmd_type) {
+    case MPP_DEC_SET_FRAME_INFO: {
+        MppFrame frame = (MppFrame)param;
+        MppFrameFormat fmt = mpp_frame_get_fmt(frame);
+
+        if (MPP_FRAME_FMT_IS_FBC(fmt)) {
+            vdpu382_afbc_align_calc(p_hal->slots, frame, 16);
+        }
+        break;
+    }
+    case MPP_DEC_SET_OUTPUT_FORMAT: {
+    } break;
+    default:
+        break;
+    }
+    return ret;
+}
+
+/* HAL entry points of the HEVC decoder for VDPU382. */
+const MppHalApi hal_h265d_vdpu382 = {
+    .name = "h265d_vdpu382",
+    .type = MPP_CTX_DEC,
+    .coding = MPP_VIDEO_CodingHEVC,
+    .ctx_size = sizeof(HalH265dCtx),
+    .flag = 0,
+    .init = hal_h265d_vdpu382_init,
+    .deinit = hal_h265d_vdpu382_deinit,
+    .reg_gen = hal_h265d_vdpu382_gen_regs,
+    .start = hal_h265d_vdpu382_start,
+    .wait = hal_h265d_vdpu382_wait,
+    .reset = hal_h265d_vdpu382_reset,
+    .flush = hal_h265d_vdpu382_flush,
+    .control = hal_h265d_vdpu382_control,
+};
diff --git a/mpp/hal/rkdec/h265d/hal_h265d_vdpu382.h b/mpp/hal/rkdec/h265d/hal_h265d_vdpu382.h
new file mode 100644
index 00000000..2e477ba0
--- /dev/null
+++ b/mpp/hal/rkdec/h265d/hal_h265d_vdpu382.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2022 Rockchip Electronics Co. LTD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __HAL_H265D_VDPU382_H__
+#define __HAL_H265D_VDPU382_H__
+
+#include "mpp_hal.h"
+#include "vdpu382.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern const MppHalApi hal_h265d_vdpu382; /* HEVC decoder HAL entry points for VDPU382 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __HAL_H265D_VDPU382_H__ */
diff --git a/mpp/hal/rkdec/inc/vdpu382.h b/mpp/hal/rkdec/inc/vdpu382.h
new file mode 100644
index 00000000..e2fd7694
--- /dev/null
+++ b/mpp/hal/rkdec/inc/vdpu382.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2022 Rockchip Electronics Co. LTD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __VDPU382_H__ +#define __VDPU382_H__ + +#define HWID_VDPU382 (0x20220601) + +#endif /* __VDPU382_H__ */ diff --git a/mpp/hal/rkdec/inc/vdpu382_avs2d.h b/mpp/hal/rkdec/inc/vdpu382_avs2d.h new file mode 100644 index 00000000..726f418e --- /dev/null +++ b/mpp/hal/rkdec/inc/vdpu382_avs2d.h @@ -0,0 +1,152 @@ +/* + * Copyright 2022 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __VDPU382_AVS2D_H__ +#define __VDPU382_AVS2D_H__ + +#include "vdpu382_com.h" + +typedef struct Vdpu382RegAvs2dParam_t { + struct SWREG64_H26X_SET { + RK_U32 h26x_frame_orslice : 1; + RK_U32 h26x_rps_mode : 1; + RK_U32 h26x_stream_mode : 1; + RK_U32 h26x_stream_lastpacket : 1; + RK_U32 h264_firstslice_flag : 1; + RK_U32 reserve : 27; + } reg64; + + RK_U32 reg65_cur_top_poc; + RK_U32 reg66_cur_bot_poc; + + RK_U32 reg67_098_ref_poc[32]; + + struct SWREG99_AVS2_REF0_3_INFO { + RK_U32 ref0_field : 1; + RK_U32 : 1; + RK_U32 ref0_botfield_used : 1; + RK_U32 ref0_valid_flag : 1; + RK_U32 : 4; + RK_U32 ref1_field : 1; + RK_U32 : 1; + RK_U32 ref1_botfield_used : 1; + RK_U32 ref1_valid_flag : 1; + RK_U32 : 4; + RK_U32 ref2_field : 1; + RK_U32 : 1; + RK_U32 ref2_botfield_used : 1; + RK_U32 ref2_valid_flag : 1; + RK_U32 : 4; + RK_U32 ref3_field : 1; + RK_U32 : 1; + RK_U32 ref3_botfield_used : 1; + RK_U32 ref3_valid_flag : 1; + RK_U32 : 4; + } reg99; + + struct SWREG100_AVS2_REF4_7_INFO { + RK_U32 ref4_field : 1; + RK_U32 : 1; + RK_U32 
ref4_botfield_used : 1; + RK_U32 ref4_valid_flag : 1; + RK_U32 : 4; + RK_U32 ref5_field : 1; + RK_U32 : 1; + RK_U32 ref5_botfield_used : 1; + RK_U32 ref5_valid_flag : 1; + RK_U32 : 4; + RK_U32 ref6_field : 1; + RK_U32 : 1; + RK_U32 ref6_botfield_used : 1; + RK_U32 ref6_valid_flag : 1; + RK_U32 : 4; + RK_U32 ref7_field : 1; + RK_U32 : 1; + RK_U32 ref7_botfield_used : 1; + RK_U32 ref7_valid_flag : 1; + RK_U32 : 4; + } reg100; + + RK_U32 reg101_102[2]; + + struct SW103_CTRL_EXTRA { + // 0 : use default 255, 1 : use fixed 256 + RK_U32 slice_hor_pos_ctrl : 1; + RK_U32 : 31; + } reg103; + + RK_U32 reg104; + struct SW105_HEAD_LEN { + RK_U32 head_len : 4; + RK_U32 count_update_en : 1; + RK_U32 : 27; + } reg105; + + RK_U32 reg106_111[6]; + struct SW112_ERROR_REF_INFO { + // 0 : Frame, 1 : field + RK_U32 ref_error_field : 1; + /** + * @brief Refer error is top field flag. + * 0 : Bottom field flag, + * 1 : Top field flag. + */ + RK_U32 ref_error_topfield : 1; + // For inter, 0 : top field is not used, 1 : top field is used. + RK_U32 ref_error_topfield_used : 1; + // For inter, 0 : bottom field is not used, 1 : bottom field is used.
+ RK_U32 ref_error_botfield_used : 1; + RK_U32 : 28; + } reg112; + +} Vdpu382RegAvs2dParam; + +typedef struct Vdpu382RegAvs2dAddr_t { + /* SWREG160 */ + RK_U32 reg160_no_use; + + /* SWREG161 */ + RK_U32 head_base; + + /* SWREG162 */ + RK_U32 reg162_no_use; + + /* SWREG163 */ + RK_U32 rps_base; + + /* SWREG164~179 */ + RK_U32 ref_base[16]; + + /* SWREG180 */ + RK_U32 scanlist_addr; + + /* SWREG181~196 */ + RK_U32 colmv_base[16]; + + /* SWREG197 */ + RK_U32 cabactbl_base; +} Vdpu382RegAvs2dAddr; + +typedef struct Vdpu382Avs2dRegSet_t { + Vdpu382RegCommon common; + Vdpu382RegAvs2dParam avs2d_param; + Vdpu382RegCommonAddr common_addr; + Vdpu382RegAvs2dAddr avs2d_addr; + Vdpu382RegIrqStatus irq_status; + Vdpu382RegStatistic statistic; +} Vdpu382Avs2dRegSet; + +#endif /*__VDPU382_AVS2D_H__*/ diff --git a/mpp/hal/rkdec/inc/vdpu382_com.h b/mpp/hal/rkdec/inc/vdpu382_com.h new file mode 100644 index 00000000..74de8efa --- /dev/null +++ b/mpp/hal/rkdec/inc/vdpu382_com.h @@ -0,0 +1,544 @@ +/* + * Copyright 2022 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __VDPU382_COM_H__ +#define __VDPU382_COM_H__ + +#include "mpp_device.h" +#include "mpp_buf_slot.h" +#include "vdpu382.h" + +#define OFFSET_COMMON_REGS (8 * sizeof(RK_U32)) +#define OFFSET_CODEC_PARAMS_REGS (64 * sizeof(RK_U32)) +#define OFFSET_COMMON_ADDR_REGS (128 * sizeof(RK_U32)) +#define OFFSET_CODEC_ADDR_REGS (160 * sizeof(RK_U32)) +#define OFFSET_POC_HIGHBIT_REGS (200 * sizeof(RK_U32)) +#define OFFSET_INTERRUPT_REGS (224 * sizeof(RK_U32)) +#define OFFSET_STATISTIC_REGS (256 * sizeof(RK_U32)) + +#define RCB_ALLINE_SIZE (64) + +#define MPP_RCB_BYTES(bits) MPP_ALIGN((bits + 7) / 8, RCB_ALLINE_SIZE) + +typedef enum Vdpu382_RCB_TYPE_E { + RCB_DBLK_ROW, + RCB_INTRA_ROW, + RCB_TRANSD_ROW, + RCB_STRMD_ROW, + RCB_INTER_ROW, + RCB_SAO_ROW, + RCB_FBC_ROW, + RCB_TRANSD_COL, + RCB_INTER_COL, + RCB_FILT_COL, + + RCB_BUF_COUNT, +} Vdpu382RcbType_e; + +/* base: OFFSET_COMMON_REGS */ +typedef struct Vdpu382RegCommon_t { + struct SWREG8_IN_OUT { + RK_U32 in_endian : 1; + RK_U32 in_swap32_e : 1; + RK_U32 in_swap64_e : 1; + RK_U32 str_endian : 1; + RK_U32 str_swap32_e : 1; + RK_U32 str_swap64_e : 1; + RK_U32 out_endian : 1; + RK_U32 out_swap32_e : 1; + RK_U32 out_cbcr_swap : 1; + RK_U32 out_swap64_e : 1; + RK_U32 reserve : 22; + } reg008; + + struct SWREG9_DEC_MODE { + RK_U32 dec_mode : 10; + RK_U32 reserve : 22; + } reg009; + + struct SWREG10_DEC_E { + RK_U32 dec_e : 1; + RK_U32 reserve : 31; + } reg010; + + struct SWREG11_IMPORTANT_EN { + RK_U32 reserver : 1; + RK_U32 dec_clkgate_e : 1; + RK_U32 reserve1 : 2; + + RK_U32 dec_irq_dis : 1; + RK_U32 dec_line_irq_dis : 1; //change to reg205[9] + RK_U32 buf_empty_en : 1; + RK_U32 reserve2 : 1; + + RK_U32 dec_line_irq_en : 1; + RK_U32 reserve3 : 1; + RK_U32 dec_e_rewrite_valid : 1; + RK_U32 reserve4 : 9; + + RK_U32 softrst_en_p : 1; + RK_U32 reserve5 : 1; //change to reg205[0] + RK_U32 err_head_fill_e : 1; + RK_U32 err_colmv_fill_e : 1; + RK_U32 pix_range_detection_e : 1; + RK_U32 reserve6 : 3; + RK_U32 
wlast_match_fail : 1; + RK_U32 mmu_wlast_match_fail : 1; + RK_U32 reserve7 : 2; + } reg011; + + struct SWREG12_SENCODARY_EN { + RK_U32 wr_ddr_align_en : 1; + RK_U32 colmv_compress_en : 1; + RK_U32 fbc_e : 1; + RK_U32 tile_e : 1; + + RK_U32 reserve1 : 1; + RK_U32 error_info_en : 1; + RK_U32 info_collect_en : 1; + RK_U32 reserve2 : 1; //change to reg205[4] + + RK_U32 scanlist_addr_valid_en : 1; + RK_U32 scale_down_en : 1; + RK_U32 reserve3 : 22; + } reg012; + + struct SWREG13_EN_MODE_SET { + RK_U32 reserve0 : 1; + RK_U32 req_timeout_rst_sel : 1; + RK_U32 reserve1 : 1; + RK_U32 dec_commonirq_mode : 1; + RK_U32 reserve2 : 2; + RK_U32 stmerror_waitdecfifo_empty : 1; + RK_U32 reserve3 : 1; + RK_U32 strmd_zero_rm_en : 1; + RK_U32 reserve4 : 3; + RK_U32 allow_not_wr_unref_bframe : 1; + RK_U32 fbc_output_wr_disable : 1; + + RK_U32 reserve5 : 4; + RK_U32 h26x_error_mode : 1; + RK_U32 reserve6 : 5; + RK_U32 cur_pic_is_idr : 1; + RK_U32 reserve8 : 6; //change to reg205[5] + RK_U32 filter_outbuf_mode : 1; + + /* develop branch */ + // RK_U32 reserve5 : 2; + // RK_U32 h26x_error_mode : 1; + // RK_U32 reserve6 : 2; + // RK_U32 ycacherd_prior : 1; + // RK_U32 reserve7 : 2; + // RK_U32 cur_pic_is_idr : 1; + // RK_U32 reserve8 : 1; + // RK_U32 right_auto_rst_disable : 1; + // RK_U32 frame_end_err_rst_flag : 1; + // RK_U32 rd_prior_mode : 1; + // RK_U32 rd_ctrl_prior_mode : 1; + // RK_U32 reserved9 : 1; + // RK_U32 filter_outbuf_mode : 1; + } reg013; + + struct SWREG14_FBC_PARAM_SET { + RK_U32 fbc_force_uncompress : 1; + + RK_U32 reserve0 : 2; + RK_U32 allow_16x8_cp_flag : 1; + RK_U32 reserve1 : 2; + + RK_U32 fbc_h264_exten_4or8_flag: 1; + RK_U32 reserve2 : 25; + } reg014; + + struct SWREG15_STREAM_PARAM_SET { + RK_U32 rlc_mode_direct_write : 1; + RK_U32 rlc_mode : 1; + RK_U32 strmd_ofifo_perf_opt_en : 1; + RK_U32 reserve0 : 2; + + RK_U32 strm_start_bit : 7; + RK_U32 reserve1 : 20; + } reg015; + + RK_U32 reg016_str_len; + + struct SWREG17_SLICE_NUMBER { + RK_U32 slice_num : 25; + 
RK_U32 reserve : 7; + } reg017; + + struct SWREG18_Y_HOR_STRIDE { + RK_U32 y_hor_virstride : 16; + RK_U32 reserve : 16; + } reg018; + + struct SWREG19_UV_HOR_STRIDE { + RK_U32 uv_hor_virstride : 16; + RK_U32 reserve : 16; + } reg019; + + union { + struct SWREG20_Y_STRIDE { + RK_U32 y_virstride : 28; + RK_U32 reserve : 4; + } reg020_y_virstride; + + struct SWREG20_FBC_PAYLOAD_OFFSET { + RK_U32 reserve : 4; + RK_U32 payload_st_offset : 28; + } reg020_fbc_payload_off; + }; + + + struct SWREG21_ERROR_CTRL_SET { + RK_U32 inter_error_prc_mode : 1; + RK_U32 error_intra_mode : 1; + RK_U32 error_deb_en : 1; + RK_U32 picidx_replace : 5; + RK_U32 error_spread_e : 1; + RK_U32 : 3; + RK_U32 error_inter_pred_cross_slice : 1; + RK_U32 reserve0 : 11; + + RK_U32 roi_error_ctu_cal_en : 1; + RK_U32 reserve1 : 7; + } reg021; + + struct SWREG22_ERR_ROI_CTU_OFFSET_START { + RK_U32 roi_x_ctu_offset_st : 12; + RK_U32 reserve0 : 4; + RK_U32 roi_y_ctu_offset_st : 12; + RK_U32 reserve1 : 4; + } reg022; + + struct SWREG23_ERR_ROI_CTU_OFFSET_END { + RK_U32 roi_x_ctu_offset_end : 12; + RK_U32 reserve0 : 4; + RK_U32 roi_y_ctu_offset_end : 12; + RK_U32 reserve1 : 4; + } reg023; + + struct SWREG24_CABAC_ERROR_EN_LOWBITS { + RK_U32 cabac_err_en_lowbits : 32; + } reg024; + + struct SWREG25_CABAC_ERROR_EN_HIGHBITS { + RK_U32 cabac_err_en_highbits : 30; + RK_U32 reserve : 2; + } reg025; + + struct SWREG26_BLOCK_GATING_EN { + RK_U32 swreg_block_gating_e : 20; + RK_U32 reserve : 11; + RK_U32 reg_cfg_gating_en : 1; + } reg026; + + /* NOTE: reg027 ~ reg032 are added in vdpu38x at rk3588 */ + struct SW027_CORE_SAFE_PIXELS { + // colmv and recon report coord x safe pixels + RK_U32 core_safe_x_pixels : 16; + // colmv and recon report coord y safe pixels + RK_U32 core_safe_y_pixels : 16; + } reg027; + + struct SWREG28_MULTIPLY_CORE_CTRL { + RK_U32 swreg_vp9_wr_prob_idx : 3; + RK_U32 reserve0 : 1; + RK_U32 swreg_vp9_rd_prob_idx : 3; + RK_U32 reserve1 : 1; + + RK_U32 swreg_ref_req_advance_flag : 1; + RK_U32 
sw_colmv_req_advance_flag : 1; + RK_U32 sw_poc_only_highbit_flag : 1; + RK_U32 sw_poc_arb_flag : 1; + + RK_U32 reserve2 : 4; + RK_U32 sw_film_idx : 10; + RK_U32 reserve3 : 2; + RK_U32 sw_pu_req_mismatch_dis : 1; + RK_U32 sw_colmv_req_mismatch_dis : 1; + RK_U32 reserve4 : 2; + } reg028; + + struct SWREG29_SCALE_DOWN_CTRL { + RK_U32 scale_down_y_wratio : 5; + RK_U32 reserve0 : 3; + RK_U32 scale_down_y_hratio : 5; + RK_U32 reserve1 : 3; + RK_U32 scale_down_c_wratio : 5; + RK_U32 reserve2 : 3; + RK_U32 scale_down_c_hratio : 5; + RK_U32 reserve3 : 1; + RK_U32 scale_down_roi_mode : 1; + RK_U32 scale_down_tile_mode : 1; + } reg029; + + struct SW032_Y_SCALE_DOWN_TILE8x8_HOR_STRIDE { + RK_U32 y_scale_down_hor_stride : 20; + RK_U32 : 12; + } reg030; + + struct SW031_UV_SCALE_DOWN_TILE8x8_HOR_STRIDE { + RK_U32 uv_scale_down_hor_stride : 20; + RK_U32 : 12; + } reg031; + + /* NOTE: reg027 ~ reg032 are added in vdpu38x at rk3588 */ + /* NOTE: timeout must be configured in vdpu38x */ + RK_U32 reg032_timeout_threshold; +} Vdpu382RegCommon; + +/* base: OFFSET_COMMON_ADDR_REGS */ +typedef struct Vdpu382RegCommonAddr_t { + /* offset 128 */ + RK_U32 reg128_rlc_base; + + RK_U32 reg129_rlcwrite_base; + + RK_U32 reg130_decout_base; + + RK_U32 reg131_colmv_cur_base; + + RK_U32 reg132_error_ref_base; + + RK_U32 reg133_rcb_intra_base; + + RK_U32 reg134_rcb_transd_row_base; + + RK_U32 reg135_rcb_transd_col_base; + + RK_U32 reg136_rcb_streamd_row_base; + + RK_U32 reg137_rcb_inter_row_base; + + RK_U32 reg138_rcb_inter_col_base; + + RK_U32 reg139_rcb_dblk_base; + + RK_U32 reg140_rcb_sao_base; + + RK_U32 reg141_rcb_fbc_base; + + RK_U32 reg142_rcb_filter_col_base; +} Vdpu382RegCommonAddr; + +/* base: OFFSET_INTERRUPT_REGS */ +typedef struct Vdpu382RegIrqStatus_t { + struct SWREG224_STA_INT { + RK_U32 dec_irq : 1; + RK_U32 dec_irq_raw : 1; + + RK_U32 dec_rdy_sta : 1; + RK_U32 dec_bus_sta : 1; + RK_U32 dec_error_sta : 1; + RK_U32 dec_timeout_sta : 1; + RK_U32 buf_empty_sta : 1; + RK_U32
colmv_ref_error_sta : 1; + RK_U32 cabu_end_sta : 1; + + RK_U32 softreset_rdy : 1; + + RK_U32 dec_line_irq : 1; + RK_U32 dec_line_irq_raw : 1; + RK_U32 ltb_pause_sta : 1; + RK_U32 mmureset_rdy : 1; + RK_U32 ltb_end_sta : 1; + + RK_U32 reserve : 17; + } reg224; + + struct SWREG225_STA_ERR_INFO { + RK_U32 all_frame_error_flag : 1; + RK_U32 strmd_detect_error_flag : 1; + RK_U32 reserve : 30; + } reg225; + + struct SWREG226_STA_CABAC_ERROR_STATUS { + RK_U32 strmd_error_status : 28; + RK_U32 reserve : 4; + } reg226; + + struct SWREG227_STA_COLMV_ERROR_REF_PICIDX { + RK_U32 colmv_error_ref_picidx : 4; + RK_U32 reserve : 28; + } reg227; + + struct SWREG228_STA_CABAC_ERROR_CTU_OFFSET { + RK_U32 cabac_error_ctu_offset_x : 12; + RK_U32 : 4; + RK_U32 cabac_error_ctu_offset_y : 12; + RK_U32 : 4; + } reg228; + + struct SWREG229_STA_SAOWR_CTU_OFFSET { + RK_U32 saowr_xoffset : 16; + RK_U32 saowr_yoffset : 16; + } reg229; + + struct SWREG230_STA_SLICE_DEC_NUM { + RK_U32 slicedec_num : 25; + RK_U32 reserve : 7; + } reg230; + + struct SWREG231_STA_FRAME_ERROR_CTU_NUM { + RK_U32 frame_ctu_err_num : 32; + } reg231; + + struct SWREG232_STA_ERROR_PACKET_NUM { + RK_U32 packet_err_num : 16; + RK_U32 reserve : 16; + } reg232; + + struct SWREG233_STA_ERR_CTU_NUM_IN_RO { + RK_U32 error_ctu_num_in_roi : 24; + RK_U32 reserve : 8; + } reg233; + + struct SWREG234_BUF_EMPTY_OFFSET { + RK_U32 coord_report_offset_x : 16; + RK_U32 coord_report_offset_y : 16; + } reg234; + + struct SWREG235_COORD_REPORT_OUTBUF_HEIGHT { + RK_U32 coord_report_output_height : 16; + RK_U32 reserve : 16; + } reg235; + + RK_U32 reserve_reg236_237[2]; +} Vdpu382RegIrqStatus; + +typedef struct Vdpu382RegStatistic_t { + struct SWREG256_DEBUG_PERF_LATENCY_CTRL0 { + RK_U32 axi_perf_work_e : 1; + RK_U32 axi_perf_clr_e : 1; + RK_U32 reserve0 : 1; + RK_U32 axi_cnt_type : 1; + RK_U32 rd_latency_id : 4; + RK_U32 rd_latency_thr : 12; + RK_U32 reserve1 : 12; + } reg256; + + struct SWREG257_DEBUG_PERF_LATENCY_CTRL1 { + RK_U32 
addr_align_type : 2; + RK_U32 ar_cnt_id_type : 1; + RK_U32 aw_cnt_id_type : 1; + RK_U32 ar_count_id : 4; + RK_U32 aw_count_id : 4; + RK_U32 rd_band_width_mode : 1; + RK_U32 reserve : 19; + } reg257; + + struct SWREG258_DEBUG_PERF_RD_MAX_LATENCY_NUM { + RK_U32 rd_max_latency_num : 16; + RK_U32 reserve : 16; + } reg258; + + RK_U32 reg259_rd_latency_thr_num_ch0; + RK_U32 reg260_rd_latency_acc_sum; + RK_U32 reg261_perf_rd_axi_total_byte; + RK_U32 reg262_perf_wr_axi_total_byte; + RK_U32 reg263_perf_working_cnt; + + RK_U32 reserve_reg264; + + struct SWREG265_DEBUG_PERF_SEL { + RK_U32 perf_cnt0_sel : 6; + RK_U32 reserve0 : 2; + RK_U32 perf_cnt1_sel : 6; + RK_U32 reserve1 : 2; + RK_U32 perf_cnt2_sel : 6; + RK_U32 reserve2 : 10; + } reg265; + + RK_U32 reg266_perf_cnt0; + RK_U32 reg267_perf_cnt1; + RK_U32 reg268_perf_cnt2; + + RK_U32 reserve_reg269; + + struct SWREG270_DEBUG_QOS_CTRL { + RK_U32 bus2mc_buffer_qos_level : 8; + RK_U32 reserve0 : 8; + RK_U32 axi_rd_hurry_level : 2; + RK_U32 reserve1 : 2; + RK_U32 axi_wr_qos : 2; + RK_U32 reserve2 : 2; + RK_U32 axi_wr_hurry_level : 2; + RK_U32 reserve3 : 2; + RK_U32 axi_rd_qos : 2; + RK_U32 reserve4 : 2; + } reg270; + + RK_U32 reg271_wr_wait_cycle_qos; + + struct SWREG272_DEBUG_INT { + RK_U32 busidle_flag : 1; + RK_U32 reserved : 4; + RK_U32 mmu_busidle_flag : 1; + RK_U32 wr_tansfer_cnt : 8; + RK_U32 reserved1 : 2; + RK_U32 Sw_streamfifo_space2full : 7; + RK_U32 reserved2 : 1; + RK_U32 mmu_wr_transer_cnt : 8; + } reg272; + + struct SWREG273 { + RK_U32 bus_status_flag : 19; + RK_U32 reserve0 : 12; + RK_U32 pps_no_ref_bframe_dec_r : 1; + } reg273; + + RK_U16 reg274_y_min_value; + RK_U16 reg274_y_max_value; + RK_U16 reg275_u_min_value; + RK_U16 reg275_u_max_value; + RK_U16 reg276_v_min_value; + RK_U16 reg276_v_max_value; + + struct SWREG277_ERROR_SPREAD_NUM { + RK_U32 err_spread_cnt_sum : 24; + RK_U32 : 8; + } reg277; + + // RK_U32 reg277_err_spread_num; + // struct SWREG278_DEC_LINE_OFFSET_Y { + // RK_U32 dec_line_offset_y : 16; + 
// RK_U32 reserve : 16; + // } reg278; + +} Vdpu382RegStatistic; + +typedef struct vdpu382_rcb_info_t { + RK_S32 reg; + RK_S32 size; + RK_S32 offset; +} Vdpu382RcbInfo; + +#ifdef __cplusplus +extern "C" { +#endif + +RK_S32 vdpu382_get_rcb_buf_size(Vdpu382RcbInfo *info, RK_S32 width, RK_S32 height); +void vdpu382_setup_rcb(Vdpu382RegCommonAddr *reg, MppDev dev, MppBuffer buf, Vdpu382RcbInfo *info); +RK_S32 vdpu382_compare_rcb_size(const void *a, const void *b); +void vdpu382_setup_statistic(Vdpu382RegCommon *com, Vdpu382RegStatistic *sta); +void vdpu382_afbc_align_calc(MppBufSlots slots, MppFrame frame, RK_U32 expand); + +#ifdef __cplusplus +} +#endif + +#endif /* __VDPU382_COM_H__ */ diff --git a/mpp/hal/rkdec/inc/vdpu382_h264d.h b/mpp/hal/rkdec/inc/vdpu382_h264d.h new file mode 100644 index 00000000..fe275b3d --- /dev/null +++ b/mpp/hal/rkdec/inc/vdpu382_h264d.h @@ -0,0 +1,269 @@ +/* + * Copyright 2022 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __VDPU382_H264D_H__ +#define __VDPU382_H264D_H__ + +#include "vdpu382_com.h" + +/* base: OFFSET_CODEC_PARAMS_REGS */ +typedef struct Vdpu382RegH264dParam_t { + struct SWREG64_H26X_SET { + RK_U32 h26x_frame_orslice : 1; + RK_U32 h26x_rps_mode : 1; + RK_U32 h26x_stream_mode : 1; + RK_U32 h26x_stream_lastpacket : 1; + RK_U32 h264_firstslice_flag : 1; + RK_U32 reserve : 27; + } reg64; + + struct SWREG65_CUR_POC { + RK_U32 cur_top_poc : 32; + } reg65; + + struct SWREG66_H264_CUR_POC1 { + RK_U32 cur_bot_poc : 32; + } reg66; + + RK_U32 reg67_98_ref_poc[32]; + + struct SWREG99_H264_REG0_3_INFO { + + RK_U32 ref0_field : 1; + RK_U32 ref0_topfield_used : 1; + RK_U32 ref0_botfield_used : 1; + RK_U32 ref0_colmv_use_flag : 1; + RK_U32 ref0_reserve : 4; + + RK_U32 ref1_field : 1; + RK_U32 ref1_topfield_used : 1; + RK_U32 ref1_botfield_used : 1; + RK_U32 ref1_colmv_use_flag : 1; + RK_U32 ref1_reserve : 4; + + RK_U32 ref2_field : 1; + RK_U32 ref2_topfield_used : 1; + RK_U32 ref2_botfield_used : 1; + RK_U32 ref2_colmv_use_flag : 1; + RK_U32 ref2_reserve : 4; + + RK_U32 ref3_field : 1; + RK_U32 ref3_topfield_used : 1; + RK_U32 ref3_botfield_used : 1; + RK_U32 ref3_colmv_use_flag : 1; + RK_U32 ref3_reserve : 4; + } reg99; + + struct SWREG100_H264_REG4_7_INFO { + + RK_U32 ref4_field : 1; + RK_U32 ref4_topfield_used : 1; + RK_U32 ref4_botfield_used : 1; + RK_U32 ref4_colmv_use_flag : 1; + RK_U32 ref4_reserve : 4; + + RK_U32 ref5_field : 1; + RK_U32 ref5_topfield_used : 1; + RK_U32 ref5_botfield_used : 1; + RK_U32 ref5_colmv_use_flag : 1; + RK_U32 ref5_reserve : 4; + + RK_U32 ref6_field : 1; + RK_U32 ref6_topfield_used : 1; + RK_U32 ref6_botfield_used : 1; + RK_U32 ref6_colmv_use_flag : 1; + RK_U32 ref6_reserve : 4; + + RK_U32 ref7_field : 1; + RK_U32 ref7_topfield_used : 1; + RK_U32 ref7_botfield_used : 1; + RK_U32 ref7_colmv_use_flag : 1; + RK_U32 ref7_reserve : 4; + } reg100; + + struct SWREG101_H264_REG8_11_INFO { + + RK_U32 ref8_field : 1; + RK_U32 ref8_topfield_used 
: 1; + RK_U32 ref8_botfield_used : 1; + RK_U32 ref8_colmv_use_flag : 1; + RK_U32 ref8_reserve : 4; + + RK_U32 ref9_field : 1; + RK_U32 ref9_topfield_used : 1; + RK_U32 ref9_botfield_used : 1; + RK_U32 ref9_colmv_use_flag : 1; + RK_U32 ref9_reserve : 4; + + RK_U32 ref10_field : 1; + RK_U32 ref10_topfield_used : 1; + RK_U32 ref10_botfield_used : 1; + RK_U32 ref10_colmv_use_flag : 1; + RK_U32 ref10_reserve : 4; + + RK_U32 ref11_field : 1; + RK_U32 ref11_topfield_used : 1; + RK_U32 ref11_botfield_used : 1; + RK_U32 ref11_colmv_use_flag : 1; + RK_U32 ref11_reserve : 4; + } reg101; + + struct SWREG102_H264_REG12_15_INFO { + + RK_U32 ref12_field : 1; + RK_U32 ref12_topfield_used : 1; + RK_U32 ref12_botfield_used : 1; + RK_U32 ref12_colmv_use_flag : 1; + RK_U32 ref12_reserve : 4; + + RK_U32 ref13_field : 1; + RK_U32 ref13_topfield_used : 1; + RK_U32 ref13_botfield_used : 1; + RK_U32 ref13_colmv_use_flag : 1; + RK_U32 ref13_reserve : 4; + + RK_U32 ref14_field : 1; + RK_U32 ref14_topfield_used : 1; + RK_U32 ref14_botfield_used : 1; + RK_U32 ref14_colmv_use_flag : 1; + RK_U32 ref14_reserve : 4; + + RK_U32 ref15_field : 1; + RK_U32 ref15_topfield_used : 1; + RK_U32 ref15_botfield_used : 1; + RK_U32 ref15_colmv_use_flag : 1; + RK_U32 ref15_reserve : 4; + } reg102; + + struct SWREG103_111_NO_USE_REGS { + RK_U32 reserve; + } no_use_regs[9]; + + struct SWREG112_ERROR_REF_INFO { + RK_U32 avs2_ref_error_field : 1; + RK_U32 avs2_ref_error_topfield : 1; + RK_U32 ref_error_topfield_used : 1; + RK_U32 ref_error_botfield_used : 1; + RK_U32 reserve : 28; + } reg112; +} Vdpu382RegH264dParam; + +/* base: OFFSET_CODEC_ADDR_REGS */ +typedef struct Vdpu382RegH264dAddr_t { + /* SWREG160 */ + RK_U32 reg160_no_use; + + /* SWREG161 */ + RK_U32 pps_base; + + /* SWREG162 */ + RK_U32 reg162_no_use; + + /* SWREG163 */ + RK_U32 rps_base; + + /* SWREG164~179 */ + RK_U32 ref_base[16]; + + /* SWREG180 */ + RK_U32 scanlist_addr; + + /* SWREG181~196 */ + RK_U32 colmv_base[16]; + + /* SWREG197 */ + RK_U32 
cabactbl_base; +} Vdpu382RegH264dAddr; + +typedef struct Vdpu382H264dHighPoc_t { + /* SWREG200 */ + struct SWREG200_REF0_7_POC_HIGHBIT { + RK_U32 ref0_poc_highbit : 4; + RK_U32 ref1_poc_highbit : 4; + RK_U32 ref2_poc_highbit : 4; + RK_U32 ref3_poc_highbit : 4; + RK_U32 ref4_poc_highbit : 4; + RK_U32 ref5_poc_highbit : 4; + RK_U32 ref6_poc_highbit : 4; + RK_U32 ref7_poc_highbit : 4; + } reg200; + struct SWREG201_REF8_15_POC_HIGHBIT { + RK_U32 ref8_poc_highbit : 4; + RK_U32 ref9_poc_highbit : 4; + RK_U32 ref10_poc_highbit : 4; + RK_U32 ref11_poc_highbit : 4; + RK_U32 ref12_poc_highbit : 4; + RK_U32 ref13_poc_highbit : 4; + RK_U32 ref14_poc_highbit : 4; + RK_U32 ref15_poc_highbit : 4; + } reg201; + struct SWREG200_REF16_23_POC_HIGHBIT { + RK_U32 ref16_poc_highbit : 4; + RK_U32 ref17_poc_highbit : 4; + RK_U32 ref18_poc_highbit : 4; + RK_U32 ref19_poc_highbit : 4; + RK_U32 ref20_poc_highbit : 4; + RK_U32 ref21_poc_highbit : 4; + RK_U32 ref22_poc_highbit : 4; + RK_U32 ref23_poc_highbit : 4; + } reg202; + struct SWREG200_REF24_31_POC_HIGHBIT { + RK_U32 ref24_poc_highbit : 4; + RK_U32 ref25_poc_highbit : 4; + RK_U32 ref26_poc_highbit : 4; + RK_U32 ref27_poc_highbit : 4; + RK_U32 ref28_poc_highbit : 4; + RK_U32 ref29_poc_highbit : 4; + RK_U32 ref30_poc_highbit : 4; + RK_U32 ref31_poc_highbit : 4; + } reg203; + struct SWREG200_CUR_POC_HIGHBIT { + RK_U32 cur_poc_highbit : 4; + RK_U32 reserver : 28; + } reg204; + + struct SWREG205_DEBUG_INFO { + RK_U32 force_softreset_valid : 1; + RK_U32 force_mmureset_valid : 1; + RK_U32 reserve0 : 2; + RK_U32 error_auto_rst_disable : 1; + RK_U32 right_auto_rst_disable : 1; + RK_U32 buf_empty_security_en : 1; + RK_U32 coord_realtime_report_en : 1; + + RK_U32 fetchcmd_merge_dis : 1; + RK_U32 dec_timeout_dis : 1; + RK_U32 reg_cfg_wr_dis : 1; + RK_U32 reserve1 : 1; + RK_U32 force_busidle_req : 1; + RK_U32 mmu_force_busidle_req : 1; + RK_U32 mmu_sel : 1; + RK_U32 reserve2 : 17; + + } reg205; +} Vdpu382H264dHighPoc_t; + +typedef struct 
Vdpu382H264dRegSet_t { + Vdpu382RegCommon common; + Vdpu382RegH264dParam h264d_param; + Vdpu382RegCommonAddr common_addr; + Vdpu382RegH264dAddr h264d_addr; + Vdpu382H264dHighPoc_t h264d_highpoc; + Vdpu382RegIrqStatus irq_status; + Vdpu382RegStatistic statistic; +} Vdpu382H264dRegSet; + +#endif /* __VDPU382_H264D_H__ */ diff --git a/mpp/hal/rkdec/inc/vdpu382_h265d.h b/mpp/hal/rkdec/inc/vdpu382_h265d.h new file mode 100644 index 00000000..1b7a07f6 --- /dev/null +++ b/mpp/hal/rkdec/inc/vdpu382_h265d.h @@ -0,0 +1,208 @@ +/* + * Copyright 2022 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __VDPU382_H265D_H__ +#define __VDPU382_H265D_H__ + +#include "vdpu382_com.h" + +typedef struct Vdpu382RegH265d_t { + struct SWREG64_H26X_SET { + RK_U32 h26x_frame_orslice : 1; + RK_U32 h26x_rps_mode : 1; + RK_U32 h26x_stream_mode : 1; + RK_U32 h26x_stream_lastpacket : 1; + RK_U32 h264_firstslice_flag : 1; + RK_U32 reserve : 27; + } reg64; + + struct SWREG65_CUR_POC { + RK_U32 cur_top_poc : 32; + } reg65; + + struct SWREG66_H264_CUR_POC1 { + RK_U32 cur_bot_poc : 32; + } reg66; + + RK_U32 reg67_82_ref_poc[16]; + + + struct SWREG83_98_H264_REF_POC { + RK_U32 ref_poc : 32; + } ref_poc_no_use[16]; + + /* struct SWREG99_HEVC_REF_VALID{ + RK_U32 hevc_ref_valid : 15; + RK_U32 reserve : 17; + }hevc_ref_valid; */ + + struct SWREG99_HEVC_REF_VALID { + RK_U32 hevc_ref_valid_0 : 1; + RK_U32 hevc_ref_valid_1 : 1; + RK_U32 hevc_ref_valid_2 : 1; + RK_U32 hevc_ref_valid_3 : 1; + RK_U32 reserve0 : 4; + RK_U32 hevc_ref_valid_4 : 1; + RK_U32 hevc_ref_valid_5 : 1; + RK_U32 hevc_ref_valid_6 : 1; + RK_U32 hevc_ref_valid_7 : 1; + RK_U32 reserve1 : 4; + RK_U32 hevc_ref_valid_8 : 1; + RK_U32 hevc_ref_valid_9 : 1; + RK_U32 hevc_ref_valid_10 : 1; + RK_U32 hevc_ref_valid_11 : 1; + RK_U32 reserve2 : 4; + RK_U32 hevc_ref_valid_12 : 1; + RK_U32 hevc_ref_valid_13 : 1; + RK_U32 hevc_ref_valid_14 : 1; + RK_U32 reserve3 : 5; + } reg99; + + RK_U32 reg100_102_no_use[3]; + + struct SWREG103_HEVC_MVC0 { + RK_U32 ref_pic_layer_same_with_cur : 16; + RK_U32 reserve : 16; + } reg103; + + struct SWREG104_HEVC_MVC1 { + RK_U32 poc_lsb_not_present_flag : 1; + RK_U32 num_direct_ref_layers : 6; + RK_U32 reserve0 : 1; + + RK_U32 num_reflayer_pics : 6; + RK_U32 default_ref_layers_active_flag : 1; + RK_U32 max_one_active_ref_layer_flag : 1; + + RK_U32 poc_reset_info_present_flag : 1; + RK_U32 vps_poc_lsb_aligned_flag : 1; + RK_U32 mvc_poc15_valid_flag : 1; + RK_U32 reserve1 : 13; + } reg104; + + struct SWREG105_111_NO_USE_REGS { + RK_U32 no_use_regs[7]; + } no_use_regs; + + struct 
SWREG112_ERROR_REF_INFO { + RK_U32 avs2_ref_error_field : 1; + RK_U32 avs2_ref_error_topfield : 1; + RK_U32 ref_error_topfield_used : 1; + RK_U32 ref_error_botfield_used : 1; + RK_U32 reserve : 28; + } reg112; + +} Vdpu382RegH265d; + +typedef struct Vdpu382RegH265dAddr_t { + struct SWREG160_VP9_DELTA_PROB_BASE { + RK_U32 vp9_delta_prob_base : 32; + } reg160_no_use; + + RK_U32 reg161_pps_base; + + RK_U32 reg162_no_use; + + RK_U32 reg163_rps_base; + + RK_U32 reg164_179_ref_base[16]; + + RK_U32 reg180_scanlist_addr; + + RK_U32 reg181_196_colmv_base[16]; + + RK_U32 reg197_cabactbl_base; +} Vdpu382RegH265dAddr; + +typedef struct Vdpu382H265dHighPoc_t { + /* SWREG200 */ + struct SWREG200_REF0_7_POC_HIGHBIT { + RK_U32 ref0_poc_highbit : 4; + RK_U32 ref1_poc_highbit : 4; + RK_U32 ref2_poc_highbit : 4; + RK_U32 ref3_poc_highbit : 4; + RK_U32 ref4_poc_highbit : 4; + RK_U32 ref5_poc_highbit : 4; + RK_U32 ref6_poc_highbit : 4; + RK_U32 ref7_poc_highbit : 4; + } reg200; + struct SWREG201_REF8_15_POC_HIGHBIT { + RK_U32 ref8_poc_highbit : 4; + RK_U32 ref9_poc_highbit : 4; + RK_U32 ref10_poc_highbit : 4; + RK_U32 ref11_poc_highbit : 4; + RK_U32 ref12_poc_highbit : 4; + RK_U32 ref13_poc_highbit : 4; + RK_U32 ref14_poc_highbit : 4; + RK_U32 ref15_poc_highbit : 4; + } reg201; + struct SWREG200_REF16_23_POC_HIGHBIT { + RK_U32 ref16_poc_highbit : 4; + RK_U32 ref17_poc_highbit : 4; + RK_U32 ref18_poc_highbit : 4; + RK_U32 ref19_poc_highbit : 4; + RK_U32 ref20_poc_highbit : 4; + RK_U32 ref21_poc_highbit : 4; + RK_U32 ref22_poc_highbit : 4; + RK_U32 ref23_poc_highbit : 4; + } reg202; + struct SWREG200_REF24_31_POC_HIGHBIT { + RK_U32 ref24_poc_highbit : 4; + RK_U32 ref25_poc_highbit : 4; + RK_U32 ref26_poc_highbit : 4; + RK_U32 ref27_poc_highbit : 4; + RK_U32 ref28_poc_highbit : 4; + RK_U32 ref29_poc_highbit : 4; + RK_U32 ref30_poc_highbit : 4; + RK_U32 ref31_poc_highbit : 4; + } reg203; + struct SWREG200_CUR_POC_HIGHBIT { + RK_U32 cur_poc_highbit : 4; + RK_U32 reserver : 28; + } reg204; + 
+ struct SWREG205_DEBUG_INFO { + RK_U32 force_softreset_valid : 1; + RK_U32 force_mmureset_valid : 1; + RK_U32 reserve0 : 2; + RK_U32 error_auto_rst_disable : 1; + RK_U32 right_auto_rst_disable : 1; + RK_U32 buf_empty_security_en : 1; + RK_U32 coord_realtime_report_en : 1; + + RK_U32 fetchcmd_merge_dis : 1; + RK_U32 dec_timeout_dis : 1; + RK_U32 reg_cfg_wr_dis : 1; + RK_U32 reserve1 : 1; + RK_U32 force_busidle_req : 1; + RK_U32 mmu_force_busidle_req : 1; + RK_U32 mmu_sel : 1; + RK_U32 reserve2 : 17; + + } reg205; +} Vdpu382H2645HighPoc_t; + +typedef struct Vdpu382H265dRegSet_t { + Vdpu382RegCommon common; + Vdpu382RegH265d h265d_param; + Vdpu382RegCommonAddr common_addr; + Vdpu382RegH265dAddr h265d_addr; + Vdpu382H2645HighPoc_t highpoc; + Vdpu382RegIrqStatus irq_status; + Vdpu382RegStatistic statistic; +} Vdpu382H265dRegSet; + +#endif /* __VDPU382_H265D_H__ */ diff --git a/mpp/hal/rkdec/inc/vdpu382_vp9d.h b/mpp/hal/rkdec/inc/vdpu382_vp9d.h new file mode 100644 index 00000000..ba6cab6b --- /dev/null +++ b/mpp/hal/rkdec/inc/vdpu382_vp9d.h @@ -0,0 +1,315 @@ +/* + * Copyright 2022 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __HAL_VDPU382_VP9D_H__
+#define __HAL_VDPU382_VP9D_H__
+
+#include "rk_type.h"
+#include "vdpu382_com.h"
+
+
+typedef struct Vdpu382RegVp9dParam_t { /* VP9 parameter words reg64..reg112, declared in ascending register order; every bit-field struct below declares exactly 32 bits */
+    struct SWREG64_VP9_SET {
+        RK_U32      cprheader_offset                    : 16;
+        RK_U32      reserve                             : 16;
+    } reg64;
+
+    struct SWREG65_CUR_POC {
+        RK_U32      cur_poc                             : 32;
+    } reg65;
+
+    RK_U32 reg66; /* plain word, no bit-fields declared for it */
+
+    struct SWREG67_74_VP9_SEGID_GRP {
+        RK_U32      segid_abs_delta                     : 1;
+        RK_U32      segid_frame_qp_delta_en             : 1;
+        RK_U32      segid_frame_qp_delta                : 9;
+        RK_U32      segid_frame_loopfitler_value_en     : 1; /* "loopfitler" spelling is used verbatim by hal_vp9d_vdpu382.c; rename both together or not at all */
+        RK_U32      segid_frame_loopfilter_value        : 7;
+        RK_U32      segid_referinfo_en                  : 1;
+        RK_U32      segid_referinfo                     : 2;
+        RK_U32      segid_frame_skip_en                 : 1;
+        RK_U32      reserve                             : 9;
+    } reg67_74[8]; /* eight consecutive words sharing one layout (reg67..reg74) */
+
+    struct SWREG75_VP9_INFO_LASTFRAME {
+        RK_U32      mode_deltas_lastframe               : 14;
+        RK_U32      vp9_segment_id_clear                : 1;
+        RK_U32      vp9_segment_id_update               : 1;
+        RK_U32      segmentation_enable_lstframe        : 1;
+        RK_U32      last_show_frame                     : 1;
+        RK_U32      last_intra_only                     : 1;
+        RK_U32      last_widthheight_eqcur              : 1;
+        RK_U32      color_space_lastkeyframe            : 3;
+        RK_U32      reserve1                            : 9;
+    } reg75;
+
+    struct SWREG76_VP9_CPRHEADER_CONFIG {
+        RK_U32      tx_mode                             : 3;
+        RK_U32      frame_reference_mode                : 2;
+        RK_U32      reserve                             : 27;
+    } reg76;
+
+    struct SWREG77_VP9_INTERCMD_NUM {
+        RK_U32      intercmd_num                        : 24;
+        RK_U32      reserve                             : 8;
+    } reg77;
+
+    struct SWREG78_VP9_LASTTILE_SIZE {
+        RK_U32      lasttile_size                       : 24;
+        RK_U32      reserve                             : 8;
+    } reg78;
+
+    struct SWREG79_VP9_LASTF_Y_HOR_VIRSTRIDE {
+        RK_U32      lastfy_hor_virstride                : 16;
+        RK_U32      reserve                             : 16;
+    } reg79;
+
+    struct SWREG80_VP9_LASTF_UV_HOR_VIRSTRIDE {
+        RK_U32      lastfuv_hor_virstride               : 16;
+        RK_U32      reserve                             : 16;
+    } reg80;
+
+    struct SWREG81_VP9_GOLDENF_Y_HOR_VIRSTRIDE {
+        RK_U32      goldenfy_hor_virstride              : 16;
+        RK_U32      reserve                             : 16;
+    } reg81;
+
+    struct SWREG82_VP9_GOLDENF_UV_HOR_VIRSTRIDE {
+        RK_U32      goldenfuv_hor_virstride             : 16;
+        RK_U32      reserve                             : 16;
+    } reg82;
+
+    struct SWREG83_VP9_ALTREFF_Y_HOR_VIRSTRIDE {
+        RK_U32      altreffy_hor_virstride              : 16;
+        RK_U32      reserve                             : 16;
+    } reg83;
+
+    struct SWREG84_VP9_ALTREFF_UV_HOR_VIRSTRIDE {
+        RK_U32      altreffuv_hor_virstride             : 16;
+        RK_U32      reserve                             : 16;
+    } reg84;
+
+    struct SWREG85_VP9_LASTF_Y_VIRSTRIDE {
+        RK_U32      lastfy_virstride                    : 28;
+        RK_U32      reserve                             : 4;
+    } reg85;
+
+    struct SWREG86_VP9_GOLDEN_Y_VIRSTRIDE {
+        RK_U32      goldeny_virstride                   : 28;
+        RK_U32      reserve                             : 4;
+    } reg86;
+
+    struct SWREG87_VP9_ALTREF_Y_VIRSTRIDE {
+        RK_U32      altrefy_virstride                   : 28;
+        RK_U32      reserve                             : 4;
+    } reg87;
+
+    struct SWREG88_VP9_LREF_HOR_SCALE {
+        RK_U32      lref_hor_scale                      : 16;
+        RK_U32      reserve                             : 16;
+    } reg88;
+
+    struct SWREG89_VP9_LREF_VER_SCALE {
+        RK_U32      lref_ver_scale                      : 16;
+        RK_U32      reserve                             : 16;
+    } reg89;
+
+    struct SWREG90_VP9_GREF_HOR_SCALE {
+        RK_U32      gref_hor_scale                      : 16;
+        RK_U32      reserve                             : 16;
+    } reg90;
+
+    struct SWREG91_VP9_GREF_VER_SCALE {
+        RK_U32      gref_ver_scale                      : 16;
+        RK_U32      reserve                             : 16;
+    } reg91;
+
+    struct SWREG92_VP9_AREF_HOR_SCALE {
+        RK_U32      aref_hor_scale                      : 16;
+        RK_U32      reserve                             : 16;
+    } reg92;
+
+    struct SWREG93_VP9_AREF_VER_SCALE {
+        RK_U32      aref_ver_scale                      : 16;
+        RK_U32      reserve                             : 16;
+    } reg93;
+
+    struct SWREG94_VP9_REF_DELTAS_LASTFRAME {
+        RK_U32      ref_deltas_lastframe                : 28; /* hal_vp9d_vdpu382.c packs four 7-bit values here, value i at bit 7*i */
+        RK_U32      reserve                             : 4;
+    } reg94;
+
+    struct SWREG95_LAST_POC {
+        RK_U32      last_poc                            : 32;
+    } reg95;
+
+    struct SWREG96_GOLDEN_POC {
+        RK_U32      golden_poc                          : 32;
+    } reg96;
+
+    struct SWREG97_ALTREF_POC {
+        RK_U32      altref_poc                          : 32;
+    } reg97;
+
+    struct SWREG98_COF_REF_POC {
+        RK_U32      col_ref_poc                         : 32;
+    } reg98;
+
+    struct SWREG99_PROB_REF_POC {
+        RK_U32      prob_ref_poc                        : 32;
+    } reg99;
+
+    struct SWREG100_SEGID_REF_POC {
+        RK_U32      segid_ref_poc                       : 32;
+    } reg100;
+
+    RK_U32 reg101_102_no_use[2]; /* two unused words between reg100 and reg103 */
+
+    struct SWREG103_VP9_PROB_EN {
+        RK_U32      reserve                             : 20;
+        RK_U32      prob_update_en                      : 1;
+        RK_U32      refresh_en                          : 1;
+        RK_U32      prob_save_en                        : 1;
+        RK_U32      intra_only_flag                     : 1;
+
+        RK_U32      txfmmode_rfsh_en                    : 1;
+        RK_U32      ref_mode_rfsh_en                    : 1;
+        RK_U32      single_ref_rfsh_en                  : 1;
+        RK_U32      comp_ref_rfsh_en                    : 1;
+
+        RK_U32      interp_filter_switch_en             : 1;
+        RK_U32      allow_high_precision_mv             : 1;
+        RK_U32      last_key_frame_flag                 : 1;
+        RK_U32      inter_coef_rfsh_flag                : 1;
+    } reg103;
+
+    RK_U32 reg104_no_use; /* one unused word between reg103 and reg105 */
+
+    struct SWREG105_VP9CNT_UPD_EN_AVS2_HEADLEN {
+        RK_U32      avs2_head_len                       : 4;
+        RK_U32      count_update_en                     : 1;
+        RK_U32      reserve                             : 27;
+    } reg105;
+
+    struct SWREG106_VP9_FRAME_WIDTH_LAST {
+        RK_U32      framewidth_last                     : 16;
+        RK_U32      reserve                             : 16;
+    } reg106;
+
+    struct SWREG107_VP9_FRAME_HEIGHT_LAST {
+        RK_U32      frameheight_last                    : 16;
+        RK_U32      reserve                             : 16;
+    } reg107;
+
+    struct SWREG108_VP9_FRAME_WIDTH_GOLDEN {
+        RK_U32      framewidth_golden                   : 16;
+        RK_U32      reserve                             : 16;
+    } reg108;
+
+    struct SWREG109_VP9_FRAME_HEIGHT_GOLDEN {
+        RK_U32      frameheight_golden                  : 16;
+        RK_U32      reserve                             : 16;
+    } reg109;
+
+    struct SWREG110_VP9_FRAME_WIDTH_ALTREF {
+        RK_U32      framewidth_alfter                   : 16; /* "alfter" spelling is used verbatim by hal_vp9d_vdpu382.c for the altref frame */
+        RK_U32      reserve                             : 16;
+    } reg110;
+
+    struct SWREG111_VP9_FRAME_HEIGHT_ALTREF {
+        RK_U32      frameheight_alfter                  : 16;
+        RK_U32      reserve                             : 16;
+    } reg111;
+
+    struct SWREG112_ERROR_REF_INFO {
+        RK_U32      ref_error_field                     : 1;
+        RK_U32      ref_error_topfield                  : 1;
+        RK_U32      ref_error_topfield_used             : 1;
+        RK_U32      ref_error_botfield_used             : 1;
+        RK_U32      reserve                             : 28;
+    } reg112;
+
+} Vdpu382RegVp9dParam;
+
+typedef struct Vdpu382RegVp9dAddr_t { /* VP9 address words reg160..reg205; hal_vp9d_vdpu382.c fills the *_base members with values from mpp_buffer_get_fd() */
+
+    RK_U32  reg160_delta_prob_base;
+
+    RK_U32  reg161_pps_base;
+
+    RK_U32  reg162_last_prob_base;
+
+    RK_U32  reg163_rps_base;
+
+    RK_U32  reg164_ref_last_base; /* hal_vp9d_vdpu382.c indexes reg164..reg166 as a 3-entry array starting at this member: keep the three adjacent and in this order */
+
+    RK_U32  reg165_ref_golden_base;
+
+    RK_U32  reg166_ref_alfter_base;
+
+    RK_U32  reg167_count_prob_base;
+
+    RK_U32  reg168_segidlast_base;
+
+    RK_U32  reg169_segidcur_base;
+
+    RK_U32  reg170_ref_colmv_base;
+
+    RK_U32  reg171_intercmd_base;
+
+    RK_U32  reg172_update_prob_wr_base;
+
+    RK_U32  reg173_179_no_use[7]; /* seven unused words, reg173..reg179 */
+
+    RK_U32  reg180_scanlist_base;
+
+    RK_U32  reg181_196_ref_colmv_base[16];
+
+    RK_U32  reg197_cabactbl_base;
+
+    RK_U32  reg198_204reserve[7]; /* seven reserved words, reg198..reg204 */
+
+    struct SWREG205_DEBUG_INFO {
+        RK_U32      force_softreset_valid               : 1;
+        RK_U32      force_mmureset_valid                : 1;
+        RK_U32      reserve0                            : 2;
+        RK_U32      error_auto_rst_disable              : 1;
+        RK_U32      right_auto_rst_disable              : 1;
+        RK_U32      buf_empty_security_en               : 1;
+        RK_U32      coord_realtime_report_en            : 1;
+
+        RK_U32      fetchcmd_merge_dis                  : 1;
+        RK_U32      dec_timeout_dis                     : 1;
+        RK_U32      reserve1                            : 2;
+        RK_U32      force_busidle_req                   : 1;
+        RK_U32      mmu_force_busidle_req               : 1;
+        RK_U32      mmu_sel                             : 1;
+        RK_U32      reserve2                            : 17;
+
+    } reg205;
+} Vdpu382RegVp9dAddr;
+
+typedef struct Vdpu382Vp9dRegSet_t { /* whole VP9 register image, allocated and zeroed as one object by hal_vp9d_vdpu382.c; NOTE(review): member order presumably mirrors the hardware register order - confirm before reordering */
+    Vdpu382RegCommon         common;
+    Vdpu382RegVp9dParam      vp9d_param;
+    Vdpu382RegCommonAddr     common_addr;
+    Vdpu382RegVp9dAddr       vp9d_addr;
+    Vdpu382RegIrqStatus      irq_status;
+    Vdpu382RegStatistic      statistic;
+} Vdpu382Vp9dRegSet;
+
+#endif /* __HAL_VDPU382_VP9D_H__ */
\ No newline at end of file
diff --git a/mpp/hal/rkdec/vdpu382_com.c b/mpp/hal/rkdec/vdpu382_com.c
new file mode 100644
index 00000000..15a01c6e
--- /dev/null
+++ b/mpp/hal/rkdec/vdpu382_com.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2022 Rockchip Electronics Co. LTD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#define MODULE_TAG "vdpu382_com" + +#include + +#include "mpp_log.h" +#include "mpp_buffer.h" +#include "mpp_common.h" +#include "mpp_compat_impl.h" + +#include "vdpu382_com.h" + +static RK_U32 rcb_coeff[RCB_BUF_COUNT] = { + [RCB_INTRA_ROW] = 6, /* RCB_INTRA_ROW_COEF */ + [RCB_TRANSD_ROW] = 1, /* RCB_TRANSD_ROW_COEF */ + [RCB_TRANSD_COL] = 1, /* RCB_TRANSD_COL_COEF */ + [RCB_STRMD_ROW] = 3, /* RCB_STRMD_ROW_COEF */ + [RCB_INTER_ROW] = 6, /* RCB_INTER_ROW_COEF */ + [RCB_INTER_COL] = 3, /* RCB_INTER_COL_COEF */ + [RCB_DBLK_ROW] = 22, /* RCB_DBLK_ROW_COEF */ + [RCB_SAO_ROW] = 6, /* RCB_SAO_ROW_COEF */ + [RCB_FBC_ROW] = 11, /* RCB_FBC_ROW_COEF */ + [RCB_FILT_COL] = 67, /* RCB_FILT_COL_COEF */ +}; + +static RK_S32 update_size_offset(Vdpu382RcbInfo *info, RK_U32 reg, + RK_S32 offset, RK_S32 len, RK_S32 idx) +{ + RK_S32 buf_size = 0; + + buf_size = MPP_ALIGN(len * rcb_coeff[idx], RCB_ALLINE_SIZE); + info[idx].reg = reg; + info[idx].offset = offset; + info[idx].size = buf_size; + + return buf_size; +} + +RK_S32 vdpu382_get_rcb_buf_size(Vdpu382RcbInfo *info, RK_S32 width, RK_S32 height) +{ + RK_S32 offset = 0; + + offset += update_size_offset(info, 139, offset, width, RCB_DBLK_ROW); + offset += update_size_offset(info, 133, offset, width, RCB_INTRA_ROW); + offset += update_size_offset(info, 134, offset, width, RCB_TRANSD_ROW); + offset += update_size_offset(info, 136, offset, width, RCB_STRMD_ROW); + offset += update_size_offset(info, 137, offset, width, RCB_INTER_ROW); + offset += update_size_offset(info, 140, offset, width, RCB_SAO_ROW); + offset += update_size_offset(info, 141, offset, width, RCB_FBC_ROW); + /* col rcb */ + offset += update_size_offset(info, 135, offset, height, RCB_TRANSD_COL); + offset += update_size_offset(info, 138, offset, height, RCB_INTER_COL); + offset += update_size_offset(info, 142, offset, height, RCB_FILT_COL); + + return offset; +} + +void vdpu382_setup_rcb(Vdpu382RegCommonAddr *reg, MppDev dev, MppBuffer buf, Vdpu382RcbInfo *info) 
+{ + MppDevRegOffsetCfg trans_cfg; + RK_S32 fd = mpp_buffer_get_fd(buf); + + reg->reg139_rcb_dblk_base = fd; + reg->reg133_rcb_intra_base = fd; + reg->reg134_rcb_transd_row_base = fd; + reg->reg136_rcb_streamd_row_base = fd; + reg->reg137_rcb_inter_row_base = fd; + reg->reg140_rcb_sao_base = fd; + reg->reg141_rcb_fbc_base = fd; + reg->reg135_rcb_transd_col_base = fd; + reg->reg138_rcb_inter_col_base = fd; + reg->reg142_rcb_filter_col_base = fd; + + if (info[RCB_DBLK_ROW].offset) { + trans_cfg.reg_idx = 139; + trans_cfg.offset = info[RCB_DBLK_ROW].offset; + mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg); + } + + if (info[RCB_INTRA_ROW].offset) { + trans_cfg.reg_idx = 133; + trans_cfg.offset = info[RCB_INTRA_ROW].offset; + mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg); + } + + if (info[RCB_TRANSD_ROW].offset) { + trans_cfg.reg_idx = 134; + trans_cfg.offset = info[RCB_TRANSD_ROW].offset; + mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg); + } + + if (info[RCB_STRMD_ROW].offset) { + trans_cfg.reg_idx = 136; + trans_cfg.offset = info[RCB_STRMD_ROW].offset; + mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg); + } + + if (info[RCB_INTER_ROW].offset) { + trans_cfg.reg_idx = 137; + trans_cfg.offset = info[RCB_INTER_ROW].offset; + mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg); + } + + if (info[RCB_SAO_ROW].offset) { + trans_cfg.reg_idx = 140; + trans_cfg.offset = info[RCB_SAO_ROW].offset; + mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg); + } + + if (info[RCB_FBC_ROW].offset) { + trans_cfg.reg_idx = 141; + trans_cfg.offset = info[RCB_FBC_ROW].offset; + mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg); + } + + if (info[RCB_TRANSD_COL].offset) { + trans_cfg.reg_idx = 135; + trans_cfg.offset = info[RCB_TRANSD_COL].offset; + mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg); + } + + if (info[RCB_INTER_COL].offset) { + trans_cfg.reg_idx = 138; + trans_cfg.offset = info[RCB_INTER_COL].offset; + mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg); + } + + if 
(info[RCB_FILT_COL].offset) { + trans_cfg.reg_idx = 142; + trans_cfg.offset = info[RCB_FILT_COL].offset; + mpp_dev_ioctl(dev, MPP_DEV_REG_OFFSET, &trans_cfg); + } +} + +RK_S32 vdpu382_compare_rcb_size(const void *a, const void *b) +{ + RK_S32 val = 0; + Vdpu382RcbInfo *p0 = (Vdpu382RcbInfo *)a; + Vdpu382RcbInfo *p1 = (Vdpu382RcbInfo *)b; + + val = (p0->size > p1->size) ? -1 : 1; + + return val; +} + +void vdpu382_setup_statistic(Vdpu382RegCommon *com, Vdpu382RegStatistic *sta) +{ + com->reg011.pix_range_detection_e = 1; + + memset(sta, 0, sizeof(*sta)); + + sta->reg256.axi_perf_work_e = 1; + sta->reg256.axi_perf_clr_e = 1; + sta->reg256.axi_cnt_type = 1; + + sta->reg257.addr_align_type = 1; + + /* set hurry */ + sta->reg270.axi_rd_hurry_level = 3; + sta->reg270.axi_wr_hurry_level = 1; + sta->reg270.axi_wr_qos = 1; + sta->reg270.axi_rd_qos = 3; + sta->reg270.bus2mc_buffer_qos_level = 255; + sta->reg271_wr_wait_cycle_qos = 0; +} + +void vdpu382_afbc_align_calc(MppBufSlots slots, MppFrame frame, RK_U32 expand) +{ + RK_U32 ver_stride = 0; + RK_U32 img_height = mpp_frame_get_height(frame); + + mpp_slots_set_prop(slots, SLOTS_HOR_ALIGN, mpp_align_64); + mpp_slots_set_prop(slots, SLOTS_VER_ALIGN, mpp_align_16); + ver_stride = mpp_align_16(img_height); + if (*compat_ext_fbc_buf_size) { + ver_stride += expand; + } + mpp_frame_set_ver_stride(frame, ver_stride); +} diff --git a/mpp/hal/rkdec/vp9d/CMakeLists.txt b/mpp/hal/rkdec/vp9d/CMakeLists.txt index f3a7a68f..5f6e66d1 100644 --- a/mpp/hal/rkdec/vp9d/CMakeLists.txt +++ b/mpp/hal/rkdec/vp9d/CMakeLists.txt @@ -6,6 +6,7 @@ set(HAL_VP9D_SRC hal_vp9d_com.c hal_vp9d_rkv.c hal_vp9d_vdpu34x.c + hal_vp9d_vdpu382.c ) add_library(hal_vp9d STATIC ${HAL_VP9D_SRC}) diff --git a/mpp/hal/rkdec/vp9d/hal_vp9d_api.c b/mpp/hal/rkdec/vp9d/hal_vp9d_api.c index cef87e72..3a8dbe42 100644 --- a/mpp/hal/rkdec/vp9d/hal_vp9d_api.c +++ b/mpp/hal/rkdec/vp9d/hal_vp9d_api.c @@ -25,6 +25,7 @@ #include "hal_vp9d_ctx.h" #include "hal_vp9d_rkv.h" #include 
"hal_vp9d_vdpu34x.h" +#include "hal_vp9d_vdpu382.h" RK_U32 hal_vp9d_debug = 0; @@ -45,7 +46,10 @@ MPP_RET hal_vp9d_init(void *ctx, MppHalCfg *cfg) p->dev = cfg->dev; p->hw_id = hw_id; p->client_type = client_type; - if (hw_id == HWID_VDPU34X || hw_id == HWID_VDPU38X) { + if (hw_id == HWID_VDPU382) { + p->api = &hal_vp9d_vdpu382; + cfg->support_fast_mode = 1; + } else if (hw_id == HWID_VDPU34X || hw_id == HWID_VDPU38X) { p->api = &hal_vp9d_vdpu34x; cfg->support_fast_mode = 1; } else { diff --git a/mpp/hal/rkdec/vp9d/hal_vp9d_vdpu382.c b/mpp/hal/rkdec/vp9d/hal_vp9d_vdpu382.c new file mode 100644 index 00000000..bf6c58ad --- /dev/null +++ b/mpp/hal/rkdec/vp9d/hal_vp9d_vdpu382.c @@ -0,0 +1,1075 @@ +/* + * Copyright 2022 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#define MODULE_TAG "hal_vp9d_vdpu382"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "mpp_env.h"
+#include "mpp_mem.h"
+#include "mpp_common.h"
+#include "mpp_device.h"
+#include "mpp_hal.h"
+
+#include "hal_bufs.h"
+#include "hal_vp9d_debug.h"
+#include "hal_vp9d_com.h"
+#include "hal_vp9d_vdpu382.h"
+#include "hal_vp9d_ctx.h"
+#include "vdpu382_vp9d.h"
+#include "vp9d_syntax.h"
+
+#define HW_PROB 1
+#define VP9_CONTEXT 4
+#define VP9_CTU_SIZE 64
+#define PROB_SIZE_ALIGN_TO_4K       MPP_ALIGN(PROB_SIZE, SZ_4K)
+#define COUNT_SIZE_ALIGN_TO_4K      MPP_ALIGN(COUNT_SIZE, SZ_4K)
+#define MAX_SEGMAP_SIZE_ALIGN_TO_4K MPP_ALIGN(MAX_SEGMAP_SIZE, SZ_4K)
+
+/*
+ * Layout of one probe buffer, every region 4K aligned:
+ *   [prob][count][segid cur][segid last]
+ */
+#define VDPU382_OFFSET_COUNT        (PROB_SIZE_ALIGN_TO_4K)
+#define VDPU382_OFFSET_SEGID_CUR    (PROB_SIZE_ALIGN_TO_4K + COUNT_SIZE_ALIGN_TO_4K)
+#define VDPU382_OFFSET_SEGID_LAST   (PROB_SIZE_ALIGN_TO_4K + COUNT_SIZE_ALIGN_TO_4K + MAX_SEGMAP_SIZE_ALIGN_TO_4K)
+#define VDPU382_PROBE_BUFFER_SIZE   (PROB_SIZE_ALIGN_TO_4K + COUNT_SIZE_ALIGN_TO_4K + MAX_SEGMAP_SIZE_ALIGN_TO_4K * 2)
+
+/* Private state of the vdpu382 VP9 HAL, stored in HalVp9dCtx::hw_ctx. */
+typedef struct Vdpu382Vp9dCtx_t {
+    /* per-task register/probe/rcb sets, used in fast mode only */
+    Vp9dRegBuf      g_buf[MAX_GEN_REG];
+    /* normal mode: owned buffer; fast mode: alias of the g_buf[] entry in use */
+    MppBuffer       probe_base;
+    RK_U32          offset_count;
+    RK_U32          offset_segid_cur;
+    RK_U32          offset_segid_last;
+    MppBuffer       prob_default_base;
+    /* normal mode: owned allocation; fast mode: alias of the g_buf[] entry in use */
+    void*           hw_regs;
+    RK_S32          mv_base_addr;
+    RK_S32          pre_mv_base_addr;
+    Vp9dLastInfo    ls_info;
+    /*
+     * swap between the two segid regions of the probe buffer
+     * 0  reg168 (segid last) uses offset_segid_cur,
+     *    reg169 (segid cur)  uses offset_segid_last
+     * 1  reg168 (segid last) uses offset_segid_last,
+     *    reg169 (segid cur)  uses offset_segid_cur
+     */
+    RK_U32          last_segid_flag;
+    RK_S32          width;
+    RK_S32          height;
+    /* rcb buffers info */
+    RK_S32          rcb_buf_size;
+    Vdpu382RcbInfo  rcb_info[RCB_BUF_COUNT];
+    MppBuffer       rcb_buf;
+    RK_U32          num_row_tiles;
+    RK_U32          bit_depth;
+    /* colmv buffers info */
+    HalBufs         cmv_bufs;
+    RK_S32          mv_size;
+    RK_S32          mv_count;
+    RK_U32          prob_ctx_valid[VP9_CONTEXT];
+    MppBuffer       prob_loop_base[VP9_CONTEXT];
+    RK_U32          prob_ref_poc[VP9_CONTEXT];
+    RK_U32          col_ref_poc;
+    RK_U32          segid_ref_poc;
+} Vdpu382Vp9dCtx;
+
+/*
+ * Allocate the probability buffers and the register set(s).
+ *
+ * Fast mode gets MAX_GEN_REG register sets and probe buffers in g_buf[],
+ * normal mode gets a single pair in the context itself.
+ *
+ * Returns MPP_OK on success. On failure the error is returned right away
+ * and whatever was already allocated stays recorded in the context, so
+ * hal_vp9d_release_res() can free it.
+ */
+static MPP_RET hal_vp9d_alloc_res(HalVp9dCtx *hal)
+{
+    RK_S32 i = 0;
+    MPP_RET ret = MPP_OK;
+    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
+    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;
+
+    hw_ctx->offset_count = VDPU382_OFFSET_COUNT;
+    hw_ctx->offset_segid_cur = VDPU382_OFFSET_SEGID_CUR;
+    hw_ctx->offset_segid_last = VDPU382_OFFSET_SEGID_LAST;
+    /* alloc common buffer */
+    for (i = 0; i < VP9_CONTEXT; i++) {
+        ret = mpp_buffer_get(p_hal->group, &hw_ctx->prob_loop_base[i], PROB_SIZE);
+        if (ret) {
+            mpp_err("vp9 probe_loop_base get buffer failed\n");
+            return ret;
+        }
+    }
+    ret = mpp_buffer_get(p_hal->group, &hw_ctx->prob_default_base, PROB_SIZE);
+    if (ret) {
+        mpp_err("vp9 probe_default_base get buffer failed\n");
+        return ret;
+    }
+    /* alloc buffer for fast mode or normal */
+    if (p_hal->fast_mode) {
+        for (i = 0; i < MAX_GEN_REG; i++) {
+            hw_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu382Vp9dRegSet));
+            /* gen_regs memsets this pointer, so a failed alloc must stop here */
+            if (hw_ctx->g_buf[i].hw_regs == NULL) {
+                mpp_err("vp9 hw_regs[%d] malloc failed\n", i);
+                return MPP_ERR_NOMEM;
+            }
+            ret = mpp_buffer_get(p_hal->group, &hw_ctx->g_buf[i].probe_base, VDPU382_PROBE_BUFFER_SIZE);
+            if (ret) {
+                mpp_err("vp9 probe_base get buffer failed\n");
+                return ret;
+            }
+        }
+    } else {
+        hw_ctx->hw_regs = mpp_calloc_size(void, sizeof(Vdpu382Vp9dRegSet));
+        if (hw_ctx->hw_regs == NULL) {
+            mpp_err("vp9 hw_regs malloc failed\n");
+            return MPP_ERR_NOMEM;
+        }
+        ret = mpp_buffer_get(p_hal->group, &hw_ctx->probe_base, VDPU382_PROBE_BUFFER_SIZE);
+        if (ret) {
+            mpp_err("vp9 probe_base get buffer failed\n");
+            return ret;
+        }
+    }
+    return MPP_OK;
+}
+
+/*
+ * Put *buf if it is set and clear the handle, so a second release pass can
+ * not put it twice. A failure is logged with name and stored in *first_err
+ * unless an earlier failure is already recorded there.
+ */
+static void vp9d_put_buffer(MppBuffer *buf, const char *name, MPP_RET *first_err)
+{
+    MPP_RET ret = MPP_OK;
+
+    if (*buf == NULL)
+        return;
+
+    ret = mpp_buffer_put(*buf);
+    *buf = NULL;
+    if (ret) {
+        mpp_err("vp9 %s put buffer failed\n", name);
+        if (*first_err == MPP_OK)
+            *first_err = ret;
+    }
+}
+
+/*
+ * Release everything hal_vp9d_alloc_res() and the rcb/colmv setup created.
+ *
+ * Every resource is released even when an earlier release fails; stopping
+ * at the first failure would leak all the resources that follow it.
+ * Safe to call with a NULL hw_ctx and safe to call more than once.
+ *
+ * Returns MPP_OK, or the first error reported by a release call.
+ */
+static MPP_RET hal_vp9d_release_res(HalVp9dCtx *hal)
+{
+    RK_S32 i = 0;
+    MPP_RET ret = MPP_OK;
+    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
+    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;
+
+    /* reached from the init failure path, where the context may be missing */
+    if (hw_ctx == NULL)
+        return MPP_OK;
+
+    vp9d_put_buffer(&hw_ctx->prob_default_base, "prob_default_base", &ret);
+    for (i = 0; i < VP9_CONTEXT; i++)
+        vp9d_put_buffer(&hw_ctx->prob_loop_base[i], "prob_loop_base", &ret);
+
+    if (p_hal->fast_mode) {
+        for (i = 0; i < MAX_GEN_REG; i++) {
+            vp9d_put_buffer(&hw_ctx->g_buf[i].probe_base, "probe_base", &ret);
+            MPP_FREE(hw_ctx->g_buf[i].hw_regs);
+            vp9d_put_buffer(&hw_ctx->g_buf[i].rcb_buf, "rcb_buf", &ret);
+        }
+        /* these two only alias g_buf[] entries in fast mode: drop, do not free */
+        hw_ctx->probe_base = NULL;
+        hw_ctx->hw_regs = NULL;
+    } else {
+        vp9d_put_buffer(&hw_ctx->probe_base, "probe_base", &ret);
+        MPP_FREE(hw_ctx->hw_regs);
+        vp9d_put_buffer(&hw_ctx->rcb_buf, "rcb_buf", &ret);
+    }
+
+    if (hw_ctx->cmv_bufs) {
+        MPP_RET err = hal_bufs_deinit(hw_ctx->cmv_bufs);
+
+        hw_ctx->cmv_bufs = NULL;
+        if (err) {
+            mpp_err("vp9 cmv bufs deinit buffer failed\n");
+            if (ret == MPP_OK)
+                ret = err;
+        }
+    }
+    return ret;
+}
+
+/*
+ * Tear down the HAL: release all buffers, put the buffer group and free
+ * the private context.
+ *
+ * The context is freed even when putting the group fails.
+ * Returns MPP_OK, or the error reported by mpp_buffer_group_put().
+ */
+static MPP_RET hal_vp9d_vdpu382_deinit(void *hal)
+{
+    MPP_RET ret = MPP_OK;
+    HalVp9dCtx *p_hal = (HalVp9dCtx *)hal;
+
+    hal_vp9d_release_res(p_hal);
+
+    if (p_hal->group) {
+        ret = mpp_buffer_group_put(p_hal->group);
+        if (ret)
+            mpp_err("vp9d group free buffer failed\n");
+        /* never hand a released group to a later init or deinit */
+        p_hal->group = NULL;
+    }
+    MPP_FREE(p_hal->hw_ctx);
+    return ret;
+}
+
+/*
+ * Create the private context, install the VP9 slot alignment callbacks,
+ * make sure a buffer group exists, allocate the working buffers and report
+ * the RKVDEC capability entry to the parser through cfg->hw_info.
+ *
+ * Returns MPP_OK on success; on failure everything is torn down through
+ * hal_vp9d_vdpu382_deinit() and the error is returned.
+ */
+static MPP_RET hal_vp9d_vdpu382_init(void *hal, MppHalCfg *cfg)
+{
+    MPP_RET ret = MPP_OK;
+    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
+    /* NOTE(review): MEM_CHECK presumably sets ret and jumps to __FAILED on NULL - confirm */
+    MEM_CHECK(ret, p_hal->hw_ctx = mpp_calloc_size(void, sizeof(Vdpu382Vp9dCtx)));
+    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;
+
+    /* -1 marks "no colmv base recorded yet", see the < 0 test in gen_regs */
+    hw_ctx->mv_base_addr = -1;
+    hw_ctx->pre_mv_base_addr = -1;
+    mpp_slots_set_prop(p_hal->slots, SLOTS_HOR_ALIGN, vp9_hor_align);
+    mpp_slots_set_prop(p_hal->slots, SLOTS_VER_ALIGN, vp9_ver_align);
+
+    if (p_hal->group == NULL) {
+        ret = mpp_buffer_group_get_internal(&p_hal->group, MPP_BUFFER_TYPE_ION);
+        if (ret) {
+            mpp_err("vp9 mpp_buffer_group_get failed\n");
+            goto __FAILED;
+        }
+    }
+
+    ret = hal_vp9d_alloc_res(p_hal);
+    if (ret) {
+        mpp_err("hal_vp9d_alloc_res failed\n");
+        goto __FAILED;
+    }
+
+    hw_ctx->last_segid_flag = 1;
+    {
+        // report hw_info to parser
+        const MppSocInfo *info = mpp_get_soc_info();
+        const void *hw_info = NULL;
+        RK_U32 i;
+
+        for (i = 0; i < MPP_ARRAY_ELEMS(info->dec_caps); i++) {
+            if (info->dec_caps[i] && info->dec_caps[i]->type == VPU_CLIENT_RKVDEC) {
+                hw_info = info->dec_caps[i];
+                break;
+            }
+        }
+
+        mpp_assert(hw_info);
+        cfg->hw_info = hw_info;
+    }
+
+    return ret;
+__FAILED:
+    hal_vp9d_vdpu382_deinit(hal);
+    return ret;
+}
+
+/*
+ * Replace the generic per-region sizes in rcb_info[] with VP9 specific
+ * ones. Only the size members are rewritten; reg and offset keep the
+ * values computed by vdpu382_get_rcb_buf_size().
+ *
+ * rcb_info    - RCB_BUF_COUNT entries to refine
+ * vp9_hw_regs - register set; common.reg012.fbc_e selects the FBC sizes
+ * width       - picture width, rounded up to VP9_CTU_SIZE here
+ * height      - picture height, rounded up to VP9_CTU_SIZE here
+ * data        - DXVA_PicParams_VP9 of the current frame
+ *
+ * Each size is computed in bits and converted with MPP_RCB_BYTES().
+ */
+static void vp9d_refine_rcb_size(Vdpu382RcbInfo *rcb_info,
+                                 Vdpu382Vp9dRegSet *vp9_hw_regs,
+                                 RK_S32 width, RK_S32 height, void* data)
+{
+    RK_U32 rcb_bits = 0;
+    DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)data;
+    /* note: this is the log2 value taken as-is, not 1 << log2_tile_rows */
+    RK_U32 num_tiles = pic_param->log2_tile_rows;
+    RK_U32 bit_depth = pic_param->BitDepthMinus8Luma + 8;
+    RK_U32 ext_align_size = num_tiles * 64 * 8;
+
+    width = MPP_ALIGN(width, VP9_CTU_SIZE);
+    height = MPP_ALIGN(height, VP9_CTU_SIZE);
+    /* RCB_STRMD_ROW */
+    if (width > 4096)
+        rcb_bits = MPP_ALIGN(width, 64) * 232 + ext_align_size;
+    else
+        rcb_bits = 0;
+    rcb_info[RCB_STRMD_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_TRANSD_ROW */
+    if (width > 8192)
+        rcb_bits = (MPP_ALIGN(width - 8192, 4) << 1) + ext_align_size;
+    else
+        rcb_bits = 0;
+    rcb_info[RCB_TRANSD_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_TRANSD_COL */
+    if (height > 8192)
+        rcb_bits = (MPP_ALIGN(height - 8192, 4) << 1) + ext_align_size;
+    else
+        rcb_bits = 0;
+    rcb_info[RCB_TRANSD_COL].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_INTER_ROW */
+    rcb_bits = width * 36 + ext_align_size;
+    rcb_info[RCB_INTER_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_INTER_COL */
+    rcb_info[RCB_INTER_COL].size = 0;
+    /* RCB_INTRA_ROW */
+    rcb_bits = width * 48 + ext_align_size;
+    rcb_info[RCB_INTRA_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_DBLK_ROW */
+    rcb_bits = width * (1 + 16 * bit_depth) + num_tiles * 192 * bit_depth + ext_align_size;
+    rcb_info[RCB_DBLK_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_SAO_ROW */
+    rcb_info[RCB_SAO_ROW].size = 0;
+    /* RCB_FBC_ROW */
+    if (vp9_hw_regs->common.reg012.fbc_e) {
+        rcb_bits = 8 * width * bit_depth + ext_align_size;
+    } else
+        rcb_bits = 0;
+    rcb_info[RCB_FBC_ROW].size = MPP_RCB_BYTES(rcb_bits);
+    /* RCB_FILT_COL */
+    if (vp9_hw_regs->common.reg012.fbc_e) {
+        rcb_bits = height * (4 + 24 * bit_depth);
+    } else
+        rcb_bits = height * (4 + 16 * bit_depth);
+    rcb_bits += ext_align_size;
+    rcb_info[RCB_FILT_COL].size = MPP_RCB_BYTES(rcb_bits);
+}
+
+/*
+ * Drop the buffer held in *buf, if any, and fetch a fresh one of size
+ * bytes from the HAL buffer group.
+ *
+ * *buf is cleared before the new request, so a failed request leaves NULL
+ * behind and never the handle that was just released.
+ */
+static void vp9d_renew_rcb_buf(HalVp9dCtx *p_hal, MppBuffer *buf, RK_S32 size)
+{
+    if (*buf) {
+        mpp_buffer_put(*buf);
+        *buf = NULL;
+    }
+    if (mpp_buffer_get(p_hal->group, buf, size)) {
+        mpp_err("vp9 rcb_buf get buffer failed\n");
+        *buf = NULL;
+    }
+}
+
+/*
+ * Recompute the RCB layout and reallocate the RCB buffer(s) whenever the
+ * tile-row value, bit depth or aligned picture size differs from the
+ * values cached in the context. Does nothing when all four still match.
+ *
+ * hal     - HalVp9dCtx
+ * hw_regs - register set of the current task, passed to the size refine
+ * data    - DXVA_PicParams_VP9 of the current frame
+ */
+static void hal_vp9d_rcb_info_update(void *hal,  Vdpu382Vp9dRegSet *hw_regs, void *data)
+{
+    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
+    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;
+    DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)data;
+    RK_U32 num_tiles = pic_param->log2_tile_rows;
+    RK_U32 bit_depth = pic_param->BitDepthMinus8Luma + 8;
+    RK_S32 height = vp9_ver_align(pic_param->height);
+    /* width goes through vp9_ver_align() too, not vp9_hor_align() */
+    RK_S32 width  = vp9_ver_align(pic_param->width);
+
+    if (hw_ctx->num_row_tiles != num_tiles ||
+        hw_ctx->bit_depth != bit_depth ||
+        hw_ctx->width != width ||
+        hw_ctx->height != height) {
+
+        /* total size comes from the generic layout, refine only edits sizes */
+        hw_ctx->rcb_buf_size = vdpu382_get_rcb_buf_size(hw_ctx->rcb_info, width, height);
+        vp9d_refine_rcb_size(hw_ctx->rcb_info, hw_regs, width, height, pic_param);
+
+        if (p_hal->fast_mode) {
+            RK_U32 i;
+
+            for (i = 0; i < MPP_ARRAY_ELEMS(hw_ctx->g_buf); i++)
+                vp9d_renew_rcb_buf(p_hal, &hw_ctx->g_buf[i].rcb_buf, hw_ctx->rcb_buf_size);
+        } else {
+            vp9d_renew_rcb_buf(p_hal, &hw_ctx->rcb_buf, hw_ctx->rcb_buf_size);
+        }
+
+        hw_ctx->num_row_tiles = num_tiles;
+        hw_ctx->bit_depth = bit_depth;
+        hw_ctx->width = width;
+        hw_ctx->height = height;
+    }
+}
+
+static MPP_RET hal_vp9d_vdpu382_gen_regs(void *hal, HalTaskInfo *task)
+{
+    RK_S32 i;
+    RK_U8 bit_depth = 0;
+    RK_U32 pic_h[3] = { 0 };
+    RK_U32 ref_frame_width_y;
+    RK_U32 ref_frame_height_y;
+    RK_S32 stream_len = 0, aglin_offset = 0;
+    RK_U32 y_hor_virstride, uv_hor_virstride, y_virstride;
+    RK_U8 *bitstream = NULL;
+    MppBuffer streambuf = NULL;
+    RK_U32 sw_y_hor_virstride;
+    RK_U32 sw_uv_hor_virstride;
+    RK_U32 sw_y_virstride;
+    RK_U8 ref_idx = 0;
+    RK_U32 *reg_ref_base = 0;
+    RK_S32 intraFlag = 0;
+    MppBuffer framebuf = NULL;
+    HalBuf *mv_buf = NULL;
+    RK_U32 fbc_en = 0;
+
+    HalVp9dCtx *p_hal = (HalVp9dCtx*)hal;
+    Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx;
+    DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;
+    RK_S32 mv_size = pic_param->width * pic_param->height / 2;
+    RK_U32 frame_ctx_id = pic_param->frame_context_idx;
+
+    if (p_hal->fast_mode) {
+        for (i = 0; i < MAX_GEN_REG; i++) {
+            if (!hw_ctx->g_buf[i].use_flag) {
+                task->dec.reg_index = i;
+                hw_ctx->probe_base = hw_ctx->g_buf[i].probe_base;
+
+                hw_ctx->hw_regs = hw_ctx->g_buf[i].hw_regs;
+                hw_ctx->g_buf[i].use_flag = 1;
+                break;
+            }
+        }
+        if (i == MAX_GEN_REG) {
+            mpp_err("vp9 fast mode buf all used\n");
+            return MPP_ERR_NOMEM;
+        }
+    }
+
+    if (hw_ctx->cmv_bufs == NULL || hw_ctx->mv_size < mv_size) {
+        size_t size = mv_size;
+
+        if (hw_ctx->cmv_bufs) {
+            hal_bufs_deinit(hw_ctx->cmv_bufs);
+            hw_ctx->cmv_bufs = NULL;
+        }
+
+        hal_bufs_init(&hw_ctx->cmv_bufs);
+        if (hw_ctx->cmv_bufs == NULL) {
+            mpp_err_f("colmv bufs init fail");
+            return MPP_NOK;
+        }
+        hw_ctx->mv_size = mv_size;
+        hw_ctx->mv_count = mpp_buf_slot_get_count(p_hal ->slots);
+        hal_bufs_setup(hw_ctx->cmv_bufs, hw_ctx->mv_count, 1, &size);
+    }
+
+    Vdpu382Vp9dRegSet *vp9_hw_regs = (Vdpu382Vp9dRegSet*)hw_ctx->hw_regs;
+    intraFlag = (!pic_param->frame_type || pic_param->intra_only);
+    stream_len = (RK_S32)mpp_packet_get_length(task->dec.input_packet);
+ memset(hw_ctx->hw_regs, 0, sizeof(Vdpu382Vp9dRegSet)); +#if HW_PROB + hal_vp9d_prob_flag_delta(mpp_buffer_get_ptr(hw_ctx->probe_base), task->dec.syntax.data); + if (intraFlag) + hal_vp9d_prob_default(mpp_buffer_get_ptr(hw_ctx->prob_default_base), task->dec.syntax.data); + + /* config reg103 */ + vp9_hw_regs->vp9d_param.reg103.prob_update_en = 1; + vp9_hw_regs->vp9d_param.reg103.intra_only_flag = intraFlag; + if (!intraFlag) { + vp9_hw_regs->vp9d_param.reg103.txfmmode_rfsh_en = (pic_param->txmode == 4) ? 1 : 0; + vp9_hw_regs->vp9d_param.reg103.interp_filter_switch_en = pic_param->interp_filter == 4 ? 1 : 0; + } + vp9_hw_regs->vp9d_param.reg103.ref_mode_rfsh_en = 1; + vp9_hw_regs->vp9d_param.reg103.single_ref_rfsh_en = 1; + vp9_hw_regs->vp9d_param.reg103.comp_ref_rfsh_en = 1; + vp9_hw_regs->vp9d_param.reg103.inter_coef_rfsh_flag = 0; + vp9_hw_regs->vp9d_param.reg103.refresh_en = + !pic_param->error_resilient_mode && !pic_param->parallelmode; + vp9_hw_regs->vp9d_param.reg103.prob_save_en = pic_param->refresh_frame_context; + vp9_hw_regs->vp9d_param.reg103.allow_high_precision_mv = pic_param->allow_high_precision_mv; + vp9_hw_regs->vp9d_param.reg103.last_key_frame_flag = hw_ctx->ls_info.last_intra_only; + + /* set info for multi core */ + { + MppFrame mframe = NULL; + RK_U8 ref_frame_idx = 0; + + vp9_hw_regs->common.reg028.sw_poc_arb_flag = 1; + mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe); + vp9_hw_regs->vp9d_param.reg65.cur_poc = mframe ? mpp_frame_get_poc(mframe) : 0; + // last poc + ref_idx = pic_param->frame_refs[0].Index7Bits; + ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits; + if (ref_frame_idx < 0x7f) { + mframe = NULL; + mpp_buf_slot_get_prop(p_hal ->slots, ref_frame_idx, SLOT_FRAME_PTR, &mframe); + vp9_hw_regs->vp9d_param.reg95.last_poc = mframe ? 
mpp_frame_get_poc(mframe) : 0; + } + // golden poc + ref_idx = pic_param->frame_refs[1].Index7Bits; + ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits; + if (ref_frame_idx < 0x7f) { + mframe = NULL; + mpp_buf_slot_get_prop(p_hal ->slots, ref_frame_idx, SLOT_FRAME_PTR, &mframe); + vp9_hw_regs->vp9d_param.reg96.golden_poc = mframe ? mpp_frame_get_poc(mframe) : 0; + } + // altref poc + ref_idx = pic_param->frame_refs[2].Index7Bits; + ref_frame_idx = pic_param->ref_frame_map[ref_idx].Index7Bits; + if (ref_frame_idx < 0x7f) { + mframe = NULL; + mpp_buf_slot_get_prop(p_hal ->slots, ref_frame_idx, SLOT_FRAME_PTR, &mframe); + vp9_hw_regs->vp9d_param.reg97.altref_poc = mframe ? mpp_frame_get_poc(mframe) : 0; + } + // colref poc + vp9_hw_regs->vp9d_param.reg98.col_ref_poc = + hw_ctx->col_ref_poc ? hw_ctx->col_ref_poc : vp9_hw_regs->vp9d_param.reg65.cur_poc; + if (pic_param->show_frame && !pic_param->show_existing_frame) + hw_ctx->col_ref_poc = vp9_hw_regs->vp9d_param.reg65.cur_poc; + // segment id ref poc + vp9_hw_regs->vp9d_param.reg100.segid_ref_poc = hw_ctx->segid_ref_poc; + + if ((pic_param->stVP9Segments.enabled && pic_param->stVP9Segments.update_map) || + (hw_ctx->ls_info.last_width != pic_param->width) || + (hw_ctx->ls_info.last_height != pic_param->height) || + intraFlag || pic_param->error_resilient_mode) { + hw_ctx->segid_ref_poc = vp9_hw_regs->vp9d_param.reg65.cur_poc; + } + } + + /* config last prob base and update write base */ + { + + if (intraFlag || pic_param->error_resilient_mode) { + if (intraFlag + || pic_param->error_resilient_mode + || (pic_param->reset_frame_context == 3)) { + memset(hw_ctx->prob_ctx_valid, 0, sizeof(hw_ctx->prob_ctx_valid)); + } else if (pic_param->reset_frame_context == 2) { + hw_ctx->prob_ctx_valid[frame_ctx_id] = 0; + } + } + +#if VP9_DUMP + { + static RK_U32 file_cnt = 0; + char file_name[128]; + RK_U32 i = 0; + sprintf(file_name, "/data/vp9/prob_last_%d.txt", file_cnt); + FILE *fp = fopen(file_name, "wb"); + RK_U32 
*tmp = NULL; + if (hw_ctx->prob_ctx_valid[frame_ctx_id]) { + tmp = (RK_U32 *)mpp_buffer_get_ptr(hw_ctx->prob_loop_base[pic_param->frame_context_idx]); + } else { + tmp = (RK_U32 *)mpp_buffer_get_ptr(hw_ctx->prob_default_base); + } + for (i = 0; i < PROB_SIZE / 4; i += 2) { + fprintf(fp, "%08x%08x\n", tmp[i + 1], tmp[i]); + } + file_cnt++; + fflush(fp); + fclose(fp); + } +#endif + + if (hw_ctx->prob_ctx_valid[frame_ctx_id]) { + vp9_hw_regs->vp9d_addr.reg162_last_prob_base = + mpp_buffer_get_fd(hw_ctx->prob_loop_base[frame_ctx_id]); + vp9_hw_regs->common.reg028.swreg_vp9_rd_prob_idx = frame_ctx_id + 1; + vp9_hw_regs->vp9d_param.reg99.prob_ref_poc = hw_ctx->prob_ref_poc[frame_ctx_id]; + } else { + vp9_hw_regs->vp9d_addr.reg162_last_prob_base = mpp_buffer_get_fd(hw_ctx->prob_default_base); + hw_ctx->prob_ctx_valid[frame_ctx_id] |= pic_param->refresh_frame_context; + vp9_hw_regs->common.reg028.swreg_vp9_rd_prob_idx = 0; + vp9_hw_regs->vp9d_param.reg99.prob_ref_poc = 0; + if (pic_param->refresh_frame_context) + hw_ctx->prob_ref_poc[frame_ctx_id] = vp9_hw_regs->vp9d_param.reg65.cur_poc; + } + vp9_hw_regs->vp9d_addr.reg172_update_prob_wr_base = + mpp_buffer_get_fd(hw_ctx->prob_loop_base[frame_ctx_id]); + vp9_hw_regs->common.reg028.swreg_vp9_wr_prob_idx = frame_ctx_id + 1; + + } + vp9_hw_regs->vp9d_addr.reg160_delta_prob_base = mpp_buffer_get_fd(hw_ctx->probe_base); +#else + hal_vp9d_output_probe(mpp_buffer_get_ptr(hw_ctx->probe_base), task->dec.syntax.data); +#endif + vp9_hw_regs->common.reg013.cur_pic_is_idr = !pic_param->frame_type; + vp9_hw_regs->common.reg009.dec_mode = 2; //set as vp9 dec + vp9_hw_regs->common.reg016_str_len = ((stream_len + 15) & (~15)) + 0x80; + + mpp_buf_slot_get_prop(p_hal ->packet_slots, task->dec.input, SLOT_BUFFER, &streambuf); + bitstream = mpp_buffer_get_ptr(streambuf); + aglin_offset = vp9_hw_regs->common.reg016_str_len - stream_len; + if (aglin_offset > 0) { + memset((void *)(bitstream + stream_len), 0, aglin_offset); + } + + //--- caculate 
the yuv_frame_size and mv_size + bit_depth = pic_param->BitDepthMinus8Luma + 8; + pic_h[0] = vp9_ver_align(pic_param->height); + pic_h[1] = vp9_ver_align(pic_param->height) / 2; + pic_h[2] = pic_h[1]; + + { + MppFrame mframe = NULL; + + mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe); + fbc_en = MPP_FRAME_FMT_IS_FBC(mpp_frame_get_fmt(mframe)); + + if (fbc_en) { + RK_U32 w = MPP_ALIGN(mpp_frame_get_width(mframe), 64); + RK_U32 h = MPP_ALIGN(mpp_frame_get_height(mframe), 64); + RK_U32 fbd_offset = MPP_ALIGN(w * (h + 16) / 16, SZ_4K); + + vp9_hw_regs->common.reg012.fbc_e = 1; + vp9_hw_regs->common.reg018.y_hor_virstride = w >> 4; + vp9_hw_regs->common.reg019.uv_hor_virstride = w >> 4; + vp9_hw_regs->common.reg020_fbc_payload_off.payload_st_offset = fbd_offset >> 4; + } else { + sw_y_hor_virstride = (vp9_hor_align((pic_param->width * bit_depth) >> 3) >> 4); + sw_uv_hor_virstride = (vp9_hor_align((pic_param->width * bit_depth) >> 3) >> 4); + sw_y_virstride = pic_h[0] * sw_y_hor_virstride; + + vp9_hw_regs->common.reg012.fbc_e = 0; + vp9_hw_regs->common.reg018.y_hor_virstride = sw_y_hor_virstride; + vp9_hw_regs->common.reg019.uv_hor_virstride = sw_uv_hor_virstride; + vp9_hw_regs->common.reg020_y_virstride.y_virstride = sw_y_virstride; + } + } + if (!pic_param->intra_only && pic_param->frame_type && + !pic_param->error_resilient_mode && hw_ctx->ls_info.last_show_frame) { + hw_ctx->pre_mv_base_addr = hw_ctx->mv_base_addr; + } + + mpp_buf_slot_get_prop(p_hal ->slots, task->dec.output, SLOT_BUFFER, &framebuf); + vp9_hw_regs->common_addr.reg130_decout_base = mpp_buffer_get_fd(framebuf); + vp9_hw_regs->common_addr.reg128_rlc_base = mpp_buffer_get_fd(streambuf); + vp9_hw_regs->common_addr.reg129_rlcwrite_base = mpp_buffer_get_fd(streambuf); + + vp9_hw_regs->vp9d_addr.reg197_cabactbl_base = mpp_buffer_get_fd(hw_ctx->probe_base); + vp9_hw_regs->vp9d_addr.reg167_count_prob_base = mpp_buffer_get_fd(hw_ctx->probe_base); + 
vp9_hw_regs->vp9d_addr.reg169_segidcur_base = mpp_buffer_get_fd(hw_ctx->probe_base); + vp9_hw_regs->vp9d_addr.reg168_segidlast_base = mpp_buffer_get_fd(hw_ctx->probe_base); + mpp_dev_set_reg_offset(p_hal->dev, 167, hw_ctx->offset_count); + + if (hw_ctx->last_segid_flag) { + mpp_dev_set_reg_offset(p_hal->dev, 168, hw_ctx->offset_segid_last); + mpp_dev_set_reg_offset(p_hal->dev, 169, hw_ctx->offset_segid_cur); + } else { + mpp_dev_set_reg_offset(p_hal->dev, 168, hw_ctx->offset_segid_cur); + mpp_dev_set_reg_offset(p_hal->dev, 169, hw_ctx->offset_segid_last); + } + + if (pic_param->stVP9Segments.enabled && pic_param->stVP9Segments.update_map) { + hw_ctx->last_segid_flag = !hw_ctx->last_segid_flag; + } + //set cur colmv base + mv_buf = hal_bufs_get_buf(hw_ctx->cmv_bufs, task->dec.output); + vp9_hw_regs->common_addr.reg131_colmv_cur_base = mpp_buffer_get_fd(mv_buf->buf[0]); + hw_ctx->mv_base_addr = vp9_hw_regs->common_addr.reg131_colmv_cur_base; + if (hw_ctx->pre_mv_base_addr < 0) { + hw_ctx->pre_mv_base_addr = hw_ctx->mv_base_addr; + } + vp9_hw_regs->vp9d_addr.reg170_ref_colmv_base = hw_ctx->pre_mv_base_addr; + + vp9_hw_regs->vp9d_param.reg64.cprheader_offset = 0; + reg_ref_base = (RK_U32*)&vp9_hw_regs->vp9d_addr.reg164_ref_last_base; + for (i = 0; i < 3; i++) { + ref_idx = pic_param->frame_refs[i].Index7Bits; + ref_frame_width_y = pic_param->ref_frame_coded_width[ref_idx]; + ref_frame_height_y = pic_param->ref_frame_coded_height[ref_idx]; + pic_h[0] = vp9_ver_align(ref_frame_height_y); + pic_h[1] = vp9_ver_align(ref_frame_height_y) / 2; + if (fbc_en) { + y_hor_virstride = uv_hor_virstride = MPP_ALIGN(ref_frame_width_y, 64) >> 4; + } else { + y_hor_virstride = uv_hor_virstride = (vp9_hor_align((ref_frame_width_y * bit_depth) >> 3) >> 4); + } + y_virstride = y_hor_virstride * pic_h[0]; + + if (pic_param->ref_frame_map[ref_idx].Index7Bits < 0x7f) { + mpp_buf_slot_get_prop(p_hal ->slots, pic_param->ref_frame_map[ref_idx].Index7Bits, SLOT_BUFFER, &framebuf); + } + + if 
(pic_param->ref_frame_map[ref_idx].Index7Bits < 0x7f) { + switch (i) { + case 0: { + vp9_hw_regs->vp9d_param.reg106.framewidth_last = ref_frame_width_y; + vp9_hw_regs->vp9d_param.reg107.frameheight_last = ref_frame_height_y; + vp9_hw_regs->vp9d_param.reg79.lastfy_hor_virstride = y_hor_virstride; + vp9_hw_regs->vp9d_param.reg80.lastfuv_hor_virstride = uv_hor_virstride; + vp9_hw_regs->vp9d_param.reg85.lastfy_virstride = y_virstride; + } break; + case 1: { + vp9_hw_regs->vp9d_param.reg108.framewidth_golden = ref_frame_width_y; + vp9_hw_regs->vp9d_param.reg109.frameheight_golden = ref_frame_height_y; + vp9_hw_regs->vp9d_param.reg81.goldenfy_hor_virstride = y_hor_virstride; + vp9_hw_regs->vp9d_param.reg82.goldenfuv_hor_virstride = uv_hor_virstride; + vp9_hw_regs->vp9d_param.reg86.goldeny_virstride = y_virstride; + } break; + case 2: { + vp9_hw_regs->vp9d_param.reg110.framewidth_alfter = ref_frame_width_y; + vp9_hw_regs->vp9d_param.reg111.frameheight_alfter = ref_frame_height_y; + vp9_hw_regs->vp9d_param.reg83.altreffy_hor_virstride = y_hor_virstride; + vp9_hw_regs->vp9d_param.reg84.altreffuv_hor_virstride = uv_hor_virstride; + vp9_hw_regs->vp9d_param.reg87.altrefy_virstride = y_virstride; + } break; + default: + break; + } + + /*0 map to 11*/ + /*1 map to 12*/ + /*2 map to 13*/ + if (framebuf != NULL) { + reg_ref_base[i] = mpp_buffer_get_fd(framebuf); + } else { + mpp_log("ref buff address is no valid used out as base slot index 0x%x", pic_param->ref_frame_map[ref_idx].Index7Bits); + reg_ref_base[i] = vp9_hw_regs->common_addr.reg130_decout_base; + } + mv_buf = hal_bufs_get_buf(hw_ctx->cmv_bufs, pic_param->ref_frame_map[ref_idx].Index7Bits); + vp9_hw_regs->vp9d_addr.reg181_196_ref_colmv_base[i] = mpp_buffer_get_fd(mv_buf->buf[0]); + } else { + reg_ref_base[i] = vp9_hw_regs->common_addr.reg130_decout_base; + vp9_hw_regs->vp9d_addr.reg181_196_ref_colmv_base[i] = vp9_hw_regs->common_addr.reg131_colmv_cur_base; + } + } + + for (i = 0; i < 8; i++) { + 
vp9_hw_regs->vp9d_param.reg67_74[i].segid_frame_qp_delta_en = (hw_ctx->ls_info.feature_mask[i]) & 0x1; + vp9_hw_regs->vp9d_param.reg67_74[i].segid_frame_qp_delta = hw_ctx->ls_info.feature_data[i][0]; + vp9_hw_regs->vp9d_param.reg67_74[i].segid_frame_loopfitler_value_en = (hw_ctx->ls_info.feature_mask[i] >> 1) & 0x1; + vp9_hw_regs->vp9d_param.reg67_74[i].segid_frame_loopfilter_value = hw_ctx->ls_info.feature_data[i][1]; + vp9_hw_regs->vp9d_param.reg67_74[i].segid_referinfo_en = (hw_ctx->ls_info.feature_mask[i] >> 2) & 0x1; + vp9_hw_regs->vp9d_param.reg67_74[i].segid_referinfo = hw_ctx->ls_info.feature_data[i][2]; + vp9_hw_regs->vp9d_param.reg67_74[i].segid_frame_skip_en = (hw_ctx->ls_info.feature_mask[i] >> 3) & 0x1; + } + + vp9_hw_regs->vp9d_param.reg67_74[0].segid_abs_delta = hw_ctx->ls_info.abs_delta_last; + vp9_hw_regs->vp9d_param.reg76.tx_mode = pic_param->txmode; + vp9_hw_regs->vp9d_param.reg76.frame_reference_mode = pic_param->refmode; + vp9_hw_regs->vp9d_param.reg94.ref_deltas_lastframe = 0; + + if (!intraFlag) { + for (i = 0; i < 4; i++) + vp9_hw_regs->vp9d_param.reg94.ref_deltas_lastframe |= (hw_ctx->ls_info.last_ref_deltas[i] & 0x7f) << (7 * i); + + for (i = 0; i < 2; i++) + vp9_hw_regs->vp9d_param.reg75.mode_deltas_lastframe |= (hw_ctx->ls_info.last_mode_deltas[i] & 0x7f) << (7 * i); + } else { + hw_ctx->ls_info.segmentation_enable_flag_last = 0; + hw_ctx->ls_info.last_intra_only = 1; + } + + vp9_hw_regs->vp9d_param.reg75.segmentation_enable_lstframe = hw_ctx->ls_info.segmentation_enable_flag_last; + vp9_hw_regs->vp9d_param.reg75.last_show_frame = hw_ctx->ls_info.last_show_frame; + vp9_hw_regs->vp9d_param.reg75.last_intra_only = hw_ctx->ls_info.last_intra_only; + vp9_hw_regs->vp9d_param.reg75.last_widthheight_eqcur = (pic_param->width == hw_ctx->ls_info.last_width) && (pic_param->height == hw_ctx->ls_info.last_height); + vp9_hw_regs->vp9d_param.reg78.lasttile_size = stream_len - pic_param->first_partition_size; + + + if (!intraFlag) { + 
vp9_hw_regs->vp9d_param.reg88.lref_hor_scale = pic_param->mvscale[0][0]; + vp9_hw_regs->vp9d_param.reg89.lref_ver_scale = pic_param->mvscale[0][1]; + vp9_hw_regs->vp9d_param.reg90.gref_hor_scale = pic_param->mvscale[1][0]; + vp9_hw_regs->vp9d_param.reg91.gref_ver_scale = pic_param->mvscale[1][1]; + vp9_hw_regs->vp9d_param.reg92.aref_hor_scale = pic_param->mvscale[2][0]; + vp9_hw_regs->vp9d_param.reg93.aref_ver_scale = pic_param->mvscale[2][1]; + } + + vp9_hw_regs->common.reg010.dec_e = 1; + vp9_hw_regs->common.reg011.buf_empty_en = 1; + vp9_hw_regs->common.reg011.dec_clkgate_e = 1; + + vp9_hw_regs->common.reg026.swreg_block_gating_e = 0xfffff; + vp9_hw_regs->common.reg026.reg_cfg_gating_en = 1; + vp9_hw_regs->common.reg032_timeout_threshold = 0x3ffff; + + //last info update + hw_ctx->ls_info.abs_delta_last = pic_param->stVP9Segments.abs_delta; + for (i = 0 ; i < 4; i ++) { + hw_ctx->ls_info.last_ref_deltas[i] = pic_param->ref_deltas[i]; + } + + for (i = 0 ; i < 2; i ++) { + hw_ctx->ls_info.last_mode_deltas[i] = pic_param->mode_deltas[i]; + } + + for (i = 0; i < 8; i++) { + hw_ctx->ls_info.feature_data[i][0] = pic_param->stVP9Segments.feature_data[i][0]; + hw_ctx->ls_info.feature_data[i][1] = pic_param->stVP9Segments.feature_data[i][1]; + hw_ctx->ls_info.feature_data[i][2] = pic_param->stVP9Segments.feature_data[i][2]; + hw_ctx->ls_info.feature_data[i][3] = pic_param->stVP9Segments.feature_data[i][3]; + hw_ctx->ls_info.feature_mask[i] = pic_param->stVP9Segments.feature_mask[i]; + } + if (!hw_ctx->ls_info.segmentation_enable_flag_last) + hw_ctx->ls_info.segmentation_enable_flag_last = pic_param->stVP9Segments.enabled; + + hw_ctx->ls_info.last_show_frame = pic_param->show_frame; + hw_ctx->ls_info.last_width = pic_param->width; + hw_ctx->ls_info.last_height = pic_param->height; + hw_ctx->ls_info.last_intra_only = (!pic_param->frame_type || pic_param->intra_only); + hal_vp9d_dbg_par("stVP9Segments.enabled %d show_frame %d width %d height %d last_intra_only %d", + 
pic_param->stVP9Segments.enabled, pic_param->show_frame, + pic_param->width, pic_param->height, + hw_ctx->ls_info.last_intra_only); + + hal_vp9d_rcb_info_update(hal, vp9_hw_regs, pic_param); + { + MppBuffer rcb_buf = NULL; + + rcb_buf = p_hal->fast_mode ? hw_ctx->g_buf[task->dec.reg_index].rcb_buf : hw_ctx->rcb_buf; + vdpu382_setup_rcb(&vp9_hw_regs->common_addr, p_hal->dev, rcb_buf, hw_ctx->rcb_info); + } + vdpu382_setup_statistic(&vp9_hw_regs->common, &vp9_hw_regs->statistic); + + // whether need update counts + if (pic_param->refresh_frame_context && !pic_param->parallelmode) { + task->dec.flags.wait_done = 1; + } + + return MPP_OK; +} + +static MPP_RET hal_vp9d_vdpu382_start(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_OK; + HalVp9dCtx *p_hal = (HalVp9dCtx*)hal; + Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx; + Vdpu382Vp9dRegSet *hw_regs = (Vdpu382Vp9dRegSet *)hw_ctx->hw_regs; + MppDev dev = p_hal->dev; + + if (p_hal->fast_mode) { + RK_S32 index = task->dec.reg_index; + hw_regs = (Vdpu382Vp9dRegSet *)hw_ctx->g_buf[index].hw_regs; + } + + mpp_assert(hw_regs); + + +#if VP9_DUMP + { + static RK_U32 file_cnt = 0; + char file_name[128]; + sprintf(file_name, "/data/vp9_regs/reg_%d.txt", file_cnt); + FILE *fp = fopen(file_name, "wb"); + RK_U32 i = 0; + RK_U32 *tmp = NULL; + tmp = (RK_U32 *)&hw_regs->common; + for (i = 0; i < sizeof(hw_regs->common) / 4; i++) { + fprintf(fp, "reg[%d] 0x%08x\n", i + 8, tmp[i]); + } + fprintf(fp, "\n"); + tmp = (RK_U32 *)&hw_regs->vp9d_param; + for (i = 0; i < sizeof(hw_regs->vp9d_param) / 4; i++) { + fprintf(fp, "reg[%d] 0x%08x\n", i + 64, tmp[i]); + } + fprintf(fp, "\n"); + tmp = (RK_U32 *)&hw_regs->common_addr; + for (i = 0; i < sizeof(hw_regs->common_addr) / 4; i++) { + fprintf(fp, "reg[%d] 0x%08x\n", i + 128, tmp[i]); + } + fprintf(fp, "\n"); + tmp = (RK_U32 *)&hw_regs->vp9d_addr; + for (i = 0; i < sizeof(hw_regs->vp9d_addr) / 4; i++) { + fprintf(fp, "reg[%d] 0x%08x\n", i + 160, tmp[i]); + } + file_cnt++; + 
fflush(fp); + fclose(fp); + } +#endif + + do { + MppDevRegWrCfg wr_cfg; + MppDevRegRdCfg rd_cfg; + + wr_cfg.reg = &hw_regs->common; + wr_cfg.size = sizeof(hw_regs->common); + wr_cfg.offset = OFFSET_COMMON_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = &hw_regs->vp9d_param; + wr_cfg.size = sizeof(hw_regs->vp9d_param); + wr_cfg.offset = OFFSET_CODEC_PARAMS_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = &hw_regs->common_addr; + wr_cfg.size = sizeof(hw_regs->common_addr); + wr_cfg.offset = OFFSET_COMMON_ADDR_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = &hw_regs->vp9d_addr; + wr_cfg.size = sizeof(hw_regs->vp9d_addr); + wr_cfg.offset = OFFSET_CODEC_ADDR_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + wr_cfg.reg = &hw_regs->statistic; + wr_cfg.size = sizeof(hw_regs->statistic); + wr_cfg.offset = OFFSET_STATISTIC_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_WR, &wr_cfg); + if (ret) { + mpp_err_f("set register write failed %d\n", ret); + break; + } + + rd_cfg.reg = &hw_regs->irq_status; + rd_cfg.size = sizeof(hw_regs->irq_status); + rd_cfg.offset = OFFSET_INTERRUPT_REGS; + + ret = mpp_dev_ioctl(dev, MPP_DEV_REG_RD, &rd_cfg); + if (ret) { + mpp_err_f("set register read failed %d\n", ret); + break; + } + /* rcb info for sram */ + { + RK_U32 i = 0; + MppDevRcbInfoCfg rcb_cfg; + Vdpu382RcbInfo rcb_info[RCB_BUF_COUNT]; + + memcpy(rcb_info, hw_ctx->rcb_info, sizeof(rcb_info)); + qsort(rcb_info, MPP_ARRAY_ELEMS(rcb_info), + sizeof(rcb_info[0]), vdpu382_compare_rcb_size); + + for (i = 0; i < MPP_ARRAY_ELEMS(rcb_info); i++) { + rcb_cfg.reg_idx = rcb_info[i].reg; + 
rcb_cfg.size = rcb_info[i].size; + if (rcb_cfg.size > 0) { + mpp_dev_ioctl(dev, MPP_DEV_RCB_INFO, &rcb_cfg); + } else + break; + } + } + ret = mpp_dev_ioctl(dev, MPP_DEV_CMD_SEND, NULL); + if (ret) { + mpp_err_f("send cmd failed %d\n", ret); + break; + } + } while (0); + + (void)task; + return ret; +} + +static MPP_RET hal_vp9d_vdpu382_wait(void *hal, HalTaskInfo *task) +{ + MPP_RET ret = MPP_OK; + HalVp9dCtx *p_hal = (HalVp9dCtx*)hal; + Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx; + Vdpu382Vp9dRegSet *hw_regs = (Vdpu382Vp9dRegSet *)hw_ctx->hw_regs; + + if (p_hal->fast_mode) + hw_regs = (Vdpu382Vp9dRegSet *)hw_ctx->g_buf[task->dec.reg_index].hw_regs; + + mpp_assert(hw_regs); + + ret = mpp_dev_ioctl(p_hal->dev, MPP_DEV_CMD_POLL, NULL); + if (ret) + mpp_err_f("poll cmd failed %d\n", ret); + + if (hal_vp9d_debug & HAL_VP9D_DBG_REG) { + RK_U32 *p = (RK_U32 *)hw_regs; + RK_U32 i = 0; + + for (i = 0; i < sizeof(Vdpu382Vp9dRegSet) / 4; i++) + mpp_log("get regs[%02d]: %08X\n", i, *p++); + } + + if (task->dec.flags.parse_err || + task->dec.flags.ref_err || + !hw_regs->irq_status.reg224.dec_rdy_sta) { + MppFrame mframe = NULL; + mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe); + mpp_frame_set_errinfo(mframe, 1); + } +#if !HW_PROB + if (p_hal->dec_cb && task->dec.flags.wait_done) { + DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data; + hal_vp9d_update_counts(mpp_buffer_get_ptr(hw_ctx->count_base), task->dec.syntax.data); + mpp_callback(p_hal->dec_cb, &pic_param->counts); + } +#endif + if (p_hal->fast_mode) { + hw_ctx->g_buf[task->dec.reg_index].use_flag = 0; + } + + (void)task; + return ret; +} + +static MPP_RET hal_vp9d_vdpu382_reset(void *hal) +{ + HalVp9dCtx *p_hal = (HalVp9dCtx*)hal; + Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx; + + hal_vp9d_enter(); + + memset(&hw_ctx->ls_info, 0, sizeof(hw_ctx->ls_info)); + hw_ctx->mv_base_addr = -1; + hw_ctx->pre_mv_base_addr = -1; + 
hw_ctx->last_segid_flag = 1; + memset(&hw_ctx->prob_ref_poc, 0, sizeof(hw_ctx->prob_ref_poc)); + hw_ctx->col_ref_poc = 0; + hw_ctx->segid_ref_poc = 0; + + hal_vp9d_leave(); + + return MPP_OK; +} + +static MPP_RET hal_vp9d_vdpu382_flush(void *hal) +{ + HalVp9dCtx *p_hal = (HalVp9dCtx*)hal; + Vdpu382Vp9dCtx *hw_ctx = (Vdpu382Vp9dCtx*)p_hal->hw_ctx; + + hal_vp9d_enter(); + + hw_ctx->mv_base_addr = -1; + hw_ctx->pre_mv_base_addr = -1; + + hal_vp9d_leave(); + + return MPP_OK; +} + +static MPP_RET hal_vp9d_vdpu382_control(void *hal, MpiCmd cmd_type, void *param) +{ + HalVp9dCtx *p_hal = (HalVp9dCtx*)hal; + + switch ((MpiCmd)cmd_type) { + case MPP_DEC_SET_FRAME_INFO : { + /* commit buffer stride */ + RK_U32 width = mpp_frame_get_width((MppFrame)param); + RK_U32 height = mpp_frame_get_height((MppFrame)param); + MppFrameFormat fmt = mpp_frame_get_fmt((MppFrame)param); + + if (MPP_FRAME_FMT_IS_FBC(fmt)) { + vdpu382_afbc_align_calc(p_hal->slots, (MppFrame)param, 0); + } else { + mpp_frame_set_hor_stride((MppFrame)param, vp9_hor_align(width)); + mpp_frame_set_ver_stride((MppFrame)param, vp9_ver_align(height)); + } + } break; + default : { + } break; + } + + return MPP_OK; +} + +const MppHalApi hal_vp9d_vdpu382 = { + .name = "vp9d_vdpu382", + .type = MPP_CTX_DEC, + .coding = MPP_VIDEO_CodingVP9, + .ctx_size = sizeof(Vdpu382Vp9dCtx), + .flag = 0, + .init = hal_vp9d_vdpu382_init, + .deinit = hal_vp9d_vdpu382_deinit, + .reg_gen = hal_vp9d_vdpu382_gen_regs, + .start = hal_vp9d_vdpu382_start, + .wait = hal_vp9d_vdpu382_wait, + .reset = hal_vp9d_vdpu382_reset, + .flush = hal_vp9d_vdpu382_flush, + .control = hal_vp9d_vdpu382_control, +}; diff --git a/mpp/hal/rkdec/vp9d/hal_vp9d_vdpu382.h b/mpp/hal/rkdec/vp9d/hal_vp9d_vdpu382.h new file mode 100644 index 00000000..d1d74a6c --- /dev/null +++ b/mpp/hal/rkdec/vp9d/hal_vp9d_vdpu382.h @@ -0,0 +1,32 @@ +/* + * Copyright 2022 Rockchip Electronics Co. 
LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __HAL_VP9D_VDPU382_H__ +#define __HAL_VP9D_VDPU382_H__ + +#include "mpp_hal.h" +#include "vdpu382.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const MppHalApi hal_vp9d_vdpu382; + +#ifdef __cplusplus +} +#endif +#endif /* __HAL_VP9D_VDPU382_H__ */