[vp9d_vdpu34x]: Vp9d support hw prob

Signed-off-by: Yandong Lin <yandong.lin@rock-chips.com>
Change-Id: I6b4eadad2ea4e08eef8816f76bbabe3ec3f7494c
This commit is contained in:
Yandong Lin
2020-12-09 20:44:52 +08:00
committed by Herman Chen
parent de8ace6622
commit ad8d8a669a
9 changed files with 1877 additions and 121 deletions

View File

@@ -454,7 +454,7 @@ static RK_S32 inv_recenter_nonneg(RK_S32 v, RK_S32 m)
}
// differential forward probability updates
static RK_S32 update_prob(VpxRangeCoder *c, RK_S32 p)
static RK_S32 update_prob(VpxRangeCoder *c, RK_S32 p, RK_U8 *delta)
{
static const RK_S32 inv_map_table[255] = {
7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
@@ -507,7 +507,7 @@ static RK_S32 update_prob(VpxRangeCoder *c, RK_S32 p)
d += 64;
//av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
}
*delta = d;
return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
@@ -1069,6 +1069,7 @@ static RK_S32 decode_parser_header(Vp9CodecContext *ctx,
// as explicit copies if the fw update is missing (and skip the copy upon
// fw update)?
s->prob.p = s->prob_ctx[c].p;
memset(&s->prob_flag_delta, 0, sizeof(s->prob_flag_delta));
// txfm updates
if (s->lossless) {
s->txfmmode = TX_4X4;
@@ -1080,23 +1081,32 @@ static RK_S32 decode_parser_header(Vp9CodecContext *ctx,
if (s->txfmmode == TX_SWITCHABLE) {
for (i = 0; i < 2; i++) {
if (vpx_rac_get_prob_branchy(&s->c, 252))
s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.tx8p[i] = 1;
s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i],
&s->prob_flag_delta.p_delta.tx8p[i]);
}
}
for (i = 0; i < 2; i++)
for (j = 0; j < 2; j++) {
if (vpx_rac_get_prob_branchy(&s->c, 252))
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.tx16p[i][j] = 1;
s->prob.p.tx16p[i][j] =
update_prob(&s->c, s->prob.p.tx16p[i][j]);
update_prob(&s->c, s->prob.p.tx16p[i][j],
&s->prob_flag_delta.p_delta.tx16p[i][j]);
}
}
for (i = 0; i < 2; i++)
for (j = 0; j < 3; j++) {
if (vpx_rac_get_prob_branchy(&s->c, 252))
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.tx32p[i][j] = 1;
s->prob.p.tx32p[i][j] =
update_prob(&s->c, s->prob.p.tx32p[i][j]);
update_prob(&s->c, s->prob.p.tx32p[i][j],
&s->prob_flag_delta.p_delta.tx32p[i][j]);
}
}
}
}
@@ -1110,17 +1120,20 @@ static RK_S32 decode_parser_header(Vp9CodecContext *ctx,
for (l = 0; l < 6; l++)
for (m = 0; m < 6; m++) {
RK_U8 *p = s->prob.coef[i][j][k][l][m];
RK_U8 *p_flag = s->prob_flag_delta.coef_flag[i][j][k][l][m];
RK_U8 *p_delta = s->prob_flag_delta.coef_delta[i][j][k][l][m];
RK_U8 *r = ref[j][k][l][m];
if (m >= 3 && l == 0) // dc only has 3 pt
if (l == 0 && m >= 3) // dc only has 3 pt
break;
for (n = 0; n < 3; n++) {
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
p[n] = update_prob(&s->c, r[n]);
p_flag[n] = 1;
p[n] = update_prob(&s->c, r[n], &p_delta[n]);
} else {
p_flag[n] = 0;
p[n] = r[n];
}
}
p[3] = 0;
}
} else {
for (j = 0; j < 2; j++)
@@ -1129,10 +1142,9 @@ static RK_S32 decode_parser_header(Vp9CodecContext *ctx,
for (m = 0; m < 6; m++) {
RK_U8 *p = s->prob.coef[i][j][k][l][m];
RK_U8 *r = ref[j][k][l][m];
if (m > 3 && l == 0) // dc only has 3 pt
if (m >= 3 && l == 0) // dc only has 3 pt
break;
memcpy(p, r, 3);
p[3] = 0;
}
}
if (s->txfmmode == (RK_U32)i)
@@ -1142,28 +1154,42 @@ static RK_S32 decode_parser_header(Vp9CodecContext *ctx,
// mode updates
for (i = 0; i < 3; i++) {
if (vpx_rac_get_prob_branchy(&s->c, 252))
s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.skip[i] = 1;
s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i],
&s->prob_flag_delta.p_delta.skip[i]);
}
}
if (!s->keyframe && !s->intraonly) {
for (i = 0; i < 7; i++)
for (j = 0; j < 3; j++)
if (vpx_rac_get_prob_branchy(&s->c, 252))
for (j = 0; j < 3; j++) {
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.mv_mode[i][j] = 1;
s->prob.p.mv_mode[i][j] =
update_prob(&s->c, s->prob.p.mv_mode[i][j]);
update_prob(&s->c, s->prob.p.mv_mode[i][j],
&s->prob_flag_delta.p_delta.mv_mode[i][j]);
}
}
if (s->filtermode == FILTER_SWITCHABLE)
for (i = 0; i < 4; i++)
for (j = 0; j < 2; j++)
if (vpx_rac_get_prob_branchy(&s->c, 252))
for (j = 0; j < 2; j++) {
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.filter[i][j] = 1;
s->prob.p.filter[i][j] =
update_prob(&s->c, s->prob.p.filter[i][j]);
update_prob(&s->c, s->prob.p.filter[i][j],
&s->prob_flag_delta.p_delta.filter[i][j]);
}
}
for (i = 0; i < 4; i++) {
if (vpx_rac_get_prob_branchy(&s->c, 252))
s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.intra[i] = 1;
s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i],
&s->prob_flag_delta.p_delta.intra[i]);
}
}
@@ -1172,90 +1198,135 @@ static RK_S32 decode_parser_header(Vp9CodecContext *ctx,
if (s->comppredmode)
s->comppredmode += vpx_rac_get(&s->c);
if (s->comppredmode == PRED_SWITCHABLE)
for (i = 0; i < 5; i++)
if (vpx_rac_get_prob_branchy(&s->c, 252))
for (i = 0; i < 5; i++) {
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.comp[i] = 1;
s->prob.p.comp[i] =
update_prob(&s->c, s->prob.p.comp[i]);
update_prob(&s->c, s->prob.p.comp[i],
&s->prob_flag_delta.p_delta.comp[i]);
}
}
} else {
s->comppredmode = PRED_SINGLEREF;
}
if (s->comppredmode != PRED_COMPREF) {
for (i = 0; i < 5; i++) {
if (vpx_rac_get_prob_branchy(&s->c, 252))
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.single_ref[i][0] = 1;
s->prob.p.single_ref[i][0] =
update_prob(&s->c, s->prob.p.single_ref[i][0]);
if (vpx_rac_get_prob_branchy(&s->c, 252))
update_prob(&s->c, s->prob.p.single_ref[i][0],
&s->prob_flag_delta.p_delta.single_ref[i][0]);
}
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.single_ref[i][1] = 1;
s->prob.p.single_ref[i][1] =
update_prob(&s->c, s->prob.p.single_ref[i][1]);
update_prob(&s->c, s->prob.p.single_ref[i][1],
&s->prob_flag_delta.p_delta.single_ref[i][1]);
}
}
}
if (s->comppredmode != PRED_SINGLEREF) {
for (i = 0; i < 5; i++)
if (vpx_rac_get_prob_branchy(&s->c, 252))
for (i = 0; i < 5; i++) {
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.comp_ref[i] = 1;
s->prob.p.comp_ref[i] =
update_prob(&s->c, s->prob.p.comp_ref[i]);
update_prob(&s->c, s->prob.p.comp_ref[i],
&s->prob_flag_delta.p_delta.comp_ref[i]);
}
}
}
for (i = 0; i < 4; i++)
for (j = 0; j < 9; j++)
if (vpx_rac_get_prob_branchy(&s->c, 252))
for (j = 0; j < 9; j++) {
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.y_mode[i][j] = 1;
s->prob.p.y_mode[i][j] =
update_prob(&s->c, s->prob.p.y_mode[i][j]);
update_prob(&s->c, s->prob.p.y_mode[i][j],
&s->prob_flag_delta.p_delta.y_mode[i][j]);
}
}
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
for (k = 0; k < 3; k++)
if (vpx_rac_get_prob_branchy(&s->c, 252))
for (k = 0; k < 3; k++) {
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.partition[3 - i][j][k] = 1;
s->prob.p.partition[3 - i][j][k] =
update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
update_prob(&s->c, s->prob.p.partition[3 - i][j][k],
&s->prob_flag_delta.p_delta.partition[3 - i][j][k]);
}
}
// mv fields don't use the update_prob subexp model for some reason
for (i = 0; i < 3; i++)
if (vpx_rac_get_prob_branchy(&s->c, 252))
for (i = 0; i < 3; i++) {
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.mv_joint[i] = 1;
s->prob_flag_delta.p_delta.mv_joint[i] =
s->prob.p.mv_joint[i] = (vpx_rac_get_uint(&s->c, 7) << 1) | 1;
}
}
for (i = 0; i < 2; i++) {
if (vpx_rac_get_prob_branchy(&s->c, 252))
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.mv_comp[i].sign = 1;
s->prob_flag_delta.p_delta.mv_comp[i].sign =
s->prob.p.mv_comp[i].sign = (vpx_rac_get_uint(&s->c, 7) << 1) | 1;
}
for (j = 0; j < 10; j++)
if (vpx_rac_get_prob_branchy(&s->c, 252))
s->prob.p.mv_comp[i].classes[j] =
(vpx_rac_get_uint(&s->c, 7) << 1) | 1;
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.mv_comp[i].classes[j] = 1;
s->prob_flag_delta.p_delta.mv_comp[i].classes[j] =
s->prob.p.mv_comp[i].classes[j] = (vpx_rac_get_uint(&s->c, 7) << 1) | 1;
}
if (vpx_rac_get_prob_branchy(&s->c, 252))
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.mv_comp[i].class0 = 1;
s->prob_flag_delta.p_delta.mv_comp[i].class0 =
s->prob.p.mv_comp[i].class0 = (vpx_rac_get_uint(&s->c, 7) << 1) | 1;
}
for (j = 0; j < 10; j++)
if (vpx_rac_get_prob_branchy(&s->c, 252))
s->prob.p.mv_comp[i].bits[j] =
(vpx_rac_get_uint(&s->c, 7) << 1) | 1;
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.mv_comp[i].bits[j] = 1;
s->prob_flag_delta.p_delta.mv_comp[i].bits[j] =
s->prob.p.mv_comp[i].bits[j] = (vpx_rac_get_uint(&s->c, 7) << 1) | 1;
}
}
for (i = 0; i < 2; i++) {
for (j = 0; j < 2; j++)
for (k = 0; k < 3; k++)
if (vpx_rac_get_prob_branchy(&s->c, 252))
s->prob.p.mv_comp[i].class0_fp[j][k] =
(vpx_rac_get_uint(&s->c, 7) << 1) | 1;
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.mv_comp[i].class0_fp[j][k] = 1;
s->prob_flag_delta.p_delta.mv_comp[i].class0_fp[j][k] =
s->prob.p.mv_comp[i].class0_fp[j][k] = (vpx_rac_get_uint(&s->c, 7) << 1) | 1;
}
for (j = 0; j < 3; j++)
if (vpx_rac_get_prob_branchy(&s->c, 252))
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.mv_comp[i].fp[j] = 1;
s->prob_flag_delta.p_delta.mv_comp[i].fp[j] =
s->prob.p.mv_comp[i].fp[j] =
(vpx_rac_get_uint(&s->c, 7) << 1) | 1;
}
}
if (s->highprecisionmvs) {
for (i = 0; i < 2; i++) {
if (vpx_rac_get_prob_branchy(&s->c, 252))
s->prob.p.mv_comp[i].class0_hp =
(vpx_rac_get_uint(&s->c, 7) << 1) | 1;
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.mv_comp[i].class0_hp = 1;
s->prob_flag_delta.p_delta.mv_comp[i].class0_hp =
s->prob.p.mv_comp[i].class0_hp = (vpx_rac_get_uint(&s->c, 7) << 1) | 1;
}
if (vpx_rac_get_prob_branchy(&s->c, 252))
s->prob.p.mv_comp[i].hp =
(vpx_rac_get_uint(&s->c, 7) << 1) | 1;
if (vpx_rac_get_prob_branchy(&s->c, 252)) {
s->prob_flag_delta.p_flag.mv_comp[i].hp = 1;
s->prob_flag_delta.p_delta.mv_comp[i].hp =
s->prob.p.mv_comp[i].hp = (vpx_rac_get_uint(&s->c, 7) << 1) | 1;
}
}
}
}

View File

@@ -188,10 +188,16 @@ typedef struct VP9Context {
} prob_ctx[4];
struct {
prob_context p;
RK_U8 coef[4][2][2][6][6][11];
RK_U8 coef[4][2][2][6][6][3];
RK_U8 seg[7];
RK_U8 segpred[3];
} prob;
struct {
prob_context p_flag;
prob_context p_delta;
RK_U8 coef_flag[4][2][2][6][6][3];
RK_U8 coef_delta[4][2][2][6][6][3];
} prob_flag_delta;
struct {
RK_U32 partition[4][4][4];
RK_U32 skip[3][2];

View File

@@ -3,6 +3,29 @@
#include "vp9d_parser.h"
#include "vp9d_syntax.h"
/*
 * Reorder, in place, the ten 9-byte rows of a uv_mode table into the row
 * order consumed by the hardware:
 *   destination row 0 <- source row 2
 *   destination row 1 <- source row 0
 *   destination row 2 <- source row 1
 *   destination row 7 <- source row 8
 *   destination row 8 <- source row 7
 *   every other row keeps its position.
 *
 * uv_mode must be a [10][9] array of bytes. The macro is self-contained: it
 * evaluates its argument exactly once and does not rely on (or modify) any
 * loop variable of the caller.
 */
#define TRANS_TO_HW_STYLE(uv_mode) \
    do { \
        /* hw_src_row_[n] is the source row copied into destination row n */ \
        static const RK_U8 hw_src_row_[10] = { 2, 0, 1, 3, 4, 5, 6, 8, 7, 9 }; \
        RK_U8 (*hw_dst_)[9] = (uv_mode); \
        RK_U8 hw_rows_[10][9]; \
        int hw_row_; \
        /* build the permuted copy first: source and destination alias */ \
        for (hw_row_ = 0; hw_row_ < 10; hw_row_++) { \
            memcpy(hw_rows_[hw_row_], hw_dst_[hw_src_row_[hw_row_]], \
                   sizeof(hw_rows_[hw_row_])); \
        } \
        memcpy(hw_dst_, hw_rows_, sizeof(hw_rows_)); \
    } while (0)
static int vp9d_fill_segmentation(VP9Context *s, DXVA_segmentation_VP9 *seg)
{
int i;
@@ -49,7 +72,10 @@ static int vp9d_fill_picparams(Vp9CodecContext *ctx, DXVA_PicParams_VP9 *pic)
{
VP9Context *s = ctx->priv_data;
RK_U8 partition_probs[16][3];
RK_U8 uv_mode_prob[10][9];
RK_U8 partition_probs_flag[16][3];
RK_U8 partition_probs_delata[16][3];
DXVA_prob_vp9* prob_flag = &pic->prob_flag_delta.p_flag;
DXVA_prob_vp9* prob_delta = &pic->prob_flag_delta.p_delta;
int i;
pic->profile = ctx->profile;
@@ -105,10 +131,9 @@ static int vp9d_fill_picparams(Vp9CodecContext *ctx, DXVA_PicParams_VP9 *pic)
pic->log2_tile_rows = s->tiling.log2_tile_rows;
pic->first_partition_size = s->first_partition_size;
memcpy(pic->mvscale, s->mvscale, sizeof(s->mvscale));
memcpy(&ctx->pic_params.prob, &s->prob, sizeof(ctx->pic_params.prob));
memcpy(&pic->prob, &s->prob, sizeof(pic->prob));
memcpy(&pic->prob_flag_delta, &s->prob_flag_delta, sizeof(pic->prob_flag_delta));
{
RK_U8 *uv_ptr = NULL;
RK_U32 m = 0;
/*change partition to hardware need style*/
/*
hardware syntax
@@ -117,11 +142,20 @@ static int vp9d_fill_picparams(Vp9CodecContext *ctx, DXVA_PicParams_VP9 *pic)
*+++++32x32+++* *++++16x16++++*
*+++++64x64+++* *++++8x8++++++*
*/
m = 0;
for (i = 3; i >= 0; i--) {
memcpy(&partition_probs[m][0], &ctx->pic_params.prob.partition[i][0][0], 12);
m += 4;
RK_U32 m = 0;
RK_U32 len = sizeof(pic->prob.partition[0]);
RK_U32 step = len / sizeof(partition_probs[0]);
for (i = MPP_ARRAY_ELEMS(pic->prob.partition) - 1; i >= 0; i--) {
memcpy(&partition_probs[m][0], &pic->prob.partition[i][0][0], len);
memcpy(&partition_probs_flag[m][0], &prob_flag->partition[i][0][0], len);
memcpy(&partition_probs_delata[m][0], &prob_delta->partition[i][0][0], len);
m += step;
}
memcpy(pic->prob.partition, partition_probs, sizeof(partition_probs));
memcpy(prob_flag->partition, partition_probs_flag, sizeof(partition_probs_flag));
memcpy(prob_delta->partition, partition_probs_delata, sizeof(partition_probs_delata));
/*change uv_mode to hardware need style*/
/*
hardware syntax
@@ -137,24 +171,9 @@ static int vp9d_fill_picparams(Vp9CodecContext *ctx, DXVA_PicParams_VP9 *pic)
*+++++ tm ++++* *++++ tm ++++*
*/
for (i = 0; i < 10; i++) {
if (i == 0) {
uv_ptr = ctx->pic_params.prob.uv_mode[2];//dc
} else if ( i == 1) {
uv_ptr = ctx->pic_params.prob.uv_mode[0]; //h
} else if ( i == 2) {
uv_ptr = ctx->pic_params.prob.uv_mode[1]; //h
} else if ( i == 7) {
uv_ptr = ctx->pic_params.prob.uv_mode[8]; //d207
} else if (i == 8) {
uv_ptr = ctx->pic_params.prob.uv_mode[7]; //d63
} else {
uv_ptr = ctx->pic_params.prob.uv_mode[i];
}
memcpy(&uv_mode_prob[i], uv_ptr, 9);
}
memcpy(ctx->pic_params.prob.partition, partition_probs, sizeof(partition_probs));
memcpy(ctx->pic_params.prob.uv_mode, uv_mode_prob, sizeof(uv_mode_prob));
TRANS_TO_HW_STYLE(pic->prob.uv_mode);
TRANS_TO_HW_STYLE(prob_flag->uv_mode);
TRANS_TO_HW_STYLE(prob_delta->uv_mode);
}
return 0;
}

View File

@@ -60,6 +60,33 @@ typedef struct _segmentation_VP9 {
UCHAR feature_mask[8];
} DXVA_segmentation_VP9;
/*
 * One byte per non-coefficient VP9 probability entry.
 *
 * DXVA_PicParams_VP9 holds two instances of this table inside its
 * prob_flag_delta member:
 *   - p_flag : set to 1 by the parser for every entry whose update was
 *              signalled in the compressed header of the current frame;
 *   - p_delta: the value read for that entry (the delta index reported by
 *              update_prob(), or the new probability itself for the
 *              mv_joint / mv_comp entries).
 *
 * vp9d_fill_picparams() fills prob_flag_delta with a plain memcpy() from the
 * parser context, so field order and sizes here must not be changed
 * independently of the parser-side structure.
 * NOTE(review): the parser-side type (prob_context) is not visible here --
 * confirm that both layouts are identical before touching any field.
 */
typedef struct {
RK_U8 y_mode[4][9];
/* rows are reordered to the hardware order by vp9d_fill_picparams() */
RK_U8 uv_mode[10][9];
RK_U8 filter[4][2];
RK_U8 mv_mode[7][3];
RK_U8 intra[4];
RK_U8 comp[5];
RK_U8 single_ref[5][2];
RK_U8 comp_ref[5];
RK_U8 tx32p[2][3];
RK_U8 tx16p[2][2];
RK_U8 tx8p[2];
RK_U8 skip[3];
RK_U8 mv_joint[3];
/* motion vector component entries; mv_comp has two elements */
struct {
RK_U8 sign;
RK_U8 classes[10];
RK_U8 class0;
RK_U8 bits[10];
RK_U8 class0_fp[2][3];
RK_U8 fp[3];
/* class0_hp / hp are only parsed when high precision mvs are enabled */
RK_U8 class0_hp;
RK_U8 hp;
} mv_comp[2];
/* first dimension is stored in reverse order by vp9d_fill_picparams() */
RK_U8 partition[4][4][3];
} DXVA_prob_vp9;
typedef struct _DXVA_PicParams_VP9 {
DXVA_PicEntry_VPx CurrPic;
UCHAR profile;
@@ -142,7 +169,7 @@ typedef struct _DXVA_PicParams_VP9 {
UCHAR hp;
} mv_comp[2];
UCHAR partition[4][4][3];
UCHAR coef[4][2][2][6][6][11];
UCHAR coef[4][2][2][6][6][3];
} prob;
struct {
UINT partition[4][4][4];
@@ -170,6 +197,12 @@ typedef struct _DXVA_PicParams_VP9 {
UINT coef[4][2][2][6][6][3];
UINT eob[4][2][2][6][6][2];
} counts;
struct {
DXVA_prob_vp9 p_flag;
DXVA_prob_vp9 p_delta;
RK_U8 coef_flag[4][2][2][6][6][3];
RK_U8 coef_delta[4][2][2][6][6][3];
} prob_flag_delta;
USHORT mvscale[3][2];
CHAR txmode;
CHAR refmode;

View File

@@ -228,7 +228,7 @@ typedef struct Vdpu34xRegVp9dAddr_t {
RK_U32 reg161_pps_base;
RK_U32 reg162_last_porb_base;
RK_U32 reg162_last_prob_base;
RK_U32 reg163_rps_base;
@@ -250,7 +250,6 @@ typedef struct Vdpu34xRegVp9dAddr_t {
RK_U32 reg172_update_prob_wr_base;
RK_U32 reg173_179_no_use[7];
RK_U32 reg180_scanlist_base;

File diff suppressed because it is too large Load Diff

View File

@@ -52,7 +52,7 @@ typedef RK_U8 vp9_prob;
#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2)
#define MV_FP_SIZE 4
#define PROBE_SIZE 4864
#define PROB_SIZE 4864
#define COUNT_SIZE 13208
/*
* nCtuX*nCtuY*8*8/2
@@ -61,6 +61,8 @@ typedef RK_U8 vp9_prob;
*/
#define MAX_SEGMAP_SIZE 73728
#define VP9_DUMP 0
//!< memory malloc check
#define MEM_CHECK(ret, val, ...)\
do{\
@@ -83,7 +85,9 @@ RK_U32 vp9_ver_align(RK_U32 val);
RK_U32 vp9_hor_align(RK_U32 val);
MPP_RET hal_vp9d_output_probe(void *buf, void *dxva);
MPP_RET hal_vp9d_prob_flag_delta(void *buf, void *dxva);
void hal_vp9d_update_counts(void *buf, void *dxva);
MPP_RET hal_vp9d_prob_default(void *buf, void *dxva);
#ifdef __cplusplus
}

View File

@@ -61,7 +61,7 @@ static MPP_RET hal_vp9d_alloc_res(HalVp9dCtx *hal)
for (i = 0; i < MAX_GEN_REG; i++) {
hw_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(VP9_REGS));
ret = mpp_buffer_get(p_hal->group,
&hw_ctx->g_buf[i].probe_base, PROBE_SIZE);
&hw_ctx->g_buf[i].probe_base, PROB_SIZE);
if (ret) {
mpp_err("vp9 probe_base get buffer failed\n");
return ret;
@@ -87,7 +87,7 @@ static MPP_RET hal_vp9d_alloc_res(HalVp9dCtx *hal)
}
} else {
hw_ctx->hw_regs = mpp_calloc_size(void, sizeof(VP9_REGS));
ret = mpp_buffer_get(p_hal->group, &hw_ctx->probe_base, PROBE_SIZE);
ret = mpp_buffer_get(p_hal->group, &hw_ctx->probe_base, PROB_SIZE);
if (ret) {
mpp_err("vp9 probe_base get buffer failed\n");
return ret;

View File

@@ -34,12 +34,15 @@
#include "vdpu34x_vp9d.h"
#include "vp9d_syntax.h"
#define HW_PROB 1
typedef struct Vdpu34xVp9dCtx_t {
Vp9dRegBuf g_buf[MAX_GEN_REG];
MppBuffer probe_base;
MppBuffer count_base;
MppBuffer segid_cur_base;
MppBuffer segid_last_base;
MppBuffer prob_default_base;
void* hw_regs;
RK_S32 mv_base_addr;
RK_S32 pre_mv_base_addr;
@@ -61,6 +64,8 @@ typedef struct Vdpu34xVp9dCtx_t {
HalBufs cmv_bufs;
RK_S32 mv_size;
RK_S32 mv_count;
RK_U32 prob_ctx_valid[4];
MppBuffer prob_loop_base[4];
} Vdpu34xVp9dCtx;
static MPP_RET hal_vp9d_alloc_res(HalVp9dCtx *hal)
@@ -74,7 +79,7 @@ static MPP_RET hal_vp9d_alloc_res(HalVp9dCtx *hal)
for (i = 0; i < MAX_GEN_REG; i++) {
hw_ctx->g_buf[i].hw_regs = mpp_calloc_size(void, sizeof(Vdpu34xVp9dRegSet));
ret = mpp_buffer_get(p_hal->group,
&hw_ctx->g_buf[i].probe_base, PROBE_SIZE);
&hw_ctx->g_buf[i].probe_base, PROB_SIZE);
if (ret) {
mpp_err("vp9 probe_base get buffer failed\n");
return ret;
@@ -100,11 +105,24 @@ static MPP_RET hal_vp9d_alloc_res(HalVp9dCtx *hal)
}
} else {
hw_ctx->hw_regs = mpp_calloc_size(void, sizeof(Vdpu34xVp9dRegSet));
ret = mpp_buffer_get(p_hal->group, &hw_ctx->probe_base, PROBE_SIZE);
ret = mpp_buffer_get(p_hal->group, &hw_ctx->probe_base, PROB_SIZE);
if (ret) {
mpp_err("vp9 probe_base get buffer failed\n");
return ret;
}
ret = mpp_buffer_get(p_hal->group, &hw_ctx->prob_default_base, PROB_SIZE);
if (ret) {
mpp_err("vp9 probe_default_base get buffer failed\n");
return ret;
}
for (i = 0; i < 4; i++) {
ret = mpp_buffer_get(p_hal->group, &hw_ctx->prob_loop_base[i], PROB_SIZE);
if (ret) {
mpp_err("vp9 probe_loop_base get buffer failed\n");
return ret;
}
}
ret = mpp_buffer_get(p_hal->group, &hw_ctx->count_base, COUNT_SIZE);
if (ret) {
mpp_err("vp9 count_base get buffer failed\n");
@@ -174,6 +192,22 @@ static MPP_RET hal_vp9d_release_res(HalVp9dCtx *hal)
return ret;
}
}
/* Give back the default-probability buffer if it was allocated. */
if (hw_ctx->prob_default_base) {
    ret = mpp_buffer_put(hw_ctx->prob_default_base);
    if (ret) {
        /* name the buffer and the operation that actually failed */
        mpp_err("vp9 prob_default_base put buffer failed\n");
        return ret;
    }
}
/* Give back each of the four per-frame-context probability buffers. */
for (i = 0; i < 4; i++) {
    if (hw_ctx->prob_loop_base[i]) {
        ret = mpp_buffer_put(hw_ctx->prob_loop_base[i]);
        if (ret) {
            mpp_err("vp9 prob_loop_base put buffer failed\n");
            return ret;
        }
    }
}
if (hw_ctx->count_base) {
ret = mpp_buffer_put(hw_ctx->count_base);
if (ret) {
@@ -309,6 +343,7 @@ static MPP_RET hal_vp9d_vdpu34x_gen_regs(void *hal, HalTaskInfo *task)
Vdpu34xVp9dCtx *hw_ctx = (Vdpu34xVp9dCtx*)p_hal->hw_ctx;
DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;
RK_S32 mv_size = pic_param->width * pic_param->height / 2;
RK_U32 frame_ctx_id = pic_param->frame_context_idx;
if (p_hal->fast_mode) {
for (i = 0; i < MAX_GEN_REG; i++) {
@@ -349,10 +384,80 @@ static MPP_RET hal_vp9d_vdpu34x_gen_regs(void *hal, HalTaskInfo *task)
Vdpu34xVp9dRegSet *vp9_hw_regs = (Vdpu34xVp9dRegSet*)hw_ctx->hw_regs;
intraFlag = (!pic_param->frame_type || pic_param->intra_only);
hal_vp9d_output_probe(mpp_buffer_get_ptr(hw_ctx->probe_base), task->dec.syntax.data);
stream_len = (RK_S32)mpp_packet_get_length(task->dec.input_packet);
memset(hw_ctx->hw_regs, 0, sizeof(Vdpu34xVp9dRegSet));
vp9_hw_regs->common.reg013.cur_pic_is_idr = intraFlag;
#if HW_PROB
hal_vp9d_prob_flag_delta(mpp_buffer_get_ptr(hw_ctx->probe_base), task->dec.syntax.data);
if (intraFlag)
hal_vp9d_prob_default(mpp_buffer_get_ptr(hw_ctx->prob_default_base), task->dec.syntax.data);
/* config reg103 */
vp9_hw_regs->vp9d_param.reg103.prob_update_en = 1;
vp9_hw_regs->vp9d_param.reg103.intra_only_flag = intraFlag;
if (!intraFlag) {
vp9_hw_regs->vp9d_param.reg103.txfmmode_rfsh_en = (pic_param->txmode == 4) ? 1 : 0;
vp9_hw_regs->vp9d_param.reg103.interp_filter_switch_en = pic_param->interp_filter == 4 ? 1 : 0;
}
vp9_hw_regs->vp9d_param.reg103.ref_mode_rfsh_en = 1;
vp9_hw_regs->vp9d_param.reg103.single_ref_rfsh_en = 1;
vp9_hw_regs->vp9d_param.reg103.comp_ref_rfsh_en = 1;
vp9_hw_regs->vp9d_param.reg103.inter_coef_rfsh_flag = 0;
vp9_hw_regs->vp9d_param.reg103.refresh_en =
!pic_param->error_resilient_mode && !pic_param->parallelmode;
vp9_hw_regs->vp9d_param.reg103.prob_save_en = pic_param->refresh_frame_context;
vp9_hw_regs->vp9d_param.reg103.allow_high_precision_mv = pic_param->allow_high_precision_mv;
vp9_hw_regs->vp9d_param.reg103.last_key_frame_flag = hw_ctx->ls_info.last_intra_only;
/* config last prob base and update write base */
{
/*
 * Invalidate the saved probability contexts that this frame resets.
 *
 * Only intra (key or intra-only) and error-resilient frames reset contexts:
 *   - key frame, error resilient mode, or reset_frame_context == 3:
 *     all four saved contexts become invalid;
 *   - otherwise (an intra-only frame) with reset_frame_context == 2:
 *     only the context selected by frame_context_idx becomes invalid.
 *
 * The first test must look at the key-frame condition (!frame_type) and not
 * at intraFlag: intraFlag also covers intra-only frames, which would make
 * the test identical to the enclosing one and leave the
 * reset_frame_context == 2 branch unreachable.
 */
if (intraFlag || pic_param->error_resilient_mode) {
    if (!pic_param->frame_type
        || pic_param->error_resilient_mode
        || (pic_param->reset_frame_context == 3)) {
        memset(hw_ctx->prob_ctx_valid, 0, sizeof(hw_ctx->prob_ctx_valid));
    } else if (pic_param->reset_frame_context == 2) {
        hw_ctx->prob_ctx_valid[frame_ctx_id] = 0;
    }
}
#if VP9_DUMP
{
    /*
     * Debug aid: write the probability buffer that will be handed to the
     * hardware as "last prob" for this frame to a numbered text file, one
     * 64-bit word per line (high 32 bits first).
     */
    static RK_U32 file_cnt = 0;
    char file_name[128];
    FILE *fp = NULL;

    snprintf(file_name, sizeof(file_name), "/data/vp9/prob_last_%d.txt", file_cnt);
    fp = fopen(file_name, "wb");
    if (fp) {
        RK_U32 i = 0;
        RK_U32 *tmp = NULL;

        /* same buffer selection as the reg162_last_prob_base setup below */
        if (hw_ctx->prob_ctx_valid[frame_ctx_id]) {
            tmp = (RK_U32 *)mpp_buffer_get_ptr(hw_ctx->prob_loop_base[frame_ctx_id]);
        } else {
            tmp = (RK_U32 *)mpp_buffer_get_ptr(hw_ctx->prob_default_base);
        }
        for (i = 0; i < PROB_SIZE / 4; i += 2) {
            fprintf(fp, "%08x%08x\n", tmp[i + 1], tmp[i]);
        }
        /* fclose flushes the stream, no separate fflush needed */
        fclose(fp);
    } else {
        /* the dump directory may not exist; skip the dump instead of crashing */
        mpp_err("open %s failed\n", file_name);
    }
    /* keep the file index in step with the frame count even if a dump was skipped */
    file_cnt++;
}
#endif
if (hw_ctx->prob_ctx_valid[frame_ctx_id]) {
vp9_hw_regs->vp9d_addr.reg162_last_prob_base =
mpp_buffer_get_fd(hw_ctx->prob_loop_base[frame_ctx_id]);
} else {
vp9_hw_regs->vp9d_addr.reg162_last_prob_base = mpp_buffer_get_fd(hw_ctx->prob_default_base);
hw_ctx->prob_ctx_valid[frame_ctx_id] |= pic_param->refresh_frame_context;
}
vp9_hw_regs->vp9d_addr.reg172_update_prob_wr_base =
mpp_buffer_get_fd(hw_ctx->prob_loop_base[frame_ctx_id]);
}
vp9_hw_regs->vp9d_addr.reg160_delta_prob_base = mpp_buffer_get_fd(hw_ctx->probe_base);
#else
hal_vp9d_output_probe(mpp_buffer_get_ptr(hw_ctx->probe_base), task->dec.syntax.data);
#endif
vp9_hw_regs->common.reg013.cur_pic_is_idr = !pic_param->frame_type;
vp9_hw_regs->common.reg009.dec_mode = 2; //set as vp9 dec
vp9_hw_regs->common.reg016_str_len = ((stream_len + 15) & (~15)) + 0x80;
@@ -513,7 +618,6 @@ static MPP_RET hal_vp9d_vdpu34x_gen_regs(void *hal, HalTaskInfo *task)
hw_ctx->ls_info.last_intra_only = 1;
}
vp9_hw_regs->vp9d_param.reg75.mode_deltas_lastframe = 0;
vp9_hw_regs->vp9d_param.reg75.segmentation_enable_lstframe = hw_ctx->ls_info.segmentation_enable_flag_last;
vp9_hw_regs->vp9d_param.reg75.last_show_frame = hw_ctx->ls_info.last_show_frame;
vp9_hw_regs->vp9d_param.reg75.last_intra_only = hw_ctx->ls_info.last_intra_only;
@@ -608,13 +712,39 @@ static MPP_RET hal_vp9d_vdpu34x_start(void *hal, HalTaskInfo *task)
mpp_assert(hw_regs);
if (hal_vp9d_debug & HAL_VP9D_DBG_REG) {
RK_U32 *p = (RK_U32 *)hw_ctx->hw_regs;
RK_U32 i = 0;
for (i = 0; i < sizeof(Vdpu34xVp9dRegSet) / 4; i++)
mpp_log("set regs[%02d]: %08X\n", i, *p++);
#if VP9_DUMP
{
static RK_U32 file_cnt = 0;
char file_name[128];
sprintf(file_name, "/data/vp9_regs/reg_%d.txt", file_cnt);
FILE *fp = fopen(file_name, "wb");
RK_U32 i = 0;
RK_U32 *tmp = NULL;
tmp = (RK_U32 *)&hw_regs->common;
for (i = 0; i < sizeof(hw_regs->common) / 4; i++) {
fprintf(fp, "reg[%d] 0x%08x\n", i + 8, tmp[i]);
}
fprintf(fp, "\n");
tmp = (RK_U32 *)&hw_regs->vp9d_param;
for (i = 0; i < sizeof(hw_regs->vp9d_param) / 4; i++) {
fprintf(fp, "reg[%d] 0x%08x\n", i + 64, tmp[i]);
}
fprintf(fp, "\n");
tmp = (RK_U32 *)&hw_regs->common_addr;
for (i = 0; i < sizeof(hw_regs->common_addr) / 4; i++) {
fprintf(fp, "reg[%d] 0x%08x\n", i + 128, tmp[i]);
}
fprintf(fp, "\n");
tmp = (RK_U32 *)&hw_regs->vp9d_addr;
for (i = 0; i < sizeof(hw_regs->vp9d_addr) / 4; i++) {
fprintf(fp, "reg[%d] 0x%08x\n", i + 160, tmp[i]);
}
file_cnt++;
fflush(fp);
fclose(fp);
}
#endif
do {
MppDevRegWrCfg wr_cfg;
@@ -712,12 +842,13 @@ static MPP_RET hal_vp9d_vdpu34x_wait(void *hal, HalTaskInfo *task)
mpp_buf_slot_get_prop(p_hal->slots, task->dec.output, SLOT_FRAME_PTR, &mframe);
mpp_frame_set_errinfo(mframe, 1);
}
#if !HW_PROB
if (p_hal->dec_cb && task->dec.flags.wait_done) {
DXVA_PicParams_VP9 *pic_param = (DXVA_PicParams_VP9*)task->dec.syntax.data;
hal_vp9d_update_counts(mpp_buffer_get_ptr(hw_ctx->count_base), task->dec.syntax.data);
mpp_callback(p_hal->dec_cb, DEC_PARSER_CALLBACK, &pic_param->counts);
}
#endif
if (p_hal->fast_mode) {
hw_ctx->g_buf[task->dec.reg_index].use_flag = 0;
}