rv34: joint coefficient decoding and dequantization

Perform dequantization while decoding coefficients instead of performing it
on the entire coefficients buffer.

Since quantized coefficients are very sparse, this usually causes a small
speedup. Speedup of around 1% on Panda board compared to the removed here
neon code. Global speedup is probably around 3%.

Signed-off-by: Kostya Shishkov <kostya.shishkov@gmail.com>
This commit is contained in:
Christophe GISQUET
2012-01-01 15:28:47 +01:00
committed by Kostya Shishkov
parent 0749720b6c
commit 98f24ecd6c
6 changed files with 44 additions and 90 deletions

View File

@@ -100,26 +100,10 @@ static void rv34_inv_transform_noround_c(DCTELEM *block){
/** @} */ // transform
/**
* Dequantize ordinary 4x4 block.
*/
void ff_rv34_dequant4x4_neon(DCTELEM *block, int Qdc, int Q);
static void rv34_dequant4x4_c(DCTELEM *block, int Qdc, int Q)
{
int i, j;
block[0] = (block[0] * Qdc + 8) >> 4;
for (i = 0; i < 4; i++)
for (j = !i; j < 4; j++)
block[j + i*8] = (block[j + i*8] * Q + 8) >> 4;
}
av_cold void ff_rv34dsp_init(RV34DSPContext *c, DSPContext* dsp) {
c->rv34_inv_transform_tab[0] = rv34_inv_transform_c;
c->rv34_inv_transform_tab[1] = rv34_inv_transform_noround_c;
c->rv34_dequant4x4 = rv34_dequant4x4_c;
if (HAVE_NEON)
ff_rv34dsp_init_neon(c, dsp);
}