rv34: joint coefficient decoding and dequantization

Perform dequantization while decoding coefficients instead of performing it on the entire coefficient buffer. Since quantized coefficients are very sparse, this usually causes a small speedup. The speedup is around 1% on a Panda board compared to the NEON code removed here. The global speedup is probably around 3%. Signed-off-by: Kostya Shishkov <kostya.shishkov@gmail.com>
2025-11-03 01:33:50 +08:00 · 2012-01-01 15:28:47 +01:00
parent 0749720b6c
commit 98f24ecd6c
6 changed files with 44 additions and 90 deletions
--- a/libavcodec/rv34dsp.c
+++ b/libavcodec/rv34dsp.c
@@ -100,26 +100,10 @@ static void rv34_inv_transform_noround_c(DCTELEM *block){
 /** @} */ // transform


-/**
- * Dequantize ordinary 4x4 block.
- */
-void ff_rv34_dequant4x4_neon(DCTELEM *block, int Qdc, int Q);
-static void rv34_dequant4x4_c(DCTELEM *block, int Qdc, int Q)
-{
-    int i, j;
-
-    block[0] = (block[0] * Qdc + 8) >> 4;
-    for (i = 0; i < 4; i++)
-        for (j = !i; j < 4; j++)
-            block[j + i*8] = (block[j + i*8] * Q + 8) >> 4;
-}
-
 av_cold void ff_rv34dsp_init(RV34DSPContext *c, DSPContext* dsp) {
    c->rv34_inv_transform_tab[0] = rv34_inv_transform_c;
    c->rv34_inv_transform_tab[1] = rv34_inv_transform_noround_c;

-    c->rv34_dequant4x4 = rv34_dequant4x4_c;
-
    if (HAVE_NEON)
        ff_rv34dsp_init_neon(c, dsp);
 }