mirror of
				https://github.com/nyanmisaka/ffmpeg-rockchip.git
				synced 2025-10-26 10:20:52 +08:00 
			
		
		
		
	 d2ae5f77c6
			
		
	
	d2ae5f77c6
	
	
	
		
			
			Performance improvements: quant_bands: with: 681 decicycles in quant_bands, 8388453 runs, 155 skips without: 1190 decicycles in quant_bands, 8388386 runs, 222 skips Around 42% for the function Twoloop coder: abs_pow34: with/without: 7.82s/8.17s Around 4% for the entire encoder Both: with/without: 7.15s/8.17s Around 12% for the entire encoder Fast coder: abs_pow34: with/without: 3.40s/3.77s Around 10% for the entire encoder Both: with/without: 3.02s/3.77s Around 20% faster for the entire encoder Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com> Tested-by: Michael Niedermayer <michael@niedermayer.cc> Reviewed-by: James Almer <jamrial@gmail.com>
		
			
				
	
	
		
			193 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			193 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * AAC encoder trellis codebook selector
 | |
|  * Copyright (C) 2008-2009 Konstantin Shishkov
 | |
|  *
 | |
|  * This file is part of FFmpeg.
 | |
|  *
 | |
|  * FFmpeg is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU Lesser General Public
 | |
|  * License as published by the Free Software Foundation; either
 | |
|  * version 2.1 of the License, or (at your option) any later version.
 | |
|  *
 | |
|  * FFmpeg is distributed in the hope that it will be useful,
 | |
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|  * Lesser General Public License for more details.
 | |
|  *
 | |
|  * You should have received a copy of the GNU Lesser General Public
 | |
|  * License along with FFmpeg; if not, write to the Free Software
 | |
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | |
|  */
 | |
| 
 | |
| /**
 | |
|  * @file
 | |
|  * AAC encoder trellis codebook selector
 | |
|  * @author Konstantin Shishkov
 | |
|  */
 | |
| 
 | |
| /**
 | |
|  * This file contains a template for the codebook_trellis_rate selector function.
 | |
|  * The following functions from aacenc_quantization/util.h must already be
 | |
|  * declared by the file that includes this one. They are not included
 | |
|  * explicitly here so that alternative implementations can be provided:
 | |
|  *  - quantize_band_cost_bits
 | |
|  *  - abs_pow34_v
 | |
|  */
 | |
| 
 | |
| #ifndef AVCODEC_AACCODER_TRELLIS_H
 | |
| #define AVCODEC_AACCODER_TRELLIS_H
 | |
| 
 | |
| #include <float.h>
 | |
| #include "libavutil/mathematics.h"
 | |
| #include "avcodec.h"
 | |
| #include "put_bits.h"
 | |
| #include "aac.h"
 | |
| #include "aacenc.h"
 | |
| #include "aactab.h"
 | |
| #include "aacenctab.h"
 | |
| 
 | |
| /**
 | |
|  * trellis node used in the optimal codebook search (one per band and codebook)
 | |
|  */
 | |
| typedef struct TrellisBandCodingPath {
 | |
|     int prev_idx; ///< codebook index of the path point at the previous band, -1 if none
 | |
|     float cost;   ///< accumulated cost of the cheapest path ending at this point
 | |
|     int run;      ///< number of consecutive bands, ending here, that use this codebook
 | |
| } TrellisBandCodingPath;
 | |
| 
 | |
| 
 | |
| static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
 | |
|                                   int win, int group_len, const float lambda)
 | |
| {
 | |
|     TrellisBandCodingPath path[120][CB_TOT_ALL];
 | |
|     int w, swb, cb, start, size;
 | |
|     int i, j;
 | |
|     const int max_sfb  = sce->ics.max_sfb;
 | |
|     const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
 | |
|     const int run_esc  = (1 << run_bits) - 1;
 | |
|     int idx, ppos, count;
 | |
|     int stackrun[120], stackcb[120], stack_len;
 | |
|     float next_minbits = INFINITY;
 | |
|     int next_mincb = 0;
 | |
| 
 | |
|     s->abs_pow34(s->scoefs, sce->coeffs, 1024);
 | |
|     start = win*128;
 | |
|     for (cb = 0; cb < CB_TOT_ALL; cb++) {
 | |
|         path[0][cb].cost     = run_bits+4;
 | |
|         path[0][cb].prev_idx = -1;
 | |
|         path[0][cb].run      = 0;
 | |
|     }
 | |
|     for (swb = 0; swb < max_sfb; swb++) {
 | |
|         size = sce->ics.swb_sizes[swb];
 | |
|         if (sce->zeroes[win*16 + swb]) {
 | |
|             float cost_stay_here = path[swb][0].cost;
 | |
|             float cost_get_here  = next_minbits + run_bits + 4;
 | |
|             if (   run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
 | |
|                 != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
 | |
|                 cost_stay_here += run_bits;
 | |
|             if (cost_get_here < cost_stay_here) {
 | |
|                 path[swb+1][0].prev_idx = next_mincb;
 | |
|                 path[swb+1][0].cost     = cost_get_here;
 | |
|                 path[swb+1][0].run      = 1;
 | |
|             } else {
 | |
|                 path[swb+1][0].prev_idx = 0;
 | |
|                 path[swb+1][0].cost     = cost_stay_here;
 | |
|                 path[swb+1][0].run      = path[swb][0].run + 1;
 | |
|             }
 | |
|             next_minbits = path[swb+1][0].cost;
 | |
|             next_mincb = 0;
 | |
|             for (cb = 1; cb < CB_TOT_ALL; cb++) {
 | |
|                 path[swb+1][cb].cost = 61450;
 | |
|                 path[swb+1][cb].prev_idx = -1;
 | |
|                 path[swb+1][cb].run = 0;
 | |
|             }
 | |
|         } else {
 | |
|             float minbits = next_minbits;
 | |
|             int mincb = next_mincb;
 | |
|             int startcb = sce->band_type[win*16+swb];
 | |
|             startcb = aac_cb_in_map[startcb];
 | |
|             next_minbits = INFINITY;
 | |
|             next_mincb = 0;
 | |
|             for (cb = 0; cb < startcb; cb++) {
 | |
|                 path[swb+1][cb].cost = 61450;
 | |
|                 path[swb+1][cb].prev_idx = -1;
 | |
|                 path[swb+1][cb].run = 0;
 | |
|             }
 | |
|             for (cb = startcb; cb < CB_TOT_ALL; cb++) {
 | |
|                 float cost_stay_here, cost_get_here;
 | |
|                 float bits = 0.0f;
 | |
|                 if (cb >= 12 && sce->band_type[win*16+swb] != aac_cb_out_map[cb]) {
 | |
|                     path[swb+1][cb].cost = 61450;
 | |
|                     path[swb+1][cb].prev_idx = -1;
 | |
|                     path[swb+1][cb].run = 0;
 | |
|                     continue;
 | |
|                 }
 | |
|                 for (w = 0; w < group_len; w++) {
 | |
|                     bits += quantize_band_cost_bits(s, &sce->coeffs[start + w*128],
 | |
|                                                &s->scoefs[start + w*128], size,
 | |
|                                                sce->sf_idx[win*16+swb],
 | |
|                                                aac_cb_out_map[cb],
 | |
|                                                0, INFINITY, NULL, NULL, 0);
 | |
|                 }
 | |
|                 cost_stay_here = path[swb][cb].cost + bits;
 | |
|                 cost_get_here  = minbits            + bits + run_bits + 4;
 | |
|                 if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
 | |
|                     != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
 | |
|                     cost_stay_here += run_bits;
 | |
|                 if (cost_get_here < cost_stay_here) {
 | |
|                     path[swb+1][cb].prev_idx = mincb;
 | |
|                     path[swb+1][cb].cost     = cost_get_here;
 | |
|                     path[swb+1][cb].run      = 1;
 | |
|                 } else {
 | |
|                     path[swb+1][cb].prev_idx = cb;
 | |
|                     path[swb+1][cb].cost     = cost_stay_here;
 | |
|                     path[swb+1][cb].run      = path[swb][cb].run + 1;
 | |
|                 }
 | |
|                 if (path[swb+1][cb].cost < next_minbits) {
 | |
|                     next_minbits = path[swb+1][cb].cost;
 | |
|                     next_mincb = cb;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|         start += sce->ics.swb_sizes[swb];
 | |
|     }
 | |
| 
 | |
|     //convert resulting path from backward-linked list
 | |
|     stack_len = 0;
 | |
|     idx       = 0;
 | |
|     for (cb = 1; cb < CB_TOT_ALL; cb++)
 | |
|         if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
 | |
|             idx = cb;
 | |
|     ppos = max_sfb;
 | |
|     while (ppos > 0) {
 | |
|         av_assert1(idx >= 0);
 | |
|         cb = idx;
 | |
|         stackrun[stack_len] = path[ppos][cb].run;
 | |
|         stackcb [stack_len] = cb;
 | |
|         idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
 | |
|         ppos -= path[ppos][cb].run;
 | |
|         stack_len++;
 | |
|     }
 | |
|     //perform actual band info encoding
 | |
|     start = 0;
 | |
|     for (i = stack_len - 1; i >= 0; i--) {
 | |
|         cb = aac_cb_out_map[stackcb[i]];
 | |
|         put_bits(&s->pb, 4, cb);
 | |
|         count = stackrun[i];
 | |
|         memset(sce->zeroes + win*16 + start, !cb, count);
 | |
|         //XXX: memset when band_type is also uint8_t
 | |
|         for (j = 0; j < count; j++) {
 | |
|             sce->band_type[win*16 + start] = cb;
 | |
|             start++;
 | |
|         }
 | |
|         while (count >= run_esc) {
 | |
|             put_bits(&s->pb, run_bits, run_esc);
 | |
|             count -= run_esc;
 | |
|         }
 | |
|         put_bits(&s->pb, run_bits, count);
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| #endif /* AVCODEC_AACCODER_TRELLIS_H */
 |