mirror of
				https://github.com/nyanmisaka/ffmpeg-rockchip.git
				synced 2025-10-31 20:42:49 +08:00 
			
		
		
		
	 9bc3d3355f
			
		
	
	9bc3d3355f
	
	
	
		
			
ff_huff_build_tree uses qsort underneath. AV_QSORT is substantially faster
due to the inlining of the comparison callback. Furthermore, this code is
reasonably performance critical, since in e.g. the fraps codec,
ff_huff_build_tree is called on every frame. This routine is also called in
vp6 on every frame in some circumstances.

Sample benchmark (x86-64, Haswell, GNU/Linux), vp6 from FATE:

vp6 (old):
  78930 decicycles in qsort,    1 runs, 0 skips
  45330 decicycles in qsort,    2 runs, 0 skips
  27825 decicycles in qsort,    4 runs, 0 skips
  17471 decicycles in qsort,    8 runs, 0 skips
  12296 decicycles in qsort,   16 runs, 0 skips
   9554 decicycles in qsort,   32 runs, 0 skips
   8404 decicycles in qsort,   64 runs, 0 skips
   7405 decicycles in qsort,  128 runs, 0 skips
   6740 decicycles in qsort,  256 runs, 0 skips
   7540 decicycles in qsort,  512 runs, 0 skips
   9498 decicycles in qsort, 1024 runs, 0 skips
   9938 decicycles in qsort, 2048 runs, 0 skips
   8043 decicycles in qsort, 4095 runs, 1 skips

vp6 (new):
  15880 decicycles in qsort,    1 runs, 0 skips
  10730 decicycles in qsort,    2 runs, 0 skips
  10155 decicycles in qsort,    4 runs, 0 skips
   7805 decicycles in qsort,    8 runs, 0 skips
   6883 decicycles in qsort,   16 runs, 0 skips
   6305 decicycles in qsort,   32 runs, 0 skips
   5854 decicycles in qsort,   64 runs, 0 skips
   5152 decicycles in qsort,  128 runs, 0 skips
   4452 decicycles in qsort,  256 runs, 0 skips
   4161 decicycles in qsort,  511 runs, 1 skips
   4081 decicycles in qsort, 1023 runs, 1 skips
   4072 decicycles in qsort, 2047 runs, 1 skips
   4004 decicycles in qsort, 4095 runs, 1 skips

Reviewed-by: Timothy Gu <timothygu99@gmail.com>
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
		
			
				
	
	
		
			199 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			199 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 * Copyright (c) 2006 Konstantin Shishkov
 * Copyright (c) 2007 Loren Merritt
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 | |
| 
 | |
/**
 * @file
 * Huffman tree builder and VLC generator
 */
 | |
| 
 | |
| #include <stdint.h>
 | |
| 
 | |
| #include "libavutil/qsort.h"
 | |
| #include "avcodec.h"
 | |
| #include "get_bits.h"
 | |
| #include "huffman.h"
 | |
| 
 | |
/* value stored in Node.sym to mark an internal (merged) Huffman tree node */
#define HNODE (-1)
 | |
| 
 | |
/**
 * One entry of the binary min-heap used while generating code lengths.
 */
typedef struct HeapElem {
    uint64_t val;   /* ordering key: the smallest val sits at the heap root */
    int name;       /* identifier carried along with the key when entries move */
} HeapElem;

/**
 * Sift the entry at index root down until neither of its children has a
 * smaller val, restoring the min-heap order of the subtree below it.
 *
 * @param h    heap storage; children of index i are 2*i+1 and 2*i+2
 * @param root index of the entry that may be out of place
 * @param size number of entries in h
 */
static void heap_sift(HeapElem *h, int root, int size)
{
    int parent = root;

    for (;;) {
        int smallest = 2 * parent + 1;
        HeapElem tmp;

        /* no children left: the entry has reached a leaf position */
        if (smallest >= size)
            return;

        /* choose the right child only when it is strictly smaller */
        if (smallest + 1 < size && h[smallest + 1].val < h[smallest].val)
            smallest++;

        /* heap order already holds here, so nothing below can be violated */
        if (h[parent].val <= h[smallest].val)
            return;

        tmp         = h[parent];
        h[parent]   = h[smallest];
        h[smallest] = tmp;
        parent      = smallest;
    }
}
 | |
| 
 | |
| int ff_huff_gen_len_table(uint8_t *dst, const uint64_t *stats, int stats_size, int skip0)
 | |
| {
 | |
|     HeapElem *h  = av_malloc_array(sizeof(*h), stats_size);
 | |
|     int *up      = av_malloc_array(sizeof(*up) * 2, stats_size);
 | |
|     uint8_t *len = av_malloc_array(sizeof(*len) * 2, stats_size);
 | |
|     uint16_t *map= av_malloc_array(sizeof(*map), stats_size);
 | |
|     int offset, i, next;
 | |
|     int size = 0;
 | |
|     int ret = 0;
 | |
| 
 | |
|     if (!h || !up || !len || !map) {
 | |
|         ret = AVERROR(ENOMEM);
 | |
|         goto end;
 | |
|     }
 | |
| 
 | |
|     for (i = 0; i<stats_size; i++) {
 | |
|         dst[i] = 255;
 | |
|         if (stats[i] || !skip0)
 | |
|             map[size++] = i;
 | |
|     }
 | |
| 
 | |
|     for (offset = 1; ; offset <<= 1) {
 | |
|         for (i=0; i < size; i++) {
 | |
|             h[i].name = i;
 | |
|             h[i].val = (stats[map[i]] << 14) + offset;
 | |
|         }
 | |
|         for (i = size / 2 - 1; i >= 0; i--)
 | |
|             heap_sift(h, i, size);
 | |
| 
 | |
|         for (next = size; next < size * 2 - 1; next++) {
 | |
|             // merge the two smallest entries, and put it back in the heap
 | |
|             uint64_t min1v = h[0].val;
 | |
|             up[h[0].name] = next;
 | |
|             h[0].val = INT64_MAX;
 | |
|             heap_sift(h, 0, size);
 | |
|             up[h[0].name] = next;
 | |
|             h[0].name = next;
 | |
|             h[0].val += min1v;
 | |
|             heap_sift(h, 0, size);
 | |
|         }
 | |
| 
 | |
|         len[2 * size - 2] = 0;
 | |
|         for (i = 2 * size - 3; i >= size; i--)
 | |
|             len[i] = len[up[i]] + 1;
 | |
|         for (i = 0; i < size; i++) {
 | |
|             dst[map[i]] = len[up[i]] + 1;
 | |
|             if (dst[map[i]] >= 32) break;
 | |
|         }
 | |
|         if (i==size) break;
 | |
|     }
 | |
| end:
 | |
|     av_free(h);
 | |
|     av_free(up);
 | |
|     av_free(len);
 | |
|     av_free(map);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static void get_tree_codes(uint32_t *bits, int16_t *lens, uint8_t *xlat,
 | |
|                            Node *nodes, int node,
 | |
|                            uint32_t pfx, int pl, int *pos, int no_zero_count)
 | |
| {
 | |
|     int s;
 | |
| 
 | |
|     s = nodes[node].sym;
 | |
|     if (s != HNODE || (no_zero_count && !nodes[node].count)) {
 | |
|         bits[*pos] = pfx;
 | |
|         lens[*pos] = pl;
 | |
|         xlat[*pos] = s;
 | |
|         (*pos)++;
 | |
|     } else {
 | |
|         pfx <<= 1;
 | |
|         pl++;
 | |
|         get_tree_codes(bits, lens, xlat, nodes, nodes[node].n0, pfx, pl,
 | |
|                        pos, no_zero_count);
 | |
|         pfx |= 1;
 | |
|         get_tree_codes(bits, lens, xlat, nodes, nodes[node].n0 + 1, pfx, pl,
 | |
|                        pos, no_zero_count);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static int build_huff_tree(VLC *vlc, Node *nodes, int head, int flags, int nb_bits)
 | |
| {
 | |
|     int no_zero_count = !(flags & FF_HUFFMAN_FLAG_ZERO_COUNT);
 | |
|     uint32_t bits[256];
 | |
|     int16_t lens[256];
 | |
|     uint8_t xlat[256];
 | |
|     int pos = 0;
 | |
| 
 | |
|     get_tree_codes(bits, lens, xlat, nodes, head, 0, 0,
 | |
|                    &pos, no_zero_count);
 | |
|     return ff_init_vlc_sparse(vlc, nb_bits, pos, lens, 2, 2, bits, 4, 4, xlat, 1, 1, 0);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|  * nodes size must be 2*nb_codes
 | |
|  * first nb_codes nodes.count must be set
 | |
|  */
 | |
| int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes, int nb_bits,
 | |
|                        Node *nodes, HuffCmp cmp, int flags)
 | |
| {
 | |
|     int i, j;
 | |
|     int cur_node;
 | |
|     int64_t sum = 0;
 | |
| 
 | |
|     for (i = 0; i < nb_codes; i++) {
 | |
|         nodes[i].sym = i;
 | |
|         nodes[i].n0 = -2;
 | |
|         sum += nodes[i].count;
 | |
|     }
 | |
| 
 | |
|     if (sum >> 31) {
 | |
|         av_log(avctx, AV_LOG_ERROR,
 | |
|                "Too high symbol frequencies. "
 | |
|                "Tree construction is not possible\n");
 | |
|         return -1;
 | |
|     }
 | |
|     AV_QSORT(nodes, nb_codes, Node, cmp);
 | |
|     cur_node = nb_codes;
 | |
|     nodes[nb_codes*2-1].count = 0;
 | |
|     for (i = 0; i < nb_codes * 2 - 1; i += 2) {
 | |
|         uint32_t cur_count = nodes[i].count + nodes[i+1].count;
 | |
|         // find correct place to insert new node, and
 | |
|         // make space for the new node while at it
 | |
|         for(j = cur_node; j > i + 2; j--){
 | |
|             if(cur_count > nodes[j-1].count ||
 | |
|                (cur_count == nodes[j-1].count &&
 | |
|                 !(flags & FF_HUFFMAN_FLAG_HNODE_FIRST)))
 | |
|                 break;
 | |
|             nodes[j] = nodes[j - 1];
 | |
|         }
 | |
|         nodes[j].sym = HNODE;
 | |
|         nodes[j].count = cur_count;
 | |
|         nodes[j].n0 = i;
 | |
|         cur_node++;
 | |
|     }
 | |
|     if (build_huff_tree(vlc, nodes, nb_codes * 2 - 2, flags, nb_bits) < 0) {
 | |
|         av_log(avctx, AV_LOG_ERROR, "Error building tree\n");
 | |
|         return -1;
 | |
|     }
 | |
|     return 0;
 | |
| }
 |