mirror of
				https://github.com/nyanmisaka/ffmpeg-rockchip.git
				synced 2025-10-25 09:51:06 +08:00 
			
		
		
		
	dsputil: Move SVQ1 encoding specific bits into svq1enc
This commit is contained in:
		| @@ -2099,16 +2099,6 @@ static int vsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, | |||||||
|     return score; |     return score; | ||||||
| } | } | ||||||
|  |  | ||||||
| static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2, |  | ||||||
|                                int size) |  | ||||||
| { |  | ||||||
|     int score = 0, i; |  | ||||||
|  |  | ||||||
|     for (i = 0; i < size; i++) |  | ||||||
|         score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]); |  | ||||||
|     return score; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #define WRAPPER8_16_SQ(name8, name16)                                   \ | #define WRAPPER8_16_SQ(name8, name16)                                   \ | ||||||
| static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src,        \ | static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src,        \ | ||||||
|                   int stride, int h)                                    \ |                   int stride, int h)                                    \ | ||||||
| @@ -2430,8 +2420,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||||||
|     c->nsse[0] = nsse16_c; |     c->nsse[0] = nsse16_c; | ||||||
|     c->nsse[1] = nsse8_c; |     c->nsse[1] = nsse8_c; | ||||||
|  |  | ||||||
|     c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; |  | ||||||
|  |  | ||||||
|     c->bswap_buf   = bswap_buf; |     c->bswap_buf   = bswap_buf; | ||||||
|     c->bswap16_buf = bswap16_buf; |     c->bswap16_buf = bswap16_buf; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -175,9 +175,6 @@ typedef struct DSPContext { | |||||||
|     me_cmp_func ildct_cmp[6]; // only width 16 used |     me_cmp_func ildct_cmp[6]; // only width 16 used | ||||||
|     me_cmp_func frame_skip_cmp[6]; // only width 8 used |     me_cmp_func frame_skip_cmp[6]; // only width 8 used | ||||||
|  |  | ||||||
|     int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, |  | ||||||
|                              int size); |  | ||||||
|  |  | ||||||
|     qpel_mc_func put_qpel_pixels_tab[2][16]; |     qpel_mc_func put_qpel_pixels_tab[2][16]; | ||||||
|     qpel_mc_func avg_qpel_pixels_tab[2][16]; |     qpel_mc_func avg_qpel_pixels_tab[2][16]; | ||||||
|     qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; |     qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | ||||||
|   | |||||||
| @@ -12,6 +12,7 @@ OBJS-$(CONFIG_MPEGVIDEO)               += ppc/mpegvideo_altivec.o | |||||||
| OBJS-$(CONFIG_VIDEODSP)                += ppc/videodsp_ppc.o | OBJS-$(CONFIG_VIDEODSP)                += ppc/videodsp_ppc.o | ||||||
| OBJS-$(CONFIG_VP3DSP)                  += ppc/vp3dsp_altivec.o | OBJS-$(CONFIG_VP3DSP)                  += ppc/vp3dsp_altivec.o | ||||||
|  |  | ||||||
|  | OBJS-$(CONFIG_SVQ1_ENCODER)            += ppc/svq1enc_altivec.o | ||||||
| OBJS-$(CONFIG_VC1_DECODER)             += ppc/vc1dsp_altivec.o | OBJS-$(CONFIG_VC1_DECODER)             += ppc/vc1dsp_altivec.o | ||||||
| OBJS-$(CONFIG_VORBIS_DECODER)          += ppc/vorbisdsp_altivec.o | OBJS-$(CONFIG_VORBIS_DECODER)          += ppc/vorbisdsp_altivec.o | ||||||
| OBJS-$(CONFIG_VP7_DECODER)             += ppc/vp8dsp_altivec.o | OBJS-$(CONFIG_VP7_DECODER)             += ppc/vp8dsp_altivec.o | ||||||
|   | |||||||
| @@ -34,48 +34,6 @@ | |||||||
| #include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||||
| #include "dsputil_altivec.h" | #include "dsputil_altivec.h" | ||||||
|  |  | ||||||
| static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2, |  | ||||||
|                                      int size) |  | ||||||
| { |  | ||||||
|     int i, size16 = size >> 4; |  | ||||||
|     vector signed char vpix1; |  | ||||||
|     vector signed short vpix2, vdiff, vpix1l, vpix1h; |  | ||||||
|     union { |  | ||||||
|         vector signed int vscore; |  | ||||||
|         int32_t score[4]; |  | ||||||
|     } u = { .vscore = vec_splat_s32(0) }; |  | ||||||
|  |  | ||||||
| // XXX lazy way, fix it later |  | ||||||
|  |  | ||||||
|     while (size16) { |  | ||||||
|         // score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]); |  | ||||||
|         // load pix1 and the first batch of pix2 |  | ||||||
|  |  | ||||||
|         vpix1 = vec_unaligned_load(pix1); |  | ||||||
|         vpix2 = vec_unaligned_load(pix2); |  | ||||||
|         pix2 += 8; |  | ||||||
|         // unpack |  | ||||||
|         vpix1h = vec_unpackh(vpix1); |  | ||||||
|         vdiff  = vec_sub(vpix1h, vpix2); |  | ||||||
|         vpix1l = vec_unpackl(vpix1); |  | ||||||
|         // load another batch from pix2 |  | ||||||
|         vpix2    = vec_unaligned_load(pix2); |  | ||||||
|         u.vscore = vec_msum(vdiff, vdiff, u.vscore); |  | ||||||
|         vdiff    = vec_sub(vpix1l, vpix2); |  | ||||||
|         u.vscore = vec_msum(vdiff, vdiff, u.vscore); |  | ||||||
|         pix1    += 16; |  | ||||||
|         pix2    += 8; |  | ||||||
|         size16--; |  | ||||||
|     } |  | ||||||
|     u.vscore = vec_sums(u.vscore, vec_splat_s32(0)); |  | ||||||
|  |  | ||||||
|     size %= 16; |  | ||||||
|     for (i = 0; i < size; i++) |  | ||||||
|         u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]); |  | ||||||
|  |  | ||||||
|     return u.score[3]; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, | static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, | ||||||
|                                            int order) |                                            int order) | ||||||
| { | { | ||||||
| @@ -140,8 +98,6 @@ static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1, | |||||||
|  |  | ||||||
| av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx) | av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx) | ||||||
| { | { | ||||||
|     c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec; |  | ||||||
|  |  | ||||||
|     c->scalarproduct_int16 = scalarproduct_int16_altivec; |     c->scalarproduct_int16 = scalarproduct_int16_altivec; | ||||||
|  |  | ||||||
|     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec; |     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec; | ||||||
|   | |||||||
							
								
								
									
										80
									
								
								libavcodec/ppc/svq1enc_altivec.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								libavcodec/ppc/svq1enc_altivec.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | |||||||
|  | /* | ||||||
|  |  * Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org> | ||||||
|  |  * | ||||||
|  |  * This file is part of Libav. | ||||||
|  |  * | ||||||
|  |  * Libav is free software; you can redistribute it and/or | ||||||
|  |  * modify it under the terms of the GNU Lesser General Public | ||||||
|  |  * License as published by the Free Software Foundation; either | ||||||
|  |  * version 2.1 of the License, or (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * Libav is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||||
|  |  * Lesser General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU Lesser General Public | ||||||
|  |  * License along with Libav; if not, write to the Free Software | ||||||
|  |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | #include <stdint.h> | ||||||
|  |  | ||||||
|  | #include "config.h" | ||||||
|  | #if HAVE_ALTIVEC_H | ||||||
|  | #include <altivec.h> | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #include "libavutil/attributes.h" | ||||||
|  | #include "libavutil/ppc/types_altivec.h" | ||||||
|  | #include "libavutil/ppc/util_altivec.h" | ||||||
|  | #include "libavcodec/svq1enc.h" | ||||||
|  |  | ||||||
|  | #if HAVE_ALTIVEC | ||||||
|  | static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2, | ||||||
|  |                                      int size) | ||||||
|  | { | ||||||
|  |     int i, size16 = size >> 4; | ||||||
|  |     vector signed char vpix1; | ||||||
|  |     vector signed short vpix2, vdiff, vpix1l, vpix1h; | ||||||
|  |     union { | ||||||
|  |         vector signed int vscore; | ||||||
|  |         int32_t score[4]; | ||||||
|  |     } u = { .vscore = vec_splat_s32(0) }; | ||||||
|  |  | ||||||
|  |     while (size16) { | ||||||
|  |         // score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]); | ||||||
|  |         // load pix1 and the first batch of pix2 | ||||||
|  |  | ||||||
|  |         vpix1 = vec_unaligned_load(pix1); | ||||||
|  |         vpix2 = vec_unaligned_load(pix2); | ||||||
|  |         pix2 += 8; | ||||||
|  |         // unpack | ||||||
|  |         vpix1h = vec_unpackh(vpix1); | ||||||
|  |         vdiff  = vec_sub(vpix1h, vpix2); | ||||||
|  |         vpix1l = vec_unpackl(vpix1); | ||||||
|  |         // load another batch from pix2 | ||||||
|  |         vpix2    = vec_unaligned_load(pix2); | ||||||
|  |         u.vscore = vec_msum(vdiff, vdiff, u.vscore); | ||||||
|  |         vdiff    = vec_sub(vpix1l, vpix2); | ||||||
|  |         u.vscore = vec_msum(vdiff, vdiff, u.vscore); | ||||||
|  |         pix1    += 16; | ||||||
|  |         pix2    += 8; | ||||||
|  |         size16--; | ||||||
|  |     } | ||||||
|  |     u.vscore = vec_sums(u.vscore, vec_splat_s32(0)); | ||||||
|  |  | ||||||
|  |     size %= 16; | ||||||
|  |     for (i = 0; i < size; i++) | ||||||
|  |         u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]); | ||||||
|  |  | ||||||
|  |     return u.score[3]; | ||||||
|  | } | ||||||
|  | #endif /* HAVE_ALTIVEC */ | ||||||
|  |  | ||||||
|  | av_cold void ff_svq1enc_init_ppc(SVQ1EncContext *c) | ||||||
|  | { | ||||||
|  | #if HAVE_ALTIVEC | ||||||
|  |     c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec; | ||||||
|  | #endif /* HAVE_ALTIVEC */ | ||||||
|  | } | ||||||
| @@ -34,49 +34,12 @@ | |||||||
| #include "internal.h" | #include "internal.h" | ||||||
| #include "mpegutils.h" | #include "mpegutils.h" | ||||||
| #include "svq1.h" | #include "svq1.h" | ||||||
|  | #include "svq1enc.h" | ||||||
| #include "svq1enc_cb.h" | #include "svq1enc_cb.h" | ||||||
|  |  | ||||||
| #undef NDEBUG | #undef NDEBUG | ||||||
| #include <assert.h> | #include <assert.h> | ||||||
|  |  | ||||||
| typedef struct SVQ1EncContext { |  | ||||||
|     /* FIXME: Needed for motion estimation, should not be used for anything |  | ||||||
|      * else, the idea is to make the motion estimation eventually independent |  | ||||||
|      * of MpegEncContext, so this will be removed then. */ |  | ||||||
|     MpegEncContext m; |  | ||||||
|     AVCodecContext *avctx; |  | ||||||
|     DSPContext dsp; |  | ||||||
|     HpelDSPContext hdsp; |  | ||||||
|     AVFrame *current_picture; |  | ||||||
|     AVFrame *last_picture; |  | ||||||
|     PutBitContext pb; |  | ||||||
|     GetBitContext gb; |  | ||||||
|  |  | ||||||
|     /* why ooh why this sick breadth first order, |  | ||||||
|      * everything is slower and more complex */ |  | ||||||
|     PutBitContext reorder_pb[6]; |  | ||||||
|  |  | ||||||
|     int frame_width; |  | ||||||
|     int frame_height; |  | ||||||
|  |  | ||||||
|     /* Y plane block dimensions */ |  | ||||||
|     int y_block_width; |  | ||||||
|     int y_block_height; |  | ||||||
|  |  | ||||||
|     /* U & V plane (C planes) block dimensions */ |  | ||||||
|     int c_block_width; |  | ||||||
|     int c_block_height; |  | ||||||
|  |  | ||||||
|     uint16_t *mb_type; |  | ||||||
|     uint32_t *dummy; |  | ||||||
|     int16_t (*motion_val8[3])[2]; |  | ||||||
|     int16_t (*motion_val16[3])[2]; |  | ||||||
|  |  | ||||||
|     int64_t rd_total; |  | ||||||
|  |  | ||||||
|     uint8_t *scratchbuf; |  | ||||||
| } SVQ1EncContext; |  | ||||||
|  |  | ||||||
| static void svq1_write_header(SVQ1EncContext *s, int frame_type) | static void svq1_write_header(SVQ1EncContext *s, int frame_type) | ||||||
| { | { | ||||||
|     int i; |     int i; | ||||||
| @@ -114,6 +77,16 @@ static void svq1_write_header(SVQ1EncContext *s, int frame_type) | |||||||
| #define QUALITY_THRESHOLD    100 | #define QUALITY_THRESHOLD    100 | ||||||
| #define THRESHOLD_MULTIPLIER 0.6 | #define THRESHOLD_MULTIPLIER 0.6 | ||||||
|  |  | ||||||
|  | static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2, | ||||||
|  |                                int size) | ||||||
|  | { | ||||||
|  |     int score = 0, i; | ||||||
|  |  | ||||||
|  |     for (i = 0; i < size; i++) | ||||||
|  |         score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]); | ||||||
|  |     return score; | ||||||
|  | } | ||||||
|  |  | ||||||
| static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref, | static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref, | ||||||
|                         uint8_t *decoded, int stride, int level, |                         uint8_t *decoded, int stride, int level, | ||||||
|                         int threshold, int lambda, int intra) |                         int threshold, int lambda, int intra) | ||||||
| @@ -175,7 +148,7 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref, | |||||||
|                 int sqr, diff, score; |                 int sqr, diff, score; | ||||||
|  |  | ||||||
|                 vector = codebook + stage * size * 16 + i * size; |                 vector = codebook + stage * size * 16 + i * size; | ||||||
|                 sqr    = s->dsp.ssd_int8_vs_int16(vector, block[stage], size); |                 sqr    = s->ssd_int8_vs_int16(vector, block[stage], size); | ||||||
|                 diff   = block_sum[stage] - sum; |                 diff   = block_sum[stage] - sum; | ||||||
|                 score  = sqr - (diff * (int64_t)diff >> (level + 3)); // FIXME: 64bit slooow |                 score  = sqr - (diff * (int64_t)diff >> (level + 3)); // FIXME: 64bit slooow | ||||||
|                 if (score < best_vector_score) { |                 if (score < best_vector_score) { | ||||||
| @@ -574,6 +547,13 @@ static av_cold int svq1_encode_init(AVCodecContext *avctx) | |||||||
|                                         s->y_block_height * sizeof(int16_t)); |                                         s->y_block_height * sizeof(int16_t)); | ||||||
|     s->dummy               = av_mallocz((s->y_block_width + 1) * |     s->dummy               = av_mallocz((s->y_block_width + 1) * | ||||||
|                                         s->y_block_height * sizeof(int32_t)); |                                         s->y_block_height * sizeof(int32_t)); | ||||||
|  |     s->ssd_int8_vs_int16   = ssd_int8_vs_int16_c; | ||||||
|  |  | ||||||
|  |     if (ARCH_PPC) | ||||||
|  |         ff_svq1enc_init_ppc(s); | ||||||
|  |     if (ARCH_X86) | ||||||
|  |         ff_svq1enc_init_x86(s); | ||||||
|  |  | ||||||
|     ff_h263_encode_init(&s->m); // mv_penalty |     ff_h263_encode_init(&s->m); // mv_penalty | ||||||
|  |  | ||||||
|     return 0; |     return 0; | ||||||
|   | |||||||
							
								
								
									
										78
									
								
								libavcodec/svq1enc.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								libavcodec/svq1enc.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | |||||||
|  | /* | ||||||
|  |  * SVQ1 encoder | ||||||
|  |  * | ||||||
|  |  * This file is part of Libav. | ||||||
|  |  * | ||||||
|  |  * Libav is free software; you can redistribute it and/or | ||||||
|  |  * modify it under the terms of the GNU Lesser General Public | ||||||
|  |  * License as published by the Free Software Foundation; either | ||||||
|  |  * version 2.1 of the License, or (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * Libav is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||||
|  |  * Lesser General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU Lesser General Public | ||||||
|  |  * License along with Libav; if not, write to the Free Software | ||||||
|  |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | #ifndef AVCODEC_SVQ1ENC_H | ||||||
|  | #define AVCODEC_SVQ1ENC_H | ||||||
|  |  | ||||||
|  | #include <stdint.h> | ||||||
|  |  | ||||||
|  | #include "libavutil/frame.h" | ||||||
|  | #include "avcodec.h" | ||||||
|  | #include "dsputil.h" | ||||||
|  | #include "get_bits.h" | ||||||
|  | #include "hpeldsp.h" | ||||||
|  | #include "mpegvideo.h" | ||||||
|  | #include "put_bits.h" | ||||||
|  |  | ||||||
|  | typedef struct SVQ1EncContext { | ||||||
|  |     /* FIXME: Needed for motion estimation, should not be used for anything | ||||||
|  |      * else, the idea is to make the motion estimation eventually independent | ||||||
|  |      * of MpegEncContext, so this will be removed then. */ | ||||||
|  |     MpegEncContext m; | ||||||
|  |     AVCodecContext *avctx; | ||||||
|  |     DSPContext dsp; | ||||||
|  |     HpelDSPContext hdsp; | ||||||
|  |     AVFrame *current_picture; | ||||||
|  |     AVFrame *last_picture; | ||||||
|  |     PutBitContext pb; | ||||||
|  |     GetBitContext gb; | ||||||
|  |  | ||||||
|  |     /* why ooh why this sick breadth first order, | ||||||
|  |      * everything is slower and more complex */ | ||||||
|  |     PutBitContext reorder_pb[6]; | ||||||
|  |  | ||||||
|  |     int frame_width; | ||||||
|  |     int frame_height; | ||||||
|  |  | ||||||
|  |     /* Y plane block dimensions */ | ||||||
|  |     int y_block_width; | ||||||
|  |     int y_block_height; | ||||||
|  |  | ||||||
|  |     /* U & V plane (C planes) block dimensions */ | ||||||
|  |     int c_block_width; | ||||||
|  |     int c_block_height; | ||||||
|  |  | ||||||
|  |     uint16_t *mb_type; | ||||||
|  |     uint32_t *dummy; | ||||||
|  |     int16_t (*motion_val8[3])[2]; | ||||||
|  |     int16_t (*motion_val16[3])[2]; | ||||||
|  |  | ||||||
|  |     int64_t rd_total; | ||||||
|  |  | ||||||
|  |     uint8_t *scratchbuf; | ||||||
|  |  | ||||||
|  |     int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, | ||||||
|  |                              int size); | ||||||
|  | } SVQ1EncContext; | ||||||
|  |  | ||||||
|  | void ff_svq1enc_init_ppc(SVQ1EncContext *c); | ||||||
|  | void ff_svq1enc_init_x86(SVQ1EncContext *c); | ||||||
|  |  | ||||||
|  | #endif /* AVCODEC_SVQ1ENC_H */ | ||||||
| @@ -51,6 +51,7 @@ MMX-OBJS-$(CONFIG_HPELDSP)             += x86/fpel_mmx.o                \ | |||||||
|                                           x86/hpeldsp_mmx.o |                                           x86/hpeldsp_mmx.o | ||||||
| MMX-OBJS-$(CONFIG_HUFFYUVDSP)          += x86/huffyuvdsp_mmx.o | MMX-OBJS-$(CONFIG_HUFFYUVDSP)          += x86/huffyuvdsp_mmx.o | ||||||
|  |  | ||||||
|  | MMX-OBJS-$(CONFIG_SVQ1_ENCODER)        += x86/svq1enc_mmx.o | ||||||
| MMX-OBJS-$(CONFIG_VC1_DECODER)         += x86/vc1dsp_mmx.o | MMX-OBJS-$(CONFIG_VC1_DECODER)         += x86/vc1dsp_mmx.o | ||||||
|  |  | ||||||
| YASM-OBJS                              += x86/deinterlace.o             \ | YASM-OBJS                              += x86/deinterlace.o             \ | ||||||
|   | |||||||
| @@ -805,40 +805,6 @@ DCT_SAD_FUNC(ssse3) | |||||||
| #undef HSUM | #undef HSUM | ||||||
| #undef DCT_SAD | #undef DCT_SAD | ||||||
|  |  | ||||||
| static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, |  | ||||||
|                                  int size) |  | ||||||
| { |  | ||||||
|     int sum; |  | ||||||
|     x86_reg i = size; |  | ||||||
|  |  | ||||||
|     __asm__ volatile ( |  | ||||||
|         "pxor %%mm4, %%mm4 \n" |  | ||||||
|         "1: \n" |  | ||||||
|         "sub $8, %0 \n" |  | ||||||
|         "movq (%2, %0), %%mm2 \n" |  | ||||||
|         "movq (%3, %0, 2), %%mm0 \n" |  | ||||||
|         "movq 8(%3, %0, 2), %%mm1 \n" |  | ||||||
|         "punpckhbw %%mm2, %%mm3 \n" |  | ||||||
|         "punpcklbw %%mm2, %%mm2 \n" |  | ||||||
|         "psraw $8, %%mm3 \n" |  | ||||||
|         "psraw $8, %%mm2 \n" |  | ||||||
|         "psubw %%mm3, %%mm1 \n" |  | ||||||
|         "psubw %%mm2, %%mm0 \n" |  | ||||||
|         "pmaddwd %%mm1, %%mm1 \n" |  | ||||||
|         "pmaddwd %%mm0, %%mm0 \n" |  | ||||||
|         "paddd %%mm1, %%mm4 \n" |  | ||||||
|         "paddd %%mm0, %%mm4 \n" |  | ||||||
|         "jg 1b \n" |  | ||||||
|         "movq %%mm4, %%mm3 \n" |  | ||||||
|         "psrlq $32, %%mm3 \n" |  | ||||||
|         "paddd %%mm3, %%mm4 \n" |  | ||||||
|         "movd %%mm4, %1 \n" |  | ||||||
|         : "+r" (i), "=r" (sum) |  | ||||||
|         : "r" (pix1), "r" (pix2)); |  | ||||||
|  |  | ||||||
|     return sum; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #define PHADDD(a, t)                            \ | #define PHADDD(a, t)                            \ | ||||||
|     "movq  " #a ", " #t "               \n\t"   \ |     "movq  " #a ", " #t "               \n\t"   \ | ||||||
|     "psrlq    $32, " #a "               \n\t"   \ |     "psrlq    $32, " #a "               \n\t"   \ | ||||||
| @@ -958,8 +924,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||||||
|             c->try_8x8basis = try_8x8basis_mmx; |             c->try_8x8basis = try_8x8basis_mmx; | ||||||
|         } |         } | ||||||
|         c->add_8x8basis = add_8x8basis_mmx; |         c->add_8x8basis = add_8x8basis_mmx; | ||||||
|  |  | ||||||
|         c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (INLINE_AMD3DNOW(cpu_flags)) { |     if (INLINE_AMD3DNOW(cpu_flags)) { | ||||||
|   | |||||||
							
								
								
									
										73
									
								
								libavcodec/x86/svq1enc_mmx.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								libavcodec/x86/svq1enc_mmx.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | |||||||
|  | /* | ||||||
|  |  * This file is part of Libav. | ||||||
|  |  * | ||||||
|  |  * Libav is free software; you can redistribute it and/or | ||||||
|  |  * modify it under the terms of the GNU Lesser General Public | ||||||
|  |  * License as published by the Free Software Foundation; either | ||||||
|  |  * version 2.1 of the License, or (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * Libav is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | ||||||
|  |  * Lesser General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU Lesser General Public | ||||||
|  |  * License along with Libav; if not, write to the Free Software | ||||||
|  |  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | #include "config.h" | ||||||
|  | #include "libavutil/attributes.h" | ||||||
|  | #include "libavutil/cpu.h" | ||||||
|  | #include "libavutil/x86/asm.h" | ||||||
|  | #include "libavutil/x86/cpu.h" | ||||||
|  | #include "libavcodec/svq1enc.h" | ||||||
|  |  | ||||||
|  | #if HAVE_INLINE_ASM | ||||||
|  |  | ||||||
|  | static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, | ||||||
|  |                                  int size) | ||||||
|  | { | ||||||
|  |     int sum; | ||||||
|  |     x86_reg i = size; | ||||||
|  |  | ||||||
|  |     __asm__ volatile ( | ||||||
|  |         "pxor %%mm4, %%mm4 \n" | ||||||
|  |         "1: \n" | ||||||
|  |         "sub $8, %0 \n" | ||||||
|  |         "movq (%2, %0), %%mm2 \n" | ||||||
|  |         "movq (%3, %0, 2), %%mm0 \n" | ||||||
|  |         "movq 8(%3, %0, 2), %%mm1 \n" | ||||||
|  |         "punpckhbw %%mm2, %%mm3 \n" | ||||||
|  |         "punpcklbw %%mm2, %%mm2 \n" | ||||||
|  |         "psraw $8, %%mm3 \n" | ||||||
|  |         "psraw $8, %%mm2 \n" | ||||||
|  |         "psubw %%mm3, %%mm1 \n" | ||||||
|  |         "psubw %%mm2, %%mm0 \n" | ||||||
|  |         "pmaddwd %%mm1, %%mm1 \n" | ||||||
|  |         "pmaddwd %%mm0, %%mm0 \n" | ||||||
|  |         "paddd %%mm1, %%mm4 \n" | ||||||
|  |         "paddd %%mm0, %%mm4 \n" | ||||||
|  |         "jg 1b \n" | ||||||
|  |         "movq %%mm4, %%mm3 \n" | ||||||
|  |         "psrlq $32, %%mm3 \n" | ||||||
|  |         "paddd %%mm3, %%mm4 \n" | ||||||
|  |         "movd %%mm4, %1 \n" | ||||||
|  |         : "+r" (i), "=r" (sum) | ||||||
|  |         : "r" (pix1), "r" (pix2)); | ||||||
|  |  | ||||||
|  |     return sum; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #endif /* HAVE_INLINE_ASM */ | ||||||
|  |  | ||||||
|  | av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c) | ||||||
|  | { | ||||||
|  | #if HAVE_INLINE_ASM | ||||||
|  |     int cpu_flags = av_get_cpu_flags(); | ||||||
|  |  | ||||||
|  |     if (INLINE_MMX(cpu_flags)) { | ||||||
|  |         c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; | ||||||
|  |     } | ||||||
|  | #endif /* HAVE_INLINE_ASM */ | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user
	 Diego Biurrun
					Diego Biurrun