mirror of
				https://github.com/nyanmisaka/ffmpeg-rockchip.git
				synced 2025-10-31 04:26:37 +08:00 
			
		
		
		
	ARMv6 optimised put_pixels functions except xy2 variants
Originally committed as revision 21696 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
		| @@ -22,6 +22,244 @@ | ||||
|  | ||||
|         .text | ||||
|  | ||||
| @ call_2x_pixels type, subp | ||||
| @ | ||||
| @ Emits the exported function ff_<type>_pixels16<subp>_armv6, built from | ||||
| @ two invocations of ff_<type>_pixels8<subp>_armv6: one on the incoming | ||||
| @ r0/r1, one on r0+8 / r1+8. | ||||
| @ | ||||
| @ r0-r3 and lr are saved around the first call and restored afterwards, so | ||||
| @ the second invocation starts from the caller's original arguments. | ||||
| @ The second invocation is a plain branch (tail call): the 8-wide routine | ||||
| @ then returns straight to the original caller through the restored lr. | ||||
| .macro  call_2x_pixels  type, subp | ||||
| function ff_\type\()_pixels16\subp\()_armv6, export=1 | ||||
|         push            {r0-r3, lr} | ||||
|         bl              ff_\type\()_pixels8\subp\()_armv6 | ||||
|         pop             {r0-r3, lr} | ||||
|         add             r0,  r0,  #8 | ||||
|         add             r1,  r1,  #8 | ||||
|         b               ff_\type\()_pixels8\subp\()_armv6 | ||||
| .endfunc | ||||
| .endm | ||||
|  | ||||
| @ 16-wide wrappers generated from the 8-wide routines: | ||||
| @   ff_avg_pixels16_armv6, ff_put_pixels16_x2_armv6, | ||||
| @   ff_put_pixels16_y2_armv6, ff_put_pixels16_x2_no_rnd_armv6, | ||||
| @   ff_put_pixels16_y2_no_rnd_armv6. | ||||
| @ Plain ff_put_pixels16_armv6 is not generated here; it is written out | ||||
| @ as a function of its own. | ||||
| call_2x_pixels          avg | ||||
| call_2x_pixels          put, _x2 | ||||
| call_2x_pixels          put, _y2 | ||||
| call_2x_pixels          put, _x2_no_rnd | ||||
| call_2x_pixels          put, _y2_no_rnd | ||||
|  | ||||
| @ ff_put_pixels16_armv6: copy rows of 16 bytes. | ||||
| @ | ||||
| @ Register use as seen in the body: | ||||
| @   r0  address rows are stored to   (advanced by r2 per row) | ||||
| @   r1  address rows are loaded from (advanced by r2 per row) | ||||
| @   r2  byte step between consecutive rows, shared by r0 and r1 | ||||
| @   r3  row counter, decremented by 2 per loop iteration | ||||
| @ The loop only exits when r3 reaches exactly zero, so the row count is | ||||
| @ expected to be a positive even number. | ||||
| @ NOTE(review): the source is read with single-word ldr while the | ||||
| @ destination is written with strd; presumably the destination is | ||||
| @ 8-byte aligned and the source need not be -- confirm with callers. | ||||
| function ff_put_pixels16_armv6, export=1 | ||||
|         @ r4-r11 are used as scratch; save and restore them. | ||||
|         push            {r4-r11} | ||||
| 1: | ||||
|         @ First row: bytes 4..15 are loaded with offsets, then bytes 0..3 | ||||
|         @ with a post-indexed load that also steps r1 to the next row. | ||||
|         ldr             r5,  [r1, #4] | ||||
|         ldr             r6,  [r1, #8] | ||||
|         ldr             r7,  [r1, #12] | ||||
|         ldr             r4,  [r1], r2 | ||||
|         @ Upper half is stored first so the lower-half store can | ||||
|         @ post-increment r0. Loads of the second row are interleaved. | ||||
|         strd            r6,  r7,  [r0, #8] | ||||
|         ldr             r9,  [r1, #4] | ||||
|         strd            r4,  r5,  [r0],  r2 | ||||
|         ldr             r10, [r1, #8] | ||||
|         ldr             r11, [r1, #12] | ||||
|         ldr             r8,  [r1], r2 | ||||
|         strd            r10, r11, [r0, #8] | ||||
|         @ Flags set here are consumed by the bne below; the strd in | ||||
|         @ between does not alter them. | ||||
|         subs            r3,  r3,  #2 | ||||
|         strd            r8,  r9,  [r0],  r2 | ||||
|         bne             1b | ||||
|  | ||||
|         pop             {r4-r11} | ||||
|         bx              lr | ||||
| .endfunc | ||||
|  | ||||
| @ ff_put_pixels8_armv6: copy rows of 8 bytes. | ||||
| @ | ||||
| @ Register use as seen in the body: | ||||
| @   r0  address rows are stored to   (advanced by r2 per row) | ||||
| @   r1  address rows are loaded from (advanced by r2 per row) | ||||
| @   r2  byte step between consecutive rows, shared by r0 and r1 | ||||
| @   r3  row counter, decremented by 2 per loop iteration | ||||
| @ The loop only exits when r3 reaches exactly zero, so the row count is | ||||
| @ expected to be a positive even number. | ||||
| function ff_put_pixels8_armv6, export=1 | ||||
|         @ r4-r7 are used as scratch; save and restore them. | ||||
|         push            {r4-r7} | ||||
| 1: | ||||
|         @ Two rows per iteration. For each row the high word is loaded | ||||
|         @ first, then the low word with a post-indexed load that steps r1. | ||||
|         ldr             r5,  [r1, #4] | ||||
|         ldr             r4,  [r1], r2 | ||||
|         ldr             r7,  [r1, #4] | ||||
|         strd            r4,  r5,  [r0],  r2 | ||||
|         ldr             r6,  [r1], r2 | ||||
|         @ Flags set here are consumed by the bne below. | ||||
|         subs            r3,  r3,  #2 | ||||
|         strd            r6,  r7,  [r0],  r2 | ||||
|         bne             1b | ||||
|  | ||||
|         pop             {r4-r7} | ||||
|         bx              lr | ||||
| .endfunc | ||||
|  | ||||
| @ ff_put_pixels8_x2_armv6: for each row, store the per-byte rounded | ||||
| @ average of the 8 bytes at r1 and the 8 bytes at r1+1. | ||||
| @ | ||||
| @ Register use as seen in the body: | ||||
| @   r0  address rows are stored to   (advanced by r2 per row) | ||||
| @   r1  address rows are loaded from (advanced by r2 per row) | ||||
| @   r2  byte step between consecutive rows | ||||
| @   r3  row counter, decremented by 2 per loop iteration; the loop exits | ||||
| @       only when it reaches exactly zero | ||||
| @   r12 constant 0x01010101 (bit 0 of every byte lane) | ||||
| @   r14 scratch, which is why lr is pushed and the return is via pop pc | ||||
| @ | ||||
| @ Rounding: uhadd8 yields (a + b) >> 1 per byte; adding (a ^ b) & 1 per | ||||
| @ byte turns that into (a + b + 1) >> 1. | ||||
| @ NOTE(review): the shift/orr construction of the "+1" word assumes a | ||||
| @ little-endian byte order, and the loads at offset #5 are unaligned | ||||
| @ word loads -- confirm both hold on the intended targets. | ||||
| function ff_put_pixels8_x2_armv6, export=1 | ||||
|         push            {r4-r11, lr} | ||||
|         @ Build r12 = 0x01010101. | ||||
|         mov             r12, #1 | ||||
|         orr             r12, r12, r12, lsl #8 | ||||
|         orr             r12, r12, r12, lsl #16 | ||||
| 1: | ||||
|         @ Row A: r4 = bytes 0..3, r5 = bytes 4..7, r7 = bytes 5..8, | ||||
|         @        r6 = (r4 >> 8) | (r5 << 24) = bytes 1..4. | ||||
|         ldr             r4,  [r1] | ||||
|         @ Counter update is done early; nothing up to the final bne | ||||
|         @ writes the N/Z/C/V flags. | ||||
|         subs            r3,  r3,  #2 | ||||
|         ldr             r5,  [r1, #4] | ||||
|         ldr             r7,  [r1, #5] | ||||
|         lsr             r6,  r4,  #8 | ||||
|         @ Row B: r8/r9 = bytes 0..3 / 4..7, r11 = bytes 5..8, | ||||
|         @        r10 = bytes 1..4. The pre-indexed load steps r1 to row B. | ||||
|         ldr             r8,  [r1, r2]! | ||||
|         orr             r6,  r6,  r5,  lsl #24 | ||||
|         ldr             r9,  [r1, #4] | ||||
|         ldr             r11, [r1, #5] | ||||
|         lsr             r10, r8,  #8 | ||||
|         @ Step r1 past row B for the next iteration. | ||||
|         add             r1,  r1,  r2 | ||||
|         orr             r10, r10, r9,  lsl #24 | ||||
|         @ Row A: halving add plus rounding bit, low and high words. | ||||
|         eor             r14, r4,  r6 | ||||
|         uhadd8          r4,  r4,  r6 | ||||
|         eor             r6,  r5,  r7 | ||||
|         uhadd8          r5,  r5,  r7 | ||||
|         and             r14, r14, r12 | ||||
|         and             r6,  r6,  r12 | ||||
|         uadd8           r4,  r4,  r14 | ||||
|         @ Row B: same computation, interleaved with the tail of row A. | ||||
|         eor             r14, r8,  r10 | ||||
|         uadd8           r5,  r5,  r6 | ||||
|         eor             r6,  r9,  r11 | ||||
|         uhadd8          r8,  r8,  r10 | ||||
|         and             r14, r14, r12 | ||||
|         uhadd8          r9,  r9,  r11 | ||||
|         and             r6,  r6,  r12 | ||||
|         uadd8           r8,  r8,  r14 | ||||
|         strd            r4,  r5,  [r0],  r2 | ||||
|         uadd8           r9,  r9,  r6 | ||||
|         strd            r8,  r9,  [r0],  r2 | ||||
|         bne             1b | ||||
|  | ||||
|         pop             {r4-r11, pc} | ||||
| .endfunc | ||||
|  | ||||
| @ ff_put_pixels8_y2_armv6: for each output row, store the per-byte | ||||
| @ rounded average of an 8-byte source row and the row below it. | ||||
| @ | ||||
| @ Register use as seen in the body: | ||||
| @   r0  address rows are stored to (advanced by r2 per row) | ||||
| @   r1  address rows are loaded from (pre-indexed by r2 per row) | ||||
| @   r2  byte step between consecutive rows | ||||
| @   r3  row counter, decremented by 2 per loop iteration; the loop exits | ||||
| @       only when it reaches exactly zero | ||||
| @   r12 constant 0x01010101 (bit 0 of every byte lane) | ||||
| @   r4/r5 and r6/r7 alternately hold the "upper" and "lower" source row | ||||
| @ | ||||
| @ Rounding: uhadd8 yields (a + b) >> 1 per byte; adding (a ^ b) & 1 per | ||||
| @ byte turns that into (a + b + 1) >> 1. | ||||
| @ | ||||
| @ Producing h output rows needs source rows 0..h only. The look-ahead | ||||
| @ load at the bottom of the loop is therefore made conditional, so the | ||||
| @ final iteration does not read row h+1, which may lie outside the | ||||
| @ source buffer. | ||||
| function ff_put_pixels8_y2_armv6, export=1 | ||||
|         push            {r4-r11} | ||||
|         @ Build r12 = 0x01010101. | ||||
|         mov             r12, #1 | ||||
|         orr             r12, r12, r12, lsl #8 | ||||
|         orr             r12, r12, r12, lsl #16 | ||||
|         @ Prime the pipeline: r4/r5 = row 0, r6/r7 = row 1. | ||||
|         ldr             r4,  [r1] | ||||
|         ldr             r5,  [r1, #4] | ||||
|         ldr             r6,  [r1, r2]! | ||||
|         ldr             r7,  [r1, #4] | ||||
| 1: | ||||
|         @ Z is set here when these are the last two rows. Nothing below | ||||
|         @ writes N/Z/C/V (uadd8 only updates the GE bits), so the flags | ||||
|         @ are still valid for the ldrne pair and the closing bne. | ||||
|         subs            r3,  r3,  #2 | ||||
|         @ Output row n = avg(r4/r5, r6/r7). | ||||
|         uhadd8          r8,  r4,  r6 | ||||
|         eor             r10, r4,  r6 | ||||
|         uhadd8          r9,  r5,  r7 | ||||
|         eor             r11, r5,  r7 | ||||
|         and             r10, r10, r12 | ||||
|         @ Next source row replaces r4/r5 (always needed). | ||||
|         ldr             r4,  [r1, r2]! | ||||
|         uadd8           r8,  r8,  r10 | ||||
|         and             r11, r11, r12 | ||||
|         uadd8           r9,  r9,  r11 | ||||
|         ldr             r5,  [r1, #4] | ||||
|         @ Output row n+1 = avg(new r4/r5, r6/r7); r6/r7 are consumed as | ||||
|         @ scratch for the rounding bits once they have been used. | ||||
|         uhadd8          r10, r4,  r6 | ||||
|         eor             r6,  r4,  r6 | ||||
|         uhadd8          r11, r5,  r7 | ||||
|         and             r6,  r6,  r12 | ||||
|         eor             r7,  r5,  r7 | ||||
|         uadd8           r10, r10, r6 | ||||
|         and             r7,  r7,  r12 | ||||
|         @ Look-ahead row for the next iteration: only fetched when | ||||
|         @ another iteration will actually run. | ||||
|         ldrne           r6,  [r1, r2]! | ||||
|         uadd8           r11, r11, r7 | ||||
|         strd            r8,  r9,  [r0],  r2 | ||||
|         ldrne           r7,  [r1, #4] | ||||
|         strd            r10, r11, [r0],  r2 | ||||
|         bne             1b | ||||
|  | ||||
|         pop             {r4-r11} | ||||
|         bx              lr | ||||
| .endfunc | ||||
|  | ||||
| @ ff_put_pixels8_x2_no_rnd_armv6: for each row, store the per-byte | ||||
| @ truncating average (a + b) >> 1 of the 8 bytes at r1 and the 8 bytes | ||||
| @ at r1+1. | ||||
| @ | ||||
| @ Register use as seen in the body: | ||||
| @   r0  address rows are stored to   (advanced by r2 per row) | ||||
| @   r1  address rows are loaded from (advanced by r2 per row) | ||||
| @   r2  byte step between consecutive rows | ||||
| @   r3  row counter, decremented by 2 per loop iteration; the loop exits | ||||
| @       only when it reaches exactly zero | ||||
| @   r12, r14 scratch; lr is pushed and the return is via pop pc | ||||
| @ NOTE(review): the shift/orr construction of the "+1" word assumes a | ||||
| @ little-endian byte order, and the loads at offset #5 are unaligned | ||||
| @ word loads -- confirm both hold on the intended targets. | ||||
| function ff_put_pixels8_x2_no_rnd_armv6, export=1 | ||||
|         push            {r4-r9, lr} | ||||
| 1: | ||||
|         @ Flags set here are consumed by the bne at the bottom; none of | ||||
|         @ the instructions in between writes N/Z/C/V. | ||||
|         subs            r3,  r3,  #2 | ||||
|         @ Row A: r4 = bytes 0..3, r5 = bytes 4..7, r7 = bytes 5..8. | ||||
|         ldr             r4,  [r1] | ||||
|         ldr             r5,  [r1, #4] | ||||
|         ldr             r7,  [r1, #5] | ||||
|         @ Row B: r8 = bytes 0..3, r9 = bytes 4..7, r14 = bytes 5..8. | ||||
|         ldr             r8,  [r1, r2]! | ||||
|         ldr             r9,  [r1, #4] | ||||
|         ldr             r14, [r1, #5] | ||||
|         add             r1,  r1,  r2 | ||||
|         @ r6 / r12 = bytes 1..4 of row A / row B. | ||||
|         lsr             r6,  r4,  #8 | ||||
|         orr             r6,  r6,  r5,  lsl #24 | ||||
|         lsr             r12, r8,  #8 | ||||
|         orr             r12, r12, r9,  lsl #24 | ||||
|         @ Per-byte halving adds; no rounding correction is applied. | ||||
|         uhadd8          r4,  r4,  r6 | ||||
|         uhadd8          r5,  r5,  r7 | ||||
|         uhadd8          r8,  r8,  r12 | ||||
|         uhadd8          r9,  r9,  r14 | ||||
|         @ stm without writeback followed by an explicit add of the row step. | ||||
|         stm             r0,  {r4,r5} | ||||
|         add             r0,  r0,  r2 | ||||
|         stm             r0,  {r8,r9} | ||||
|         add             r0,  r0,  r2 | ||||
|         bne             1b | ||||
|  | ||||
|         pop             {r4-r9, pc} | ||||
| .endfunc | ||||
|  | ||||
| @ ff_put_pixels8_y2_no_rnd_armv6: for each output row, store the | ||||
| @ per-byte truncating average (a + b) >> 1 of an 8-byte source row and | ||||
| @ the row below it. | ||||
| @ | ||||
| @ Register use as seen in the body: | ||||
| @   r0  address rows are stored to (advanced by r2 per row) | ||||
| @   r1  address rows are loaded from (pre-indexed by r2 per row) | ||||
| @   r2  byte step between consecutive rows | ||||
| @   r3  row counter, decremented by 2 per loop iteration; the loop exits | ||||
| @       only when it reaches exactly zero | ||||
| @   r4/r5 and r6/r7 alternately hold the "upper" and "lower" source row | ||||
| @   r12, r14 hold the second output row; lr is pushed and the return is | ||||
| @       via pop pc | ||||
| @ | ||||
| @ Producing h output rows needs source rows 0..h only. The look-ahead | ||||
| @ load at the bottom of the loop is therefore made conditional, so the | ||||
| @ final iteration does not read row h+1, which may lie outside the | ||||
| @ source buffer. | ||||
| function ff_put_pixels8_y2_no_rnd_armv6, export=1 | ||||
|         push            {r4-r9, lr} | ||||
|         @ Prime the pipeline: r4/r5 = row 0, r6/r7 = row 1. | ||||
|         ldr             r4,  [r1] | ||||
|         ldr             r5,  [r1, #4] | ||||
|         ldr             r6,  [r1, r2]! | ||||
|         ldr             r7,  [r1, #4] | ||||
| 1: | ||||
|         @ Z is set here when these are the last two rows. Nothing below | ||||
|         @ writes N/Z/C/V, so the flags are still valid for the ldrne pair | ||||
|         @ and the closing bne. | ||||
|         subs            r3,  r3,  #2 | ||||
|         @ Output row n = (r4/r5 + r6/r7) >> 1; then fetch the next row | ||||
|         @ into r4/r5 (always needed). | ||||
|         uhadd8          r8,  r4,  r6 | ||||
|         ldr             r4,  [r1, r2]! | ||||
|         uhadd8          r9,  r5,  r7 | ||||
|         ldr             r5,  [r1, #4] | ||||
|         @ Output row n+1 = (new r4/r5 + r6/r7) >> 1. The look-ahead row | ||||
|         @ for the next iteration is only fetched when another iteration | ||||
|         @ will actually run. | ||||
|         uhadd8          r12, r4,  r6 | ||||
|         ldrne           r6,  [r1, r2]! | ||||
|         uhadd8          r14, r5,  r7 | ||||
|         ldrne           r7,  [r1, #4] | ||||
|         @ stm without writeback followed by an explicit add of the row step. | ||||
|         stm             r0,  {r8,r9} | ||||
|         add             r0,  r0,  r2 | ||||
|         stm             r0,  {r12,r14} | ||||
|         add             r0,  r0,  r2 | ||||
|         bne             1b | ||||
|  | ||||
|         pop             {r4-r9, pc} | ||||
| .endfunc | ||||
|  | ||||
| @ ff_avg_pixels8_armv6: for each 8-byte row, replace the bytes at r0 | ||||
| @ with the per-byte rounded average of the bytes already at r0 and the | ||||
| @ bytes at r1. | ||||
| @ | ||||
| @ Register use as seen in the body: | ||||
| @   r0  read-modify-write row address (ldrd/strd, advanced by r2 per row) | ||||
| @   r1  address rows are loaded from with ldr (advanced by r2 per row) | ||||
| @   r2  byte step between consecutive rows | ||||
| @   r3  row counter, decremented by 2 per pair of rows | ||||
| @   lr  constant 0x01010101 (bit 0 of every byte lane); the return | ||||
| @       address is restored into pc by the final pop | ||||
| @   r4/r5, r6/r7  rows read from r0;  r9/r10  row read from r1 | ||||
| @   r8, r12       rounding bits | ||||
| @ | ||||
| @ Rounding: uhadd8 yields (a + b) >> 1 per byte; adding (a ^ b) & 1 per | ||||
| @ byte turns that into (a + b + 1) >> 1. | ||||
| @ | ||||
| @ The loop is software-pipelined: loads for the following row are issued | ||||
| @ while the current row is being combined. The counter is tested with | ||||
| @ beq in the middle of the body so that the last pair of rows is | ||||
| @ finished at label 2 without loading a further row. | ||||
| function ff_avg_pixels8_armv6, export=1 | ||||
|         @ pld is a prefetch hint for the next source row. | ||||
|         pld             [r1, r2] | ||||
|         push            {r4-r10, lr} | ||||
|         @ Build lr = 0x01010101. | ||||
|         mov             lr,  #1 | ||||
|         orr             lr,  lr,  lr,  lsl #8 | ||||
|         orr             lr,  lr,  lr,  lsl #16 | ||||
|         @ Prime the pipeline with row 0 from both pointers. | ||||
|         ldrd            r4,  r5,  [r0] | ||||
|         ldr             r10, [r1, #4] | ||||
|         ldr             r9,  [r1], r2 | ||||
|         @ Flags from this subs (or the one inside the loop) are consumed | ||||
|         @ by the beq below; nothing in between writes N/Z/C/V. | ||||
|         subs            r3,  r3,  #2 | ||||
| 1: | ||||
|         pld             [r1, r2] | ||||
|         @ First row of the pair: r4/r5 (from r0) with r9/r10 (from r1). | ||||
|         eor             r8,  r4,  r9 | ||||
|         uhadd8          r4,  r4,  r9 | ||||
|         eor             r12, r5,  r10 | ||||
|         @ Second row of the pair is read from r0 + r2 before the first | ||||
|         @ row has been written back. | ||||
|         ldrd            r6,  r7,  [r0, r2] | ||||
|         uhadd8          r5,  r5,  r10 | ||||
|         and             r8,  r8,  lr | ||||
|         ldr             r10, [r1, #4] | ||||
|         and             r12, r12, lr | ||||
|         uadd8           r4,  r4,  r8 | ||||
|         ldr             r9,  [r1], r2 | ||||
|         @ Second row of the pair: r6/r7 with the freshly loaded r9/r10. | ||||
|         eor             r8,  r6,  r9 | ||||
|         uadd8           r5,  r5,  r12 | ||||
|         pld             [r1, r2,  lsl #1] | ||||
|         eor             r12, r7,  r10 | ||||
|         uhadd8          r6,  r6,  r9 | ||||
|         strd            r4,  r5,  [r0], r2 | ||||
|         uhadd8          r7,  r7,  r10 | ||||
|         @ Last pair: finish at 2 without loading any further rows. | ||||
|         beq             2f | ||||
|         @ More rows follow: finish the second row while loading the | ||||
|         @ first row of the next pair from both pointers. | ||||
|         and             r8,  r8,  lr | ||||
|         ldrd            r4,  r5,  [r0, r2] | ||||
|         uadd8           r6,  r6,  r8 | ||||
|         ldr             r10, [r1, #4] | ||||
|         and             r12, r12, lr | ||||
|         subs            r3,  r3,  #2 | ||||
|         uadd8           r7,  r7,  r12 | ||||
|         ldr             r9,  [r1], r2 | ||||
|         strd            r6,  r7,  [r0], r2 | ||||
|         b               1b | ||||
| 2: | ||||
|         @ Tail: apply the rounding bits to the final row and store it. | ||||
|         and             r8,  r8,  lr | ||||
|         and             r12, r12, lr | ||||
|         uadd8           r6,  r6,  r8 | ||||
|         uadd8           r7,  r7,  r12 | ||||
|         strd            r6,  r7,  [r0], r2 | ||||
|  | ||||
|         pop             {r4-r10, pc} | ||||
| .endfunc | ||||
|  | ||||
| function ff_add_pixels_clamped_armv6, export=1 | ||||
|         push            {r4-r8,lr} | ||||
|         mov             r3,  #8 | ||||
|   | ||||
| @@ -18,6 +18,9 @@ | ||||
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
|  */ | ||||
|  | ||||
| #include <stdint.h> | ||||
|  | ||||
| #include "libavcodec/avcodec.h" | ||||
| #include "libavcodec/dsputil.h" | ||||
| #include "dsputil_arm.h" | ||||
|  | ||||
| @@ -25,6 +28,24 @@ void ff_simple_idct_armv6(DCTELEM *data); | ||||
| void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); | ||||
| void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); | ||||
|  | ||||
| void ff_put_pixels16_armv6(uint8_t *, const uint8_t *, int, int); | ||||
| void ff_put_pixels16_x2_armv6(uint8_t *, const uint8_t *, int, int); | ||||
| void ff_put_pixels16_y2_armv6(uint8_t *, const uint8_t *, int, int); | ||||
|  | ||||
| void ff_put_pixels16_x2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); | ||||
| void ff_put_pixels16_y2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); | ||||
|  | ||||
| void ff_avg_pixels16_armv6(uint8_t *, const uint8_t *, int, int); | ||||
|  | ||||
| void ff_put_pixels8_armv6(uint8_t *, const uint8_t *, int, int); | ||||
| void ff_put_pixels8_x2_armv6(uint8_t *, const uint8_t *, int, int); | ||||
| void ff_put_pixels8_y2_armv6(uint8_t *, const uint8_t *, int, int); | ||||
|  | ||||
| void ff_put_pixels8_x2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); | ||||
| void ff_put_pixels8_y2_no_rnd_armv6(uint8_t *, const uint8_t *, int, int); | ||||
|  | ||||
| void ff_avg_pixels8_armv6(uint8_t *, const uint8_t *, int, int); | ||||
|  | ||||
| void ff_add_pixels_clamped_armv6(const DCTELEM *block, | ||||
|                                  uint8_t *restrict pixels, | ||||
|                                  int line_size); | ||||
| @@ -39,5 +60,26 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx) | ||||
|         c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; | ||||
|     } | ||||
|  | ||||
|     c->put_pixels_tab[0][0] = ff_put_pixels16_armv6; | ||||
|     c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6; | ||||
|     c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6; | ||||
| /*     c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_armv6; */ | ||||
|     c->put_pixels_tab[1][0] = ff_put_pixels8_armv6; | ||||
|     c->put_pixels_tab[1][1] = ff_put_pixels8_x2_armv6; | ||||
|     c->put_pixels_tab[1][2] = ff_put_pixels8_y2_armv6; | ||||
| /*     c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_armv6; */ | ||||
|  | ||||
|     c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_armv6; | ||||
|     c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_armv6; | ||||
|     c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_armv6; | ||||
| /*     c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_armv6; */ | ||||
|     c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_armv6; | ||||
|     c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_armv6; | ||||
|     c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_armv6; | ||||
| /*     c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_armv6; */ | ||||
|  | ||||
|     c->avg_pixels_tab[0][0] = ff_avg_pixels16_armv6; | ||||
|     c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6; | ||||
|  | ||||
|     c->add_pixels_clamped = ff_add_pixels_clamped_armv6; | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Måns Rullgård
					Måns Rullgård