mirror of
				https://github.com/nyanmisaka/ffmpeg-rockchip.git
				synced 2025-10-31 20:42:49 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			292 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			292 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /*
 | |
|  * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
 | |
|  *
 | |
|  * This file is part of FFmpeg.
 | |
|  *
 | |
|  * FFmpeg is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU Lesser General Public
 | |
|  * License as published by the Free Software Foundation; either
 | |
|  * version 2.1 of the License, or (at your option) any later version.
 | |
|  *
 | |
|  * FFmpeg is distributed in the hope that it will be useful,
 | |
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|  * Lesser General Public License for more details.
 | |
|  *
 | |
|  * You should have received a copy of the GNU Lesser General Public
 | |
|  * License along with FFmpeg; if not, write to the Free Software
 | |
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | |
|  */
 | |
| 
 | |
| #include "libavutil/arm/asm.S"
 | |
| 
 | |
/*
 * alias name, tgt, set=1
 *
 * Create or remove a register alias.
 *   name - alias identifier
 *   tgt  - register (or existing alias) the name should refer to
 *   set  - 0 removes the alias with .unreq; any other value (default 1)
 *          defines it with .req
 * tgt is not used when the alias is being removed.
 */
.macro alias name, tgt, set=1
.if \set == 0
    .unreq  \name
.else
    \name   .req    \tgt
.endif
.endm
 | |
| 
 | |
/*
 * Define q<N>_l / q<N>_h for N = 0..15 as aliases of d<2N> / d<2N+1>.
 *
 * .altmacro is switched on around the definition and its single invocation
 * so that every %(expr) argument is evaluated to a number before it is
 * handed to the recursive call; it is switched off again afterwards.
 */
.altmacro

/*
 * alias_dw_all qw, dw_l, dw_h
 *   qw   - index of the Q register being described
 *   dw_l - index of the D register bound to q<qw>_l
 *   dw_h - index of the D register bound to q<qw>_h
 * Recurses with qw + 1, dw_l + 2, dw_h + 2 for as long as qw < 15.
 */
.macro alias_dw_all qw, dw_l, dw_h
    alias   q\qw\()_h, d\dw_h
    alias   q\qw\()_l, d\dw_l
    .if \qw < 15
        alias_dw_all  %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
    .endif
.endm

/* Start the chain at q0 = {d0, d1}. */
alias_dw_all    0, 0, 1

.noaltmacro
 | |
| 
 | |
/*
 * alias_qw name, qw, set=1
 *
 * Alias a whole Q register together with its two halves:
 *   name_l -> qw_l
 *   name_h -> qw_h
 *   name   -> qw
 * qw_l / qw_h are the half-register names created by alias_dw_all.
 * set is forwarded to alias, so set=0 removes all three names.
 */
.macro alias_qw     name, qw, set=1
    alias   \name\()_l, \qw\()_l, \set
    alias   \name\()_h, \qw\()_h, \set
    alias   \name\(), \qw, \set
.endm
 | |
| 
 | |
/*
 * prologue
 *
 * Save r4-r12 and lr (10 words) followed by q4-q7 (4 x 16 bytes) on the
 * stack. load_arg depends on exactly this frame size, and epilogue undoes
 * it in reverse order.
 */
.macro prologue
    push            {r4-r12, lr}        // 10 core registers
    vpush           {q4-q7}             // 4 quad-word NEON registers
.endm
 | |
| 
 | |
/*
 * epilogue
 *
 * Restore q4-q7 and r4-r12 saved by prologue. The saved lr value is popped
 * straight into pc, so expanding this macro also returns to the caller.
 */
.macro epilogue
    vpop            {q4-q7}
    pop             {r4-r12, pc}        // pc <- saved lr: return
.endm
 | |
| 
 | |
/*
 * load_arg reg, ix
 *
 * Load the stack-passed function argument number ix (zero-based, ix >= 4)
 * into reg. Only valid while the frame built by prologue is in place and
 * sp has not been moved since:
 *   10 * 4  bytes - r4-r12 and lr pushed by prologue
 *   4 * 16  bytes - q4-q7 pushed by prologue
 * Argument 4 is the first one found on the stack above that frame; each
 * further argument is one word higher.
 */
.macro  load_arg    reg, ix
    ldr     \reg,   [sp, #(10 * 4 + 4 * 16 + 4 * (\ix - 4))]
.endm
 | |
| 
 | |
| 
 | |
/* ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
 *                  int width, int height,
 *                  int y_stride, int c_stride, int src_stride,
 *                  int32_t coeff_table[9]);
 */

/*
 * alias_loop_420sp set=1
 *
 * Define (set != 0) or remove (set == 0) the core-register names used by
 * loop_420sp. Several names deliberately share a register:
 *   src0 = src, y0 = y      - the argument registers double as row cursors
 *   header = width          - width is replaced by its low bits
 *   c_padding = c_stride    - the stride is replaced by the padding
 *   y1 = coeff_table = r12  - the table pointer is only needed before y1
 * A chained alias must follow the name it refers to, so the order of the
 * lines below matters when defining.
 */
.macro  alias_loop_420sp set=1
    /* r0-r3: register arguments */
    alias   src,        r0, \set
    alias   src0,       src, \set
    alias   y,          r1, \set
    alias   y0,         y, \set
    alias   chroma,     r2, \set
    alias   width,      r3, \set
    alias   header,     width, \set

    /* r4-r7: arguments fetched from the stack by loop_420sp */
    alias   height,     r4, \set
    alias   y_stride,   r5, \set
    alias   c_stride,   r6, \set
    alias   c_padding,  c_stride, \set
    alias   src_stride, r7, \set

    /* r8: address at which the first luma row of the current pair ends */
    alias   y0_end,     r8, \set

    /* r9-r10: bytes to skip at the end of a source / luma row */
    alias   src_padding,r9, \set
    alias   y_padding,  r10, \set

    /* r11-r12: cursors for the second row of the current pair */
    alias   src1,       r11, \set
    alias   y1,         r12, \set

    /* r12 again: coefficient table pointer, live only until y1 is set */
    alias   coeff_table,r12, \set
.endm
 | |
| 
 | |
| 
 | |
/*
 * loop_420sp s_fmt, d_fmt, init, kernel, precision
 *
 * Emit the exported function <s_fmt>_to_<d_fmt>_neon_<precision>. The
 * function walks the image two rows at a time and hands each group of
 * columns to the kernel macro.
 *
 *   s_fmt, d_fmt - format names; part of the symbol name and forwarded to
 *                  the kernel
 *   init         - macro expanded once with the coefficient table pointer
 *   kernel       - macro expanded as
 *                    kernel s_fmt, d_fmt, src0, src1, y0, y1, chroma[, count]
 *   precision    - suffix of the symbol name
 *
 * Register roles are the ones named by alias_loop_420sp.
 *
 * NOTE(review): the full-width kernel at label 1 runs at least once per row
 * pair, so a width below 16 looks unsupported - confirm that callers
 * guarantee width >= 16.
 */
.macro  loop_420sp s_fmt, d_fmt, init, kernel, precision

function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
    prologue

    alias_loop_420sp

    /* Arguments 4..8 live on the stack above the frame saved by prologue. */
    load_arg    height,         4
    load_arg    y_stride,       5
    load_arg    c_stride,       6
    load_arg    src_stride,     7
    load_arg    coeff_table,    8

    /* coeff_table shares r12 with y1, so it must be consumed before y1 is
     * initialised below. */
    \init       coeff_table

    /* Row paddings. They have to be derived before width is overwritten by
     * header (same register); c_padding overwrites c_stride likewise.
     * The source advances 4 bytes per pixel, hence width << 2. */
    sub         y_padding,      y_stride,       width
    sub         c_padding,      c_stride,       width
    sub         src_padding,    src_stride,     width, LSL #2

    add         y0_end,         y0,             width
    and         header,         width,          #15     // width mod 16

    /* Cursors for the second row of the first pair. */
    add         y1,             y0,             y_stride
    add         src1,           src0,           src_stride

0:
    /* Row pair start: when width is not a multiple of 16, run the kernel
     * once with count = header so that the cursors advance by the leftover
     * amount and the remaining width is a multiple of 16. */
    cmp         header,     #0
    beq         1f

    \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header

1:
    \kernel     \s_fmt, \d_fmt, src0, src1, y0, y1, chroma

    /* y0 and y0_end are addresses, so the comparison has to be unsigned:
     * a signed test (blt) would leave the loop early for a row that spans
     * the 0x80000000 boundary. */
    cmp         y0,         y0_end
    blo         1b
2:
    /* y1 / src1 now point at the end of the second row; adding the padding
     * yields the start of the next row pair. */
    add         y0,         y1,         y_padding
    add         y0_end,     y1,         y_stride
    add         chroma,     chroma,     c_padding
    add         src0,       src1,       src_padding

    add         y1,         y0,         y_stride
    add         src1,       src0,       src_stride

    subs        height,     height,     #2              // two rows done

    bgt         0b

    epilogue

    alias_loop_420sp 0

endfunc
.endm
 | |
| 
 | |
/*
 * downsample
 *
 * Add each pair of adjacent unsigned 8-bit samples of the r/g/b 8x16
 * registers and write the widened 16-bit sums to the matching 16x8
 * registers (any previous content is overwritten).
 * The r16x8 / g16x8 / b16x8 names are not defined in this file; they are
 * expected to be aliased by the file that includes it.
 */
.macro downsample
    vpaddl.u8   r16x8,  r8x16
    vpaddl.u8   g16x8,  g8x16
    vpaddl.u8   b16x8,  b8x16
.endm
 | |
| 
 | |
| 
 | |
/*
 * downsample_ars2 - accumulate and right shift by 2
 *
 * Add each pair of adjacent unsigned 8-bit samples of the r/g/b 8x16
 * registers onto the 16-bit sums already held in the 16x8 registers, then
 * divide every sum by 4 with rounding. Used after downsample, this turns
 * the sums of two rows into per-2x2-block averages.
 */
.macro downsample_ars2
    /* sum16 += pairwise sums of the current row */
    vpadal.u8   r16x8,  r8x16
    vpadal.u8   g16x8,  g8x16
    vpadal.u8   b16x8,  b8x16

    /* sum16 = (sum16 + 2) >> 2 */
    vrshr.u16   r16x8,  r16x8,  #2
    vrshr.u16   g16x8,  g16x8,  #2
    vrshr.u16   b16x8,  b16x8,  #2
.endm
 | |
| 
 | |
/*
 * store_y8_16x1 dst, count
 *
 * Store the y8x16 register at dst and advance dst.
 *   dst   - core register holding the destination address (updated)
 *   count - optional core register; when given, dst advances by count
 *           bytes, otherwise by the size of the stored register
 * The store itself always writes the whole register.
 */
.macro store_y8_16x1            dst, count
.ifnc "\count",""
    vstmia      \dst,   {y8x16}
    add         \dst,   \dst,           \count
.else
    vstmia      \dst!,  {y8x16}
.endif
.endm
 | |
| 
 | |
/*
 * store_chroma_nv12_8x1 dst, count
 *
 * Store u8x8 and v8x8 interleaved byte by byte, U first, and advance dst.
 *   dst   - core register holding the destination address (updated)
 *   count - optional core register; when given, dst is post-incremented by
 *           count, otherwise by the number of bytes written
 */
.macro store_chroma_nv12_8x1    dst, count
.ifnc "\count",""
    vst2.i8     {u8x8, v8x8},   [\dst], \count
.else
    vst2.i8     {u8x8, v8x8},   [\dst]!
.endif
.endm
 | |
| 
 | |
/*
 * store_chroma_nv21_8x1 dst, count
 *
 * Store v8x8 and u8x8 interleaved byte by byte, V first, and advance dst.
 *   dst   - core register holding the destination address (updated)
 *   count - optional core register; when given, dst is post-incremented by
 *           count, otherwise by the number of bytes written
 */
.macro store_chroma_nv21_8x1    dst, count
.ifnc "\count",""
    vst2.i8     {v8x8, u8x8},   [\dst], \count
.else
    vst2.i8     {v8x8, u8x8},   [\dst]!
.endif
.endm
 | |
| 
 | |
/*
 * load_8888_16x1 a, b, c, d, src, count
 *
 * Load 16 pixels of 4 bytes each from src and de-interleave them into the
 * four registers <a>8x16, <b>8x16, <c>8x16 and <d>8x16: the first 8 pixels
 * go to the _l halves, the next 8 to the _h halves.
 *   a, b, c, d - channel prefixes in memory order
 *   src        - core register holding the source address (updated)
 *   count      - optional core register; when given, src ends up advanced
 *                by count * 4 bytes instead of the 64 bytes that were read
 */
.macro load_8888_16x1   a, b, c, d, src, count
.ifnc "\count",""
    vld4.8      {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l},  [\src]!
    vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h},  [\src]
    /* Undo the 32-byte writeback of the first load, then step by count
     * pixels. */
    sub         \src,   \src,   #32
    add         \src,   \src,   \count, LSL #2
.else
    vld4.8      {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l},  [\src]!
    vld4.8      {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h},  [\src]!
.endif
.endm
 | |
| 
 | |
/*
 * load_rgbx_16x1 src, count
 *
 * Load 16 pixels whose bytes are ordered R, G, B, X in memory; see
 * load_8888_16x1 for the meaning of src and the optional count.
 */
.macro load_rgbx_16x1   src, count
    load_8888_16x1  r, g, b, x, \src, \count
.endm
 | |
| 
 | |
/*
 * load_bgrx_16x1 src, count
 *
 * Load 16 pixels whose bytes are ordered B, G, R, X in memory; see
 * load_8888_16x1 for the meaning of src and the optional count.
 */
.macro load_bgrx_16x1   src, count
    load_8888_16x1  b, g, r, x, \src, \count
.endm
 | |
| 
 | |
/*
 * alias_src_rgbx set=1
 *
 * Define (or, with set=0, remove) the source register names for R, G, B, X
 * byte order: r8x16 = q3, g8x16 = q4, b8x16 = q5, x8x16 = q6.
 */
.macro alias_src_rgbx   set=1
    alias_src_8888  r, g, b, x, \set
.endm
 | |
| 
 | |
/*
 * alias_src_bgrx set=1
 *
 * Define (or, with set=0, remove) the source register names for B, G, R, X
 * byte order: b8x16 = q3, g8x16 = q4, r8x16 = q5, x8x16 = q6.
 */
.macro alias_src_bgrx   set=1
    alias_src_8888  b, g, r, x, \set
.endm
 | |
| 
 | |
/*
 * alias_dst_nv12 set=1
 *
 * Define (or, with set=0, remove) the chroma output names for U-first
 * interleaving: u8x8 is the low half and v8x8 the high half of c8x8x2.
 * c8x8x2 is not defined in this file; it is expected to be aliased by the
 * file that includes it.
 */
.macro alias_dst_nv12   set=1
    alias   u8x8, c8x8x2_l, \set
    alias   v8x8, c8x8x2_h, \set
.endm
 | |
| 
 | |
/*
 * alias_dst_nv21 set=1
 *
 * Define (or, with set=0, remove) the chroma output names for V-first
 * interleaving: v8x8 is the low half and u8x8 the high half of c8x8x2.
 * c8x8x2 is not defined in this file; it is expected to be aliased by the
 * file that includes it.
 */
.macro alias_dst_nv21   set=1
    alias   v8x8, c8x8x2_l, \set
    alias   u8x8, c8x8x2_h, \set
.endm
 | |
| 
 | |
| 
 | |
/*
 * Common aliases.
 *
 * Fixed NEON register assignments shared by the conversion macros:
 *   d0 - CO_R, d1 - CO_G, d2 - CO_B; the CO_xY / CO_xU / CO_xV names select
 *        signed 16-bit lanes 0, 1 and 2 of the respective register
 *   d3 - BIAS_U and BIAS_V (one register under both names)
 *   q2 - BIAS_Y
 * Judging by the names these hold the per-channel conversion coefficients
 * and the output biases; the code that fills them is not in this file.
 */

alias   CO_R,   d0
CO_RY   .dn     d0.s16[0]
CO_RU   .dn     d0.s16[1]
CO_RV   .dn     d0.s16[2]

alias   CO_G,   d1
CO_GY   .dn     d1.s16[0]
CO_GU   .dn     d1.s16[1]
CO_GV   .dn     d1.s16[2]

alias   CO_B,   d2
CO_BY   .dn     d2.s16[0]
CO_BU   .dn     d2.s16[1]
CO_BV   .dn     d2.s16[2]

alias   BIAS_U, d3
alias   BIAS_V, BIAS_U

alias   BIAS_Y, q2
 | |
| 
 | |
| 
 | |
/*
 * alias_src_8888 a, b, c, d, set
 *
 * q3-q6 hold 16 source pixels, one channel per register. Bind the channel
 * prefixes to those registers in memory order:
 *   <a>8x16 = q3, <b>8x16 = q4, <c>8x16 = q5, <d>8x16 = q6
 * Each binding also provides the _l and _h half-register names (see
 * alias_qw). set is forwarded unchanged; 0 removes the names.
 */
.macro alias_src_8888   a, b, c, d, set
    alias_qw  \a\()8x16, q3, \set
    alias_qw  \b\()8x16, q4, \set
    alias_qw  \c\()8x16, q5, \set
    alias_qw  \d\()8x16, q6, \set
.endm
 | |
| 
 | |
/*
 * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
 *
 * Convert a block of 16 columns by 2 rows: two rows of 16 luma samples and
 * one row of 8 interleaved chroma pairs.
 *   rgb_fmt    - source format; selects alias_src_<fmt> and load_<fmt>_16x1
 *   yuv_fmt    - destination format; selects alias_dst_<fmt> and
 *                store_chroma_<fmt>_8x1
 *   rgb0, rgb1 - core registers with the source addresses of both rows
 *   y0, y1     - core registers with the luma addresses of both rows
 *   chroma     - core register with the chroma address
 *   count      - optional core register handed to every load and store as
 *                the amount by which to advance its pointer
 * All five pointer registers are advanced.
 *
 * compute_y_16x1 and compute_chroma_8x1 are not defined in this file; they
 * are expected to come from the file that includes it.
 */
.macro kernel_420_16x2  rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
    alias_src_\rgb_fmt
    alias_dst_\yuv_fmt

    /* First row: luma out, horizontal pair sums kept for the chroma. */
    load_\rgb_fmt\()_16x1   \rgb0, \count

    downsample
    compute_y_16x1
    store_y8_16x1   \y0, \count


    /* Second row: luma out, pair sums accumulated and averaged. */
    load_\rgb_fmt\()_16x1   \rgb1, \count
    downsample_ars2
    compute_y_16x1
    store_y8_16x1   \y1, \count

    /* Chroma from the averaged r/g/b values. */
    compute_chroma_8x1  u, U
    compute_chroma_8x1  v, V

    store_chroma_\yuv_fmt\()_8x1 \chroma, \count

    /* Release the format-specific names, most recently defined first. */
    alias_dst_\yuv_fmt 0
    alias_src_\rgb_fmt 0
.endm
 | 
