mirror of
				https://github.com/nyanmisaka/ffmpeg-rockchip.git
				synced 2025-10-31 20:42:49 +08:00 
			
		
		
		
	 04d092e7d5
			
		
	
	04d092e7d5
	
	
	
		
			
			RV64G supports MIN & MAX instructions natively only on floating point registers, not general purpose ones. The latter would require the Zbb extension. Due to that, it is actually faster to perform the clipping "properly" in the FPU. Benchmarks on SiFive U74-MC (courtesy of Shanghai StarFive Tech): audiodsp.vector_clipf_c: 29551.5 audiodsp.vector_clipf_rvf: 17871.0 Also tried unrolling with 2 or 8 elements but it gets worse either way.
		
			
				
	
	
		
			50 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			50 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /*
 | |
|  * Copyright © 2022 Rémi Denis-Courmont.
 | |
|  *
 | |
|  * This file is part of FFmpeg.
 | |
|  *
 | |
|  * FFmpeg is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU Lesser General Public
 | |
|  * License as published by the Free Software Foundation; either
 | |
|  * version 2.1 of the License, or (at your option) any later version.
 | |
|  *
 | |
|  * FFmpeg is distributed in the hope that it will be useful,
 | |
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|  * Lesser General Public License for more details.
 | |
|  *
 | |
|  * You should have received a copy of the GNU Lesser General Public
 | |
|  * License along with FFmpeg; if not, write to the Free Software
 | |
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | |
|  */
 | |
| 
 | |
| #include "libavutil/riscv/asm.S"
 | |
| 
 | |
| func ff_vector_clipf_rvf, f        /* clamp 32-bit floats read from (a1) to [fa0, fa1] and write them to (a0); a2 counts elements, 4 per pass */
 | |
| NOHWF   fmv.w.x fa0, a3            /* lower bound: copy bit pattern from a3 into fa0 (NOHWF comes from asm.S; presumably soft-float ABI only - confirm) */
 | |
| NOHWF   fmv.w.x fa1, a4            /* upper bound: copy bit pattern from a4 into fa1 (same NOHWF caveat) */
 | |
| 1:                                 /* loop head: one pass handles 4 consecutive floats (16 bytes) */
 | |
|         flw     ft0,   (a1)        /* load element 0 */
 | |
|         flw     ft1,  4(a1)        /* load element 1 */
 | |
|         fmax.s  ft0, ft0, fa0      /* element 0: apply lower bound */
 | |
|         flw     ft2,  8(a1)        /* load element 2 */
 | |
|         fmax.s  ft1, ft1, fa0      /* element 1: apply lower bound */
 | |
|         flw     ft3, 12(a1)        /* load element 3 */
 | |
|         fmax.s  ft2, ft2, fa0      /* element 2: apply lower bound */
 | |
|         addi    a2, a2, -4         /* 4 fewer elements remaining */
 | |
|         fmax.s  ft3, ft3, fa0      /* element 3: apply lower bound */
 | |
|         addi    a1, a1, 16         /* advance source pointer past the 4 loaded floats */
 | |
|         fmin.s  ft0, ft0, fa1      /* element 0: apply upper bound */
 | |
|         fmin.s  ft1, ft1, fa1      /* element 1: apply upper bound */
 | |
|         fsw     ft0,   (a0)        /* store element 0 */
 | |
|         fmin.s  ft2, ft2, fa1      /* element 2: apply upper bound */
 | |
|         fsw     ft1,  4(a0)        /* store element 1 */
 | |
|         fmin.s  ft3, ft3, fa1      /* element 3: apply upper bound */
 | |
|         fsw     ft2,  8(a0)        /* store element 2 */
 | |
|         fsw     ft3, 12(a0)        /* store element 3 */
 | |
|         addi    a0, a0, 16         /* advance destination pointer by 4 floats */
 | |
|         bnez    a2, 1b             /* repeat until a2 is exactly 0; relies on a2 being a nonzero multiple of 4 on entry */
 | |
| 
 | |
|         ret
 | |
| endfunc
 |