mirror of
				https://github.com/nyanmisaka/ffmpeg-rockchip.git
				synced 2025-10-31 12:36:41 +08:00 
			
		
		
		
	lavc/audiodsp: rework RISC-V V scalar product
Take vector reduction out of the loop and unroll.

Before:
audiodsp.scalarproduct_int16_c:       12321.0
audiodsp.scalarproduct_int16_rvv_i32:  4175.7

After:
audiodsp.scalarproduct_int16_c:       12320.5
audiodsp.scalarproduct_int16_rvv_i32:  1230.2
This commit is contained in:
		| @@ -21,21 +21,22 @@ | ||||
| #include "libavutil/riscv/asm.S" | ||||
|  | ||||
// ff_scalarproduct_int16_rvv: sums the signed widening products of two arrays
// of 16-bit elements and returns the 32-bit accumulated value in a0.
//   a0, a1 = pointers to the two input arrays (each advanced 2 bytes/element)
//   a2     = number of elements still to process; the loop ends at zero
//
// NOTE(review): this listing is a rendered diff hunk whose +/- markers were
// lost, so lines of the old and of the new implementation are interleaved and
// the text is not assemblable as shown. The [old]/[new] tags below are
// inferred from the hunk header (21 lines before, 22 after) and from the
// commit message ("take vector reduction out of the loop") -- confirm against
// the repository before relying on them.
| func ff_scalarproduct_int16_rvv, zve32x | ||||
// [old] VL = 1 at e32: zero element 0 of v8, the running scalar sum.
|         vsetivli    zero, 1, e32, m1, ta, ma | ||||
|         vmv.s.x     v8, zero | ||||
// [new] VL = VLMAX at e32/m8: zero every lane of the v8 register group (the
// per-lane accumulators) and element 0 of v0 (seed of the final reduction).
|         vsetvli     t0, zero, e32, m8, ta, ma | ||||
|         vmv.v.x     v8, zero | ||||
|         vmv.s.x     v0, zero | ||||
| 1: | ||||
// t0 = number of 16-bit elements handled in this pass (at most a2).
// [old] one vector register per operand (m1).
|         vsetvli     t0, a2, e16, m1, ta, ma | ||||
// [new] four registers per operand (m4), tail-undisturbed: lanes past t0 are
// left untouched, so partial sums from earlier passes survive a shorter pass.
|         vsetvli     t0, a2, e16, m4, tu, ma | ||||
// [both] load t0 elements from each array, consume them from the count and
// step the pointers forward by 2 * t0 bytes.
|         vle16.v     v16, (a0) | ||||
|         sub         a2, a2, t0 | ||||
|         vle16.v     v24, (a1) | ||||
|         sh1add      a0, t0, a0 | ||||
// [old] v0 = 32-bit products of this pass only.
|         vwmul.vv    v0, v16, v24 | ||||
// [new] v8 += 32-bit products (lane-wise multiply-accumulate).
|         vwmacc.vv   v8, v16, v24 | ||||
|         sh1add      a1, t0, a1 | ||||
// [old] reduce inside the loop: v8[0] = v8[0] + sum of the t0 products in v0.
|         vsetvli     zero, t0, e32, m2, ta, ma | ||||
|         vredsum.vs  v8, v0, v8 | ||||
|         bnez        a2, 1b | ||||
|  | ||||
// [old] the result is already the scalar held in v8[0].
|         vmv.x.s     a0, v8 | ||||
// [new] single reduction after the loop over all VLMAX lanes:
// v0[0] = v0[0] + sum of v8 lanes, then move it to the return register.
|         vsetvli     t0, zero, e32, m8, ta, ma | ||||
|         vredsum.vs  v0, v8, v0 | ||||
|         vmv.x.s     a0, v0 | ||||
|         ret | ||||
| endfunc | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Rémi Denis-Courmont
					Rémi Denis-Courmont