mirror of
https://github.com/nyanmisaka/ffmpeg-rockchip.git
synced 2025-11-01 21:12:53 +08:00
mips: port optimizations to mips n64
This mainly consists of replacing all the pointer arithmetic 'addiu' instructions with PTR_ADDIU, which handles the differences in pointer sizes when compiled on 64-bit MIPS systems. The header asmdefs.h contains the PTR_ macros, which expand to the correct MIPS instructions to manipulate registers containing pointers. Signed-off-by: James Cowgill <james410@cowgill.org.uk> Reviewed-by: Nedeljko Babic <Nedeljko.Babic@imgtec.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
committed by
Michael Niedermayer
parent
eae13eae9d
commit
157d6f0d5b
@@ -54,6 +54,7 @@
|
||||
|
||||
#include "config.h"
|
||||
#include "libavcodec/aacpsdsp.h"
|
||||
#include "libavutil/mips/asmdefs.h"
|
||||
|
||||
#if HAVE_INLINE_ASM
|
||||
static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][64],
|
||||
@@ -86,8 +87,8 @@ static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][6
|
||||
"sw %[temp5], 20(%[out1]) \n\t"
|
||||
"sw %[temp6], 24(%[out1]) \n\t"
|
||||
"sw %[temp7], 28(%[out1]) \n\t"
|
||||
"addiu %[out1], %[out1], 32 \n\t"
|
||||
"addiu %[L1], %[L1], 1024 \n\t"
|
||||
PTR_ADDIU "%[out1], %[out1], 32 \n\t"
|
||||
PTR_ADDIU "%[L1], %[L1], 1024 \n\t"
|
||||
"bne %[out1], %[j], 1b \n\t"
|
||||
|
||||
: [out1]"+r"(out1), [L1]"+r"(L1), [j]"+r"(j),
|
||||
@@ -128,10 +129,10 @@ static void ps_hybrid_synthesis_deint_mips(float out[2][38][64],
|
||||
"lw %[temp5], 16(%[in2]) \n\t"
|
||||
"lw %[temp6], 24(%[in1]) \n\t"
|
||||
"lw %[temp7], 24(%[in2]) \n\t"
|
||||
"addiu %[out1], %[out1], 1024 \n\t"
|
||||
"addiu %[out2], %[out2], 1024 \n\t"
|
||||
"addiu %[in1], %[in1], 32 \n\t"
|
||||
"addiu %[in2], %[in2], 32 \n\t"
|
||||
PTR_ADDIU "%[out1], %[out1], 1024 \n\t"
|
||||
PTR_ADDIU "%[out2], %[out2], 1024 \n\t"
|
||||
PTR_ADDIU "%[in1], %[in1], 32 \n\t"
|
||||
PTR_ADDIU "%[in2], %[in2], 32 \n\t"
|
||||
"sw %[temp0], -1024(%[out1]) \n\t"
|
||||
"sw %[temp1], -1024(%[out2]) \n\t"
|
||||
"sw %[temp2], -768(%[out1]) \n\t"
|
||||
@@ -161,10 +162,10 @@ static void ps_hybrid_synthesis_deint_mips(float out[2][38][64],
|
||||
"lw %[temp5], 16(%[in2]) \n\t"
|
||||
"lw %[temp6], 24(%[in1]) \n\t"
|
||||
"lw %[temp7], 24(%[in2]) \n\t"
|
||||
"addiu %[out1], %[out1], -7164 \n\t"
|
||||
"addiu %[out2], %[out2], -7164 \n\t"
|
||||
"addiu %[in1], %[in1], 32 \n\t"
|
||||
"addiu %[in2], %[in2], 32 \n\t"
|
||||
PTR_ADDIU "%[out1], %[out1], -7164 \n\t"
|
||||
PTR_ADDIU "%[out2], %[out2], -7164 \n\t"
|
||||
PTR_ADDIU "%[in1], %[in1], 32 \n\t"
|
||||
PTR_ADDIU "%[in2], %[in2], 32 \n\t"
|
||||
"sw %[temp0], 7164(%[out1]) \n\t"
|
||||
"sw %[temp1], 7164(%[out2]) \n\t"
|
||||
"sw %[temp2], 7420(%[out1]) \n\t"
|
||||
@@ -226,8 +227,8 @@ static void ps_add_squares_mips(float *dst, const float (*src)[2], int n)
|
||||
"swc1 %[temp2], 4(%[dst0]) \n\t"
|
||||
"swc1 %[temp4], 8(%[dst0]) \n\t"
|
||||
"swc1 %[temp6], 12(%[dst0]) \n\t"
|
||||
"addiu %[dst0], %[dst0], 16 \n\t"
|
||||
"addiu %[src0], %[src0], 32 \n\t"
|
||||
PTR_ADDIU "%[dst0], %[dst0], 16 \n\t"
|
||||
PTR_ADDIU "%[src0], %[src0], 32 \n\t"
|
||||
|
||||
: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
|
||||
[temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
|
||||
@@ -257,14 +258,14 @@ static void ps_mul_pair_single_mips(float (*dst)[2], float (*src0)[2], float *sr
|
||||
"lwc1 %[temp2], 0(%[p_s1]) \n\t"
|
||||
"lwc1 %[temp0], 0(%[p_s0]) \n\t"
|
||||
"lwc1 %[temp1], 4(%[p_s0]) \n\t"
|
||||
"addiu %[p_d], %[p_d], 8 \n\t"
|
||||
PTR_ADDIU "%[p_d], %[p_d], 8 \n\t"
|
||||
"mul.s %[temp0], %[temp0], %[temp2] \n\t"
|
||||
"mul.s %[temp1], %[temp1], %[temp2] \n\t"
|
||||
"addiu %[p_s0], %[p_s0], 8 \n\t"
|
||||
PTR_ADDIU "%[p_s0], %[p_s0], 8 \n\t"
|
||||
"swc1 %[temp0], -8(%[p_d]) \n\t"
|
||||
"swc1 %[temp1], -4(%[p_d]) \n\t"
|
||||
"bne %[p_s1], %[end], 1b \n\t"
|
||||
" addiu %[p_s1], %[p_s1], 4 \n\t"
|
||||
PTR_ADDIU "%[p_s1], %[p_s1], 4 \n\t"
|
||||
".set pop \n\t"
|
||||
|
||||
: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
|
||||
@@ -355,13 +356,13 @@ static void ps_decorrelate_mips(float (*out)[2], float (*delay)[2],
|
||||
"mul.s %[temp1], %[ag2], %[temp3] \n\t"
|
||||
"lwc1 %[temp4], 0(%[p_t_gain]) \n\t"
|
||||
"sub.s %[temp0], %[temp8], %[temp0] \n\t"
|
||||
"addiu %[p_ap_delay], %[p_ap_delay], 8 \n\t"
|
||||
PTR_ADDIU "%[p_ap_delay], %[p_ap_delay], 8 \n\t"
|
||||
"sub.s %[temp1], %[temp9], %[temp1] \n\t"
|
||||
"addiu %[p_t_gain], %[p_t_gain], 4 \n\t"
|
||||
PTR_ADDIU "%[p_t_gain], %[p_t_gain], 4 \n\t"
|
||||
"madd.s %[temp2], %[temp2], %[ag2], %[temp0] \n\t"
|
||||
"addiu %[p_delay], %[p_delay], 8 \n\t"
|
||||
PTR_ADDIU "%[p_delay], %[p_delay], 8 \n\t"
|
||||
"madd.s %[temp3], %[temp3], %[ag2], %[temp1] \n\t"
|
||||
"addiu %[p_out], %[p_out], 8 \n\t"
|
||||
PTR_ADDIU "%[p_out], %[p_out], 8 \n\t"
|
||||
"mul.s %[temp5], %[temp4], %[temp0] \n\t"
|
||||
"mul.s %[temp6], %[temp4], %[temp1] \n\t"
|
||||
"swc1 %[temp2], 624(%[p_ap_delay]) \n\t"
|
||||
@@ -414,9 +415,9 @@ static void ps_stereo_interpolate_mips(float (*l)[2], float (*r)[2],
|
||||
"add.s %[h3], %[h3], %[hs3] \n\t"
|
||||
"lwc1 %[r_im], 4(%[r]) \n\t"
|
||||
"mul.s %[temp0], %[h0], %[l_re] \n\t"
|
||||
"addiu %[l], %[l], 8 \n\t"
|
||||
PTR_ADDIU "%[l], %[l], 8 \n\t"
|
||||
"mul.s %[temp2], %[h1], %[l_re] \n\t"
|
||||
"addiu %[r], %[r], 8 \n\t"
|
||||
PTR_ADDIU "%[r], %[r], 8 \n\t"
|
||||
"madd.s %[temp0], %[temp0], %[h2], %[r_re] \n\t"
|
||||
"madd.s %[temp2], %[temp2], %[h3], %[r_re] \n\t"
|
||||
"mul.s %[temp1], %[h0], %[l_im] \n\t"
|
||||
|
||||
Reference in New Issue
Block a user