mips: port optimizations to mips n64

This mainly consists of replacing all the pointer arithmetic 'addiu'
instructions with PTR_ADDIU which will handle the differences in pointer
sizes when compiled on 64-bit mips systems.

The header asmdefs.h contains the PTR_ macros which expand to the correct mips
instructions to manipulate registers containing pointers.

Signed-off-by: James Cowgill <james410@cowgill.org.uk>
Reviewed-by: Nedeljko Babic <Nedeljko.Babic@imgtec.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
James Cowgill
2015-03-05 17:40:15 +00:00
committed by Michael Niedermayer
parent eae13eae9d
commit 157d6f0d5b
20 changed files with 247 additions and 178 deletions

View File

@@ -54,6 +54,7 @@
#include "config.h"
#include "libavcodec/aacpsdsp.h"
#include "libavutil/mips/asmdefs.h"
#if HAVE_INLINE_ASM
static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][64],
@@ -86,8 +87,8 @@ static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][6
"sw %[temp5], 20(%[out1]) \n\t"
"sw %[temp6], 24(%[out1]) \n\t"
"sw %[temp7], 28(%[out1]) \n\t"
"addiu %[out1], %[out1], 32 \n\t"
"addiu %[L1], %[L1], 1024 \n\t"
PTR_ADDIU "%[out1], %[out1], 32 \n\t"
PTR_ADDIU "%[L1], %[L1], 1024 \n\t"
"bne %[out1], %[j], 1b \n\t"
: [out1]"+r"(out1), [L1]"+r"(L1), [j]"+r"(j),
@@ -128,10 +129,10 @@ static void ps_hybrid_synthesis_deint_mips(float out[2][38][64],
"lw %[temp5], 16(%[in2]) \n\t"
"lw %[temp6], 24(%[in1]) \n\t"
"lw %[temp7], 24(%[in2]) \n\t"
"addiu %[out1], %[out1], 1024 \n\t"
"addiu %[out2], %[out2], 1024 \n\t"
"addiu %[in1], %[in1], 32 \n\t"
"addiu %[in2], %[in2], 32 \n\t"
PTR_ADDIU "%[out1], %[out1], 1024 \n\t"
PTR_ADDIU "%[out2], %[out2], 1024 \n\t"
PTR_ADDIU "%[in1], %[in1], 32 \n\t"
PTR_ADDIU "%[in2], %[in2], 32 \n\t"
"sw %[temp0], -1024(%[out1]) \n\t"
"sw %[temp1], -1024(%[out2]) \n\t"
"sw %[temp2], -768(%[out1]) \n\t"
@@ -161,10 +162,10 @@ static void ps_hybrid_synthesis_deint_mips(float out[2][38][64],
"lw %[temp5], 16(%[in2]) \n\t"
"lw %[temp6], 24(%[in1]) \n\t"
"lw %[temp7], 24(%[in2]) \n\t"
"addiu %[out1], %[out1], -7164 \n\t"
"addiu %[out2], %[out2], -7164 \n\t"
"addiu %[in1], %[in1], 32 \n\t"
"addiu %[in2], %[in2], 32 \n\t"
PTR_ADDIU "%[out1], %[out1], -7164 \n\t"
PTR_ADDIU "%[out2], %[out2], -7164 \n\t"
PTR_ADDIU "%[in1], %[in1], 32 \n\t"
PTR_ADDIU "%[in2], %[in2], 32 \n\t"
"sw %[temp0], 7164(%[out1]) \n\t"
"sw %[temp1], 7164(%[out2]) \n\t"
"sw %[temp2], 7420(%[out1]) \n\t"
@@ -226,8 +227,8 @@ static void ps_add_squares_mips(float *dst, const float (*src)[2], int n)
"swc1 %[temp2], 4(%[dst0]) \n\t"
"swc1 %[temp4], 8(%[dst0]) \n\t"
"swc1 %[temp6], 12(%[dst0]) \n\t"
"addiu %[dst0], %[dst0], 16 \n\t"
"addiu %[src0], %[src0], 32 \n\t"
PTR_ADDIU "%[dst0], %[dst0], 16 \n\t"
PTR_ADDIU "%[src0], %[src0], 32 \n\t"
: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
[temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
@@ -257,14 +258,14 @@ static void ps_mul_pair_single_mips(float (*dst)[2], float (*src0)[2], float *sr
"lwc1 %[temp2], 0(%[p_s1]) \n\t"
"lwc1 %[temp0], 0(%[p_s0]) \n\t"
"lwc1 %[temp1], 4(%[p_s0]) \n\t"
"addiu %[p_d], %[p_d], 8 \n\t"
PTR_ADDIU "%[p_d], %[p_d], 8 \n\t"
"mul.s %[temp0], %[temp0], %[temp2] \n\t"
"mul.s %[temp1], %[temp1], %[temp2] \n\t"
"addiu %[p_s0], %[p_s0], 8 \n\t"
PTR_ADDIU "%[p_s0], %[p_s0], 8 \n\t"
"swc1 %[temp0], -8(%[p_d]) \n\t"
"swc1 %[temp1], -4(%[p_d]) \n\t"
"bne %[p_s1], %[end], 1b \n\t"
" addiu %[p_s1], %[p_s1], 4 \n\t"
PTR_ADDIU "%[p_s1], %[p_s1], 4 \n\t"
".set pop \n\t"
: [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
@@ -355,13 +356,13 @@ static void ps_decorrelate_mips(float (*out)[2], float (*delay)[2],
"mul.s %[temp1], %[ag2], %[temp3] \n\t"
"lwc1 %[temp4], 0(%[p_t_gain]) \n\t"
"sub.s %[temp0], %[temp8], %[temp0] \n\t"
"addiu %[p_ap_delay], %[p_ap_delay], 8 \n\t"
PTR_ADDIU "%[p_ap_delay], %[p_ap_delay], 8 \n\t"
"sub.s %[temp1], %[temp9], %[temp1] \n\t"
"addiu %[p_t_gain], %[p_t_gain], 4 \n\t"
PTR_ADDIU "%[p_t_gain], %[p_t_gain], 4 \n\t"
"madd.s %[temp2], %[temp2], %[ag2], %[temp0] \n\t"
"addiu %[p_delay], %[p_delay], 8 \n\t"
PTR_ADDIU "%[p_delay], %[p_delay], 8 \n\t"
"madd.s %[temp3], %[temp3], %[ag2], %[temp1] \n\t"
"addiu %[p_out], %[p_out], 8 \n\t"
PTR_ADDIU "%[p_out], %[p_out], 8 \n\t"
"mul.s %[temp5], %[temp4], %[temp0] \n\t"
"mul.s %[temp6], %[temp4], %[temp1] \n\t"
"swc1 %[temp2], 624(%[p_ap_delay]) \n\t"
@@ -414,9 +415,9 @@ static void ps_stereo_interpolate_mips(float (*l)[2], float (*r)[2],
"add.s %[h3], %[h3], %[hs3] \n\t"
"lwc1 %[r_im], 4(%[r]) \n\t"
"mul.s %[temp0], %[h0], %[l_re] \n\t"
"addiu %[l], %[l], 8 \n\t"
PTR_ADDIU "%[l], %[l], 8 \n\t"
"mul.s %[temp2], %[h1], %[l_re] \n\t"
"addiu %[r], %[r], 8 \n\t"
PTR_ADDIU "%[r], %[r], 8 \n\t"
"madd.s %[temp0], %[temp0], %[h2], %[r_re] \n\t"
"madd.s %[temp2], %[temp2], %[h3], %[r_re] \n\t"
"mul.s %[temp1], %[h0], %[l_im] \n\t"