swscale/la: Optimize hscale functions with LASX.

ffmpeg -i 1_h264_1080p_30fps_3Mbps.mp4 -f rawvideo -s 640x480 -y /dev/null -an
before: 101fps
after:  138fps

Signed-off-by: Hao Chen <chenhao@loongson.cn>
Reviewed-by: yinshiyou-hf@loongson.cn
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
Hao Chen
2022-09-09 17:00:24 +08:00
committed by Michael Niedermayer
parent 09cce81245
commit 38cacce22a
8 changed files with 1293 additions and 1 deletions

View File

@@ -53,6 +53,7 @@
#include "libavutil/ppc/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/loongarch/cpu.h"
#include "rgb2rgb.h"
#include "swscale.h"
@@ -659,6 +660,15 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
filterAlign = 1;
}
if (have_lasx(cpu_flags)) {
int reNum = minFilterSize & (0x07);
if (minFilterSize < 5)
filterAlign = 4;
if (reNum < 3)
filterAlign = 1;
}
av_assert0(minFilterSize > 0);
filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
av_assert0(filterSize > 0);
@@ -1844,7 +1854,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
{
const int filterAlign = X86_MMX(cpu_flags) ? 4 :
PPC_ALTIVEC(cpu_flags) ? 8 :
have_neon(cpu_flags) ? 4 : 1;
have_neon(cpu_flags) ? 4 :
have_lasx(cpu_flags) ? 8 : 1;
if ((ret = initFilter(&c->hLumFilter, &c->hLumFilterPos,
&c->hLumFilterSize, c->lumXInc,