mirror of
https://github.com/nyanmisaka/ffmpeg-rockchip.git
synced 2025-12-24 11:52:06 +08:00
lavu/x86/lls: add fma3 optimizations for update_lls
This improves accuracy (very slightly) and speed for processors having fma3.

Sample benchmark (fate flac-16-lpc-cholesky, Haswell):
old:
5993610 decicycles in ff_lpc_calc_coefs, 64 runs, 0 skips
5951528 decicycles in ff_lpc_calc_coefs, 128 runs, 0 skips
new:
5252410 decicycles in ff_lpc_calc_coefs, 64 runs, 0 skips
5232869 decicycles in ff_lpc_calc_coefs, 128 runs, 0 skips

Tested with FATE and --disable-fma3, also examined contents of lavu/lls-test.

Reviewed-by: James Almer <jamrial@gmail.com>
Reviewed-by: Henrik Gramner <henrik@gramner.com>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
This commit is contained in:
@@ -25,6 +25,7 @@
|
||||
|
||||
void ff_update_lls_sse2(LLSModel *m, const double *var);
|
||||
void ff_update_lls_avx(LLSModel *m, const double *var);
|
||||
void ff_update_lls_fma3(LLSModel *m, const double *var);
|
||||
double ff_evaluate_lls_sse2(LLSModel *m, const double *var, int order);
|
||||
|
||||
av_cold void ff_init_lls_x86(LLSModel *m)
|
||||
@@ -38,4 +39,7 @@ av_cold void ff_init_lls_x86(LLSModel *m)
|
||||
if (EXTERNAL_AVX_FAST(cpu_flags)) {
|
||||
m->update_lls = ff_update_lls_avx;
|
||||
}
|
||||
if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) {
|
||||
m->update_lls = ff_update_lls_fma3;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user