mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-06-05 06:50:27 +00:00
Port lfe_fir0_float and lfe_fir1_float to AArch64 NEON. These polyphase FIR interpolation filters have an x86 SSE/AVX path but no AArch64 equivalent, so on AArch64 they fall back to scalar C. The inner loop computes two dot products per output pair. Precomputing a reversed LFE sample vector before the inner loop avoids per-iteration shuffle overhead. Benchmarks on AWS Graviton3 (Neoverse V1, c7g.xlarge): lfe_fir0_float: C 5902.0 cycles -> NEON 2135.0 cycles (2.77x); lfe_fir1_float: C 2836.3 cycles -> NEON 1527.8 cycles (1.86x). Measured with `taskset -c 0 ./tests/checkasm/checkasm --test=dcadsp --bench`, 3-run average, on Ubuntu 22.04 (kernel 6.8.0-1052-aws) with perf_event_paranoid=0. Signed-off-by: Jeongkeun Kim <variety0724@gmail.com> |
||
|---|---|---|
| .. | ||
| h26x | ||
| vvc | ||
| aacencdsp_init.c | ||
| aacencdsp_neon.S | ||
| aacpsdsp_init_aarch64.c | ||
| aacpsdsp_neon.S | ||
| ac3dsp_init_aarch64.c | ||
| ac3dsp_neon.S | ||
| cabac.h | ||
| dcadsp_init_aarch64.c | ||
| dcadsp_neon.S | ||
| fdct.h | ||
| fdctdsp_init_aarch64.c | ||
| fdctdsp_neon.S | ||
| fmtconvert_init.c | ||
| fmtconvert_neon.S | ||
| h264chroma_init_aarch64.c | ||
| h264cmc_neon.S | ||
| h264dsp_init_aarch64.c | ||
| h264dsp_neon.S | ||
| h264idct_neon.S | ||
| h264pred_init.c | ||
| h264pred_neon.S | ||
| h264qpel_init_aarch64.c | ||
| h264qpel_neon.S | ||
| hevcdsp_deblock_neon.S | ||
| hevcdsp_dequant_neon.S | ||
| hevcdsp_idct_neon.S | ||
| hevcdsp_init_aarch64.c | ||
| hevcpred_init_aarch64.c | ||
| hevcpred_neon.S | ||
| hpeldsp_init_aarch64.c | ||
| hpeldsp_neon.S | ||
| huffyuvdsp_init_aarch64.c | ||
| huffyuvdsp_neon.S | ||
| idct.h | ||
| idctdsp_init_aarch64.c | ||
| idctdsp_neon.S | ||
| Makefile | ||
| me_cmp_init_aarch64.c | ||
| me_cmp_neon.S | ||
| mpegaudiodsp_init.c | ||
| mpegaudiodsp_neon.S | ||
| mpegvideoencdsp_init.c | ||
| mpegvideoencdsp_neon.S | ||
| neon.S | ||
| neontest.c | ||
| opusdsp_init.c | ||
| opusdsp_neon.S | ||
| pixblockdsp_init_aarch64.c | ||
| pixblockdsp_neon.S | ||
| pngdsp_init.c | ||
| pngdsp_neon.S | ||
| rv40dsp_init_aarch64.c | ||
| sbrdsp_init_aarch64.c | ||
| sbrdsp_neon.S | ||
| simple_idct_neon.S | ||
| synth_filter_init.c | ||
| synth_filter_neon.S | ||
| vc1dsp_init_aarch64.c | ||
| vc1dsp_neon.S | ||
| videodsp.S | ||
| videodsp_init.c | ||
| vorbisdsp_init.c | ||
| vorbisdsp_neon.S | ||
| vp8dsp.h | ||
| vp8dsp_init_aarch64.c | ||
| vp8dsp_neon.S | ||
| vp9dsp_init.h | ||
| vp9dsp_init_10bpp_aarch64.c | ||
| vp9dsp_init_12bpp_aarch64.c | ||
| vp9dsp_init_16bpp_aarch64_template.c | ||
| vp9dsp_init_aarch64.c | ||
| vp9itxfm_16bpp_neon.S | ||
| vp9itxfm_neon.S | ||
| vp9lpf_16bpp_neon.S | ||
| vp9lpf_neon.S | ||
| vp9mc_16bpp_neon.S | ||
| vp9mc_aarch64.S | ||
| vp9mc_neon.S | ||