mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-02-15 15:20:22 +00:00
This patch replaces integer widening with halving addition, and the multi-step "emulated" rounding shift with a single asm instruction doing exactly that. Benchmarks before and after, per CPU core — A78, before: avg_8_64x64_neon: 2686.2 (6.12x), avg_8_128x128_neon: 10734.2 (5.88x), avg_10_64x64_neon: 2536.8 (5.40x), avg_10_128x128_neon: 10079.0 (5.22x), avg_12_64x64_neon: 2548.2 (5.38x), avg_12_128x128_neon: 10133.8 (5.19x); A78, after: avg_8_64x64_neon: 897.8 (18.26x), avg_8_128x128_neon: 3608.5 (17.37x), avg_10_32x32_neon: 444.2 (8.51x), avg_10_64x64_neon: 1711.8 (8.00x), avg_12_64x64_neon: 1706.2 (8.02x), avg_12_128x128_neon: 7010.0 (7.46x). A72, before: avg_8_64x64_neon: 5823.4 (3.88x), avg_8_128x128_neon: 17430.5 (4.73x), avg_10_64x64_neon: 5228.1 (3.71x), avg_10_128x128_neon: 16722.2 (4.17x), avg_12_64x64_neon: 5379.1 (3.51x), avg_12_128x128_neon: 16715.7 (4.17x); A72, after: avg_8_64x64_neon: 2006.5 (10.61x), avg_8_128x128_neon: 9158.7 (8.96x), avg_10_64x64_neon: 3357.7 (5.60x), avg_10_128x128_neon: 12411.7 (5.56x), avg_12_64x64_neon: 3317.5 (5.67x), avg_12_128x128_neon: 12358.5 (5.58x). A53, before: avg_8_64x64_neon: 8327.8 (5.18x), avg_8_128x128_neon: 31631.3 (5.34x), avg_10_64x64_neon: 8783.5 (4.98x), avg_10_128x128_neon: 32617.0 (5.25x), avg_12_64x64_neon: 8686.0 (5.06x), avg_12_128x128_neon: 32487.5 (5.25x); A53, after: avg_8_64x64_neon: 6032.3 (7.17x), avg_8_128x128_neon: 22008.5 (7.69x), avg_10_64x64_neon: 7738.0 (5.68x), avg_10_128x128_neon: 27813.8 (6.14x), avg_12_64x64_neon: 7844.5 (5.60x), avg_12_128x128_neon: 26999.5 (6.34x). Signed-off-by: Martin Storsjö <martin@martin.st> |
||
|---|---|---|
| .. | ||
| h26x | ||
| vvc | ||
| aacencdsp_init.c | ||
| aacencdsp_neon.S | ||
| aacpsdsp_init_aarch64.c | ||
| aacpsdsp_neon.S | ||
| ac3dsp_init_aarch64.c | ||
| ac3dsp_neon.S | ||
| cabac.h | ||
| fdct.h | ||
| fdctdsp_init_aarch64.c | ||
| fdctdsp_neon.S | ||
| fmtconvert_init.c | ||
| fmtconvert_neon.S | ||
| h264chroma_init_aarch64.c | ||
| h264cmc_neon.S | ||
| h264dsp_init_aarch64.c | ||
| h264dsp_neon.S | ||
| h264idct_neon.S | ||
| h264pred_init.c | ||
| h264pred_neon.S | ||
| h264qpel_init_aarch64.c | ||
| h264qpel_neon.S | ||
| hevcdsp_deblock_neon.S | ||
| hevcdsp_idct_neon.S | ||
| hevcdsp_init_aarch64.c | ||
| hpeldsp_init_aarch64.c | ||
| hpeldsp_neon.S | ||
| idct.h | ||
| idctdsp_init_aarch64.c | ||
| idctdsp_neon.S | ||
| Makefile | ||
| me_cmp_init_aarch64.c | ||
| me_cmp_neon.S | ||
| mpegaudiodsp_init.c | ||
| mpegaudiodsp_neon.S | ||
| mpegvideoencdsp_init.c | ||
| mpegvideoencdsp_neon.S | ||
| neon.S | ||
| neontest.c | ||
| opusdsp_init.c | ||
| opusdsp_neon.S | ||
| pixblockdsp_init_aarch64.c | ||
| pixblockdsp_neon.S | ||
| rv40dsp_init_aarch64.c | ||
| sbrdsp_init_aarch64.c | ||
| sbrdsp_neon.S | ||
| simple_idct_neon.S | ||
| synth_filter_init.c | ||
| synth_filter_neon.S | ||
| vc1dsp_init_aarch64.c | ||
| vc1dsp_neon.S | ||
| videodsp.S | ||
| videodsp_init.c | ||
| vorbisdsp_init.c | ||
| vorbisdsp_neon.S | ||
| vp8dsp.h | ||
| vp8dsp_init_aarch64.c | ||
| vp8dsp_neon.S | ||
| vp9dsp_init.h | ||
| vp9dsp_init_10bpp_aarch64.c | ||
| vp9dsp_init_12bpp_aarch64.c | ||
| vp9dsp_init_16bpp_aarch64_template.c | ||
| vp9dsp_init_aarch64.c | ||
| vp9itxfm_16bpp_neon.S | ||
| vp9itxfm_neon.S | ||
| vp9lpf_16bpp_neon.S | ||
| vp9lpf_neon.S | ||
| vp9mc_16bpp_neon.S | ||
| vp9mc_aarch64.S | ||
| vp9mc_neon.S | ||