mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-02-11 20:49:59 +00:00
These assembly optimizations have been identified as "performance regressions." Due to advancements in modern CPU micro-architectures and compiler optimization, the C implementations now consistently outperform these handwritten routines.

| Test Name | A55-clang | M1 | A76-gcc-14 | A510-clang | A715-clang | X3-clang |
|---|---|---|---|---|---|---|
| pred8x8_dc_8_neon | 55.9 (0.79x)! | 0.2 (0.31x)! | 35.7 (0.63x)! | 98.3 (0.37x)! | 35.9 (0.45x)! | 33.6 (0.38x)! |
| pred8x8_dc_10_neon | 57.0 (1.04x) | 0.3 (0.36x)! | 35.9 (0.94x)! | 98.2 (0.53x)! | 35.8 (0.58x)! | 33.2 (0.50x)! |
| pred8x8_dc_128_8_neon | 26.0 (0.69x)! | 0.1 (0.43x)! | 15.3 (0.73x)! | 46.4 (0.36x)! | 10.6 (0.48x)! | 10.3 (1.09x) |
| pred8x8_dc_128_10_neon | 25.3 (0.99x)! | 0.1 (0.42x)! | 19.3 (0.48x)! | 44.5 (0.42x)! | 10.0 (0.61x)! | 11.0 (1.00x) |
| pred8x8_left_dc_8_neon | 46.9 (0.72x)! | 0.2 (0.26x)! | 30.2 (0.49x)! | 71.4 (0.39x)! | 29.8 (0.35x)! | 26.5 (0.44x)! |
| pred8x8_left_dc_10_neon | 45.4 (0.82x)! | 0.2 (0.29x)! | 28.1 (0.67x)! | 70.2 (0.47x)! | 30.0 (0.38x)! | 26.5 (0.43x)! |
| pred16x16_dc_8_neon | 74.4 (1.34x) | 0.3 (0.62x)! | 44.7 (0.89x)! | 128.0 (0.79x)! | 48.5 (0.67x)! | 39.4 (0.71x)! |
| pred16x16_dc_128_8_neon | 37.9 (0.79x)! | 0.1 (0.60x)! | 20.1 (0.80x)! | 41.8 (0.46x)! | 16.2 (0.81x)! | 12.8 (0.95x)! |
| pred16x16_left_dc_8_neon | 69.9 (1.19x) | 0.3 (0.46x)! | 49.6 (0.54x)! | 116.8 (0.62x)! | 52.8 (0.45x)! | 44.2 (0.51x)! |
| pred8x8_hori_8_neon | 30.6 (1.39x) | 0.1 (0.45x)! | 19.4 (0.81x)! | 71.0 (0.50x)! | 15.9 (0.55x)! | 12.2 (0.94x)! |
| pred8x8_hori_10_neon* | 29.3 (1.82x) | 0.1 (0.59x)! | 18.5 (1.56x) | 68.9 (0.64x)! | 15.8 (0.62x)! | 11.8 (0.97x)! |
| pred8x8_top_dc_8_neon | 35.8 (0.96x)! | 0.1 (0.59x)! | 16.8 (0.81x)! | 58.9 (0.44x)! | 11.3 (0.89x)! | 11.4 (0.99x)! |
| pred8x8_top_dc_10_neon | 37.4 (1.24x) | 0.1 (0.92x)! | 20.4 (0.81x)! | 59.5 (0.69x)! | 10.5 (1.48x) | 11.8 (1.02x) |
| pred8x8_vertical_8_neon | 18.3 (1.08x) | 0.1 (0.54x)! | 12.8 (0.89x)! | 37.2 (0.40x)! | 8.3 (0.77x)! | 11.2 (1.00x) |
| pred8x8_vertical_10_neon | 19.0 (1.24x) | 0.1 (0.55x)! | 15.3 (0.62x)! | 39.7 (0.50x)! | 8.2 (0.91x)! | 11.1 (0.99x)! |

- pred8x8_horizontal_10 also underperforms on new architectures, but is still useful on A55 and A76.

Signed-off-by: Zhao Zhili <zhilizhao@tencent.com>
||
|---|---|---|
| .. | ||
| h26x | ||
| vvc | ||
| aacencdsp_init.c | ||
| aacencdsp_neon.S | ||
| aacpsdsp_init_aarch64.c | ||
| aacpsdsp_neon.S | ||
| ac3dsp_init_aarch64.c | ||
| ac3dsp_neon.S | ||
| cabac.h | ||
| fdct.h | ||
| fdctdsp_init_aarch64.c | ||
| fdctdsp_neon.S | ||
| fmtconvert_init.c | ||
| fmtconvert_neon.S | ||
| h264chroma_init_aarch64.c | ||
| h264cmc_neon.S | ||
| h264dsp_init_aarch64.c | ||
| h264dsp_neon.S | ||
| h264idct_neon.S | ||
| h264pred_init.c | ||
| h264pred_neon.S | ||
| h264qpel_init_aarch64.c | ||
| h264qpel_neon.S | ||
| hevcdsp_deblock_neon.S | ||
| hevcdsp_dequant_neon.S | ||
| hevcdsp_idct_neon.S | ||
| hevcdsp_init_aarch64.c | ||
| hpeldsp_init_aarch64.c | ||
| hpeldsp_neon.S | ||
| idct.h | ||
| idctdsp_init_aarch64.c | ||
| idctdsp_neon.S | ||
| Makefile | ||
| me_cmp_init_aarch64.c | ||
| me_cmp_neon.S | ||
| mpegaudiodsp_init.c | ||
| mpegaudiodsp_neon.S | ||
| mpegvideoencdsp_init.c | ||
| mpegvideoencdsp_neon.S | ||
| neon.S | ||
| neontest.c | ||
| opusdsp_init.c | ||
| opusdsp_neon.S | ||
| pixblockdsp_init_aarch64.c | ||
| pixblockdsp_neon.S | ||
| pngdsp_init.c | ||
| pngdsp_neon.S | ||
| rv40dsp_init_aarch64.c | ||
| sbrdsp_init_aarch64.c | ||
| sbrdsp_neon.S | ||
| simple_idct_neon.S | ||
| synth_filter_init.c | ||
| synth_filter_neon.S | ||
| vc1dsp_init_aarch64.c | ||
| vc1dsp_neon.S | ||
| videodsp.S | ||
| videodsp_init.c | ||
| vorbisdsp_init.c | ||
| vorbisdsp_neon.S | ||
| vp8dsp.h | ||
| vp8dsp_init_aarch64.c | ||
| vp8dsp_neon.S | ||
| vp9dsp_init.h | ||
| vp9dsp_init_10bpp_aarch64.c | ||
| vp9dsp_init_12bpp_aarch64.c | ||
| vp9dsp_init_16bpp_aarch64_template.c | ||
| vp9dsp_init_aarch64.c | ||
| vp9itxfm_16bpp_neon.S | ||
| vp9itxfm_neon.S | ||
| vp9lpf_16bpp_neon.S | ||
| vp9lpf_neon.S | ||
| vp9mc_16bpp_neon.S | ||
| vp9mc_aarch64.S | ||
| vp9mc_neon.S | ||