mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-02-18 13:30:22 +00:00
Intel provided a microcode update to mitigate the Gather Data Sampling
(GDS) security vulnerability. The update has a huge negative performance
impact on gather instructions. This means that hscale 8to15 avx2, which
uses gather extensively, is no longer faster than SSSE3 on impacted CPUs.
https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/gather-data-sampling.html
Broadwell:
hscale_8_to_15__fs_4_dstW_512_c: 3379.5 ( 1.00x)
hscale_8_to_15__fs_4_dstW_512_sse2: 615.7 ( 5.49x)
hscale_8_to_15__fs_4_dstW_512_ssse3: 613.4 ( 5.51x)
hscale_8_to_15__fs_4_dstW_512_avx2: 495.7 ( 6.82x)
Skylake:
hscale_8_to_15__fs_4_dstW_512_c: 3411.4 ( 1.00x)
hscale_8_to_15__fs_4_dstW_512_sse2: 591.0 ( 5.77x)
hscale_8_to_15__fs_4_dstW_512_ssse3: 591.5 ( 5.77x)
hscale_8_to_15__fs_4_dstW_512_avx2: 1386.2 ( 2.46x)
Cascade Lake:
hscale_8_to_15__fs_4_dstW_512_c: 3231.3 ( 1.00x)
hscale_8_to_15__fs_4_dstW_512_sse2: 517.9 ( 6.24x)
hscale_8_to_15__fs_4_dstW_512_ssse3: 521.6 ( 6.19x)
hscale_8_to_15__fs_4_dstW_512_avx2: 1775.0 ( 1.82x)
Sapphire Rapids:
hscale_8_to_15__fs_4_dstW_512_c: 1840.0 ( 1.00x)
hscale_8_to_15__fs_4_dstW_512_sse2: 287.9 ( 6.39x)
hscale_8_to_15__fs_4_dstW_512_ssse3: 293.8 ( 6.26x)
hscale_8_to_15__fs_4_dstW_512_avx2: 219.2 ( 8.40x)
| | | |
|---|---|---|
| .. | ||
| aes.asm | ||
| aes_init.c | ||
| asm.h | ||
| bswap.h | ||
| cpu.c | ||
| cpu.h | ||
| cpuid.asm | ||
| emms.asm | ||
| fixed_dsp.asm | ||
| fixed_dsp_init.c | ||
| float_dsp.asm | ||
| float_dsp_init.c | ||
| imgutils.asm | ||
| imgutils_init.c | ||
| intmath.h | ||
| intreadwrite.h | ||
| lls.asm | ||
| lls_init.c | ||
| Makefile | ||
| pixelutils.asm | ||
| pixelutils.h | ||
| pixelutils_init.c | ||
| timer.h | ||
| tx_float.asm | ||
| tx_float_init.c | ||
| w64xmmtest.h | ||
| x86inc.asm | ||
| x86util.asm | ||