mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-04-18 00:20:21 +00:00
Add NEON-optimized implementation for HEVC intra DC prediction at 8-bit
depth, supporting all block sizes (4x4 to 32x32).
DC prediction computes the average of top and left reference samples
using uaddlv, with urshr for rounded division. For luma blocks smaller
than 32x32, edge smoothing is applied: the first row and column are
blended toward the reference using (ref[i] + 3*dc + 2) >> 2 computed
entirely in the NEON domain. Fill stores use pre-computed address
patterns to break dependency chains.
Also adds the aarch64 initialization framework (Makefile, pred.c/pred.h
hooks, hevcpred_init_aarch64.c).
Speedup over C on Apple M4 (checkasm --bench):
4x4: 2.28x 8x8: 3.14x 16x16: 3.29x 32x32: 3.02x
Signed-off-by: Jun Zhao <barryjzhao@tencent.com>
85 lines
5.2 KiB
Makefile
85 lines
5.2 KiB
Makefile
# subsystems
|
|
OBJS-$(CONFIG_AC3DSP) += aarch64/ac3dsp_init_aarch64.o
|
|
OBJS-$(CONFIG_FDCTDSP) += aarch64/fdctdsp_init_aarch64.o
|
|
OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_init.o
|
|
OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o
|
|
OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
|
|
OBJS-$(CONFIG_HUFFYUVDSP) += aarch64/huffyuvdsp_init_aarch64.o
|
|
OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
|
|
OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
|
|
OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o
|
|
OBJS-$(CONFIG_IDCTDSP) += aarch64/idctdsp_init_aarch64.o
|
|
OBJS-$(CONFIG_ME_CMP) += aarch64/me_cmp_init_aarch64.o
|
|
OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_init.o
|
|
OBJS-$(CONFIG_MPEGVIDEOENCDSP) += aarch64/mpegvideoencdsp_init.o
|
|
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o
|
|
OBJS-$(CONFIG_PIXBLOCKDSP) += aarch64/pixblockdsp_init_aarch64.o
|
|
OBJS-$(CONFIG_PNG_DECODER) += aarch64/pngdsp_init.o
|
|
OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp_init.o
|
|
OBJS-$(CONFIG_VP8DSP) += aarch64/vp8dsp_init_aarch64.o
|
|
|
|
# decoders/encoders
|
|
OBJS-$(CONFIG_AAC_DECODER) += aarch64/aacpsdsp_init_aarch64.o \
|
|
aarch64/sbrdsp_init_aarch64.o
|
|
OBJS-$(CONFIG_AAC_ENCODER) += aarch64/aacencdsp_init.o
|
|
OBJS-$(CONFIG_DCA_DECODER) += aarch64/synth_filter_init.o
|
|
OBJS-$(CONFIG_OPUS_DECODER) += aarch64/opusdsp_init.o
|
|
OBJS-$(CONFIG_RV40_DECODER) += aarch64/rv40dsp_init_aarch64.o
|
|
OBJS-$(CONFIG_VC1DSP) += aarch64/vc1dsp_init_aarch64.o
|
|
OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_init.o
|
|
OBJS-$(CONFIG_VP9_DECODER) += aarch64/vp9dsp_init_10bpp_aarch64.o \
|
|
aarch64/vp9dsp_init_12bpp_aarch64.o \
|
|
aarch64/vp9mc_aarch64.o \
|
|
aarch64/vp9dsp_init_aarch64.o
|
|
|
|
# ARMv8 optimizations
|
|
|
|
# subsystems
|
|
ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o
|
|
|
|
# NEON optimizations
|
|
|
|
# subsystems
|
|
NEON-OBJS-$(CONFIG_AAC_DECODER) += aarch64/sbrdsp_neon.o
|
|
NEON-OBJS-$(CONFIG_AAC_ENCODER) += aarch64/aacencdsp_neon.o
|
|
NEON-OBJS-$(CONFIG_AC3DSP) += aarch64/ac3dsp_neon.o
|
|
NEON-OBJS-$(CONFIG_FDCTDSP) += aarch64/fdctdsp_neon.o
|
|
NEON-OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_neon.o
|
|
NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o
|
|
NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \
|
|
aarch64/h264idct_neon.o
|
|
NEON-OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_neon.o
|
|
NEON-OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_neon.o \
|
|
aarch64/hpeldsp_neon.o
|
|
NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o
|
|
NEON-OBJS-$(CONFIG_HUFFYUVDSP) += aarch64/huffyuvdsp_neon.o
|
|
NEON-OBJS-$(CONFIG_IDCTDSP) += aarch64/idctdsp_neon.o \
|
|
aarch64/simple_idct_neon.o
|
|
NEON-OBJS-$(CONFIG_ME_CMP) += aarch64/me_cmp_neon.o
|
|
NEON-OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_neon.o
|
|
NEON-OBJS-$(CONFIG_MPEGVIDEOENCDSP) += aarch64/mpegvideoencdsp_neon.o
|
|
NEON-OBJS-$(CONFIG_PIXBLOCKDSP) += aarch64/pixblockdsp_neon.o
|
|
NEON-OBJS-$(CONFIG_PNG_DECODER) += aarch64/pngdsp_neon.o
|
|
NEON-OBJS-$(CONFIG_VC1DSP) += aarch64/vc1dsp_neon.o
|
|
NEON-OBJS-$(CONFIG_VP8DSP) += aarch64/vp8dsp_neon.o
|
|
|
|
# decoders/encoders
|
|
NEON-OBJS-$(CONFIG_AAC_DECODER) += aarch64/aacpsdsp_neon.o
|
|
NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/synth_filter_neon.o
|
|
NEON-OBJS-$(CONFIG_OPUS_DECODER) += aarch64/opusdsp_neon.o
|
|
NEON-OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_neon.o
|
|
NEON-OBJS-$(CONFIG_VP9_DECODER) += aarch64/vp9itxfm_16bpp_neon.o \
|
|
aarch64/vp9itxfm_neon.o \
|
|
aarch64/vp9lpf_16bpp_neon.o \
|
|
aarch64/vp9lpf_neon.o \
|
|
aarch64/vp9mc_16bpp_neon.o \
|
|
aarch64/vp9mc_neon.o
|
|
NEON-OBJS-$(CONFIG_HEVC_DECODER) += aarch64/hevcdsp_deblock_neon.o \
|
|
aarch64/hevcdsp_dequant_neon.o \
|
|
aarch64/hevcdsp_idct_neon.o \
|
|
aarch64/hevcdsp_init_aarch64.o \
|
|
aarch64/hevcpred_neon.o \
|
|
aarch64/hevcpred_init_aarch64.o \
|
|
aarch64/h26x/epel_neon.o \
|
|
aarch64/h26x/qpel_neon.o \
|
|
aarch64/h26x/sao_neon.o
|