ffmpeg/libavcodec/x86/hevc/dequant.asm
Andreas Rheinhardt 436b74b725 avcodec/x86/hevc/dequant: Add SSSE3 dequant ASM function
hevc_dequant_4x4_8_c (GCC):                             20.2 ( 1.00x)
hevc_dequant_4x4_8_c (Clang):                           21.7 ( 1.00x)
hevc_dequant_4x4_8_ssse3:                                5.8 ( 3.51x)
hevc_dequant_8x8_8_c (GCC):                             32.9 ( 1.00x)
hevc_dequant_8x8_8_c (Clang):                           78.7 ( 1.00x)
hevc_dequant_8x8_8_ssse3:                                6.8 ( 4.83x)
hevc_dequant_16x16_8_c (GCC):                          105.1 ( 1.00x)
hevc_dequant_16x16_8_c (Clang):                        151.1 ( 1.00x)
hevc_dequant_16x16_8_ssse3:                             19.3 ( 5.45x)
hevc_dequant_32x32_8_c (GCC):                          415.7 ( 1.00x)
hevc_dequant_32x32_8_c (Clang):                        602.3 ( 1.00x)
hevc_dequant_32x32_8_ssse3:                             78.2 ( 5.32x)

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2026-01-29 12:25:33 +01:00

60 lines
1.9 KiB
NASM

;*****************************************************************************
;* SSSE3-optimized HEVC dequant code
;*****************************************************************************
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
SECTION .text
INIT_XMM ssse3

; Multiplier before scaling; it is shifted left by log2_size to form the
; pmulhrsw factor. NOTE(review): presumably 1 << bit depth (the "_8" in the
; function name) -- confirm against the C reference.
%assign DEQUANT8_MUL_BASE    1 << 8
; One loop iteration covers two vector registers of int16_t coefficients.
%assign DEQUANT8_STEP_BYTES  2 * mmsize
%assign DEQUANT8_STEP_COEFFS DEQUANT8_STEP_BYTES / 2

;------------------------------------------------------------------------------
; void ff_hevc_dequant_8_ssse3(int16_t *coeffs, int16_t log2_size)
;
; Rewrites the coefficient block in place: every 16-bit coefficient c becomes
;     pmulhrsw(c, DEQUANT8_MUL_BASE << log2_size)
; which, for log2_size <= 6, is the rounding arithmetic shift
;     (c + (1 << (6 - log2_size))) >> (7 - log2_size)
;
; 1 << (2 * log2_size) coefficients are processed, DEQUANT8_STEP_COEFFS per
; iteration. The loop tests its counter at the bottom, so one iteration
; (16 coefficients) always runs, even for a smaller block.
;
; coeffs is accessed with mova (the aligned move), so it has to be
; mmsize-aligned.
;
; Temporaries (mapping is chosen per ABI below):
;   t0 = coeffs pointer, advanced as the loop runs
;   t1 = log2_size, must live in (e)cx because shl takes its count from cl;
;        only the low byte is ever read
;   t2 = scratch for the multiplier, afterwards the remaining-coefficient count
;   m0 = broadcast multiplier, m1/m2 = data
;------------------------------------------------------------------------------
cglobal hevc_dequant_8, 2, 3+UNIX64, 3

%if WIN64
    ; coeffs arrives in r0, which is rcx here; swap the two arguments so that
    ; log2_size ends up in rcx and coeffs in r1.
    DECLARE_REG_TMP 1,0,2
    xchg            r0, r1
%elif ARCH_X86_64
    ; rcx is r3 in this ABI: copy log2_size there. r1 is then free and is
    ; reused as t2.
    DECLARE_REG_TMP 0,3,1
    mov             t1d, r1d
%else
    ; log2_size is already in ecx (r1); no shuffling required.
    DECLARE_REG_TMP 0,1,2
%endif

    ; m0 = DEQUANT8_MUL_BASE << log2_size, replicated into every word lane
    mov             t2d, DEQUANT8_MUL_BASE
    shl             t2d, t1b
    movd            m0, t2d
    SPLATW          m0, m0

    ; t2d = 1 << (2 * log2_size) = number of coefficients left to process
    add             t1d, t1d
    mov             t2d, 1
    shl             t2d, t1b

.next_chunk:
    mova            m1, [t0]
    mova            m2, [t0+mmsize]
    pmulhrsw        m1, m0
    pmulhrsw        m2, m0
    mova            [t0], m1
    mova            [t0+mmsize], m2
    add             t0, DEQUANT8_STEP_BYTES
    sub             t2d, DEQUANT8_STEP_COEFFS
    jg              .next_chunk             ; loop while coefficients remain
    RET