arm: Reindent assembly where it was off by one char

This commit is contained in:
Martin Storsjö 2026-04-07 12:13:32 +03:00
parent 946e80fde7
commit 17765fe831
6 changed files with 85 additions and 85 deletions

View file

@@ -322,44 +322,44 @@ endfunc
.endm
@ tr_4x4: combine four 16-bit input vectors into four narrowed outputs.
@   in0-in3    inputs (16-bit lanes)
@   out0-out3  outputs, narrowed from 32 bits by vqrshrn.s32 with #shift
@   shift      immediate shift amount for the final saturating rounding narrow
@   tmp0-tmp4  32-bit scratch vectors; all of them are overwritten
@ Multipliers are read from lanes d4[0], d4[1] and d4[3]; d4 is set up outside
@ this macro (its contents are not visible here).
@ NOTE(review): each instruction group is listed twice below. That looks like the
@ removed and re-added copies from the reindent diff rather than a doubled
@ computation - confirm against the real source file.
.macro tr_4x4 in0, in1, in2, in3, out0, out1, out2, out3, shift, tmp0, tmp1, tmp2, tmp3, tmp4
@ e0/e1 = (in0 << 6) +/- in2 * d4[0]
@ o0 = in1 * d4[1] + in3 * d4[3],  o1 = in1 * d4[3] - in3 * d4[1]
vshll.s16 \tmp0, \in0, #6
vmull.s16 \tmp2, \in1, d4[1]
vmov \tmp1, \tmp0
vmull.s16 \tmp3, \in1, d4[3]
vmlal.s16 \tmp0, \in2, d4[0] @e0
vmlsl.s16 \tmp1, \in2, d4[0] @e1
vmlal.s16 \tmp2, \in3, d4[3] @o0
vmlsl.s16 \tmp3, \in3, d4[1] @o1
vshll.s16 \tmp0, \in0, #6
vmull.s16 \tmp2, \in1, d4[1]
vmov \tmp1, \tmp0
vmull.s16 \tmp3, \in1, d4[3]
vmlal.s16 \tmp0, \in2, d4[0] @e0
vmlsl.s16 \tmp1, \in2, d4[0] @e1
vmlal.s16 \tmp2, \in3, d4[3] @o0
vmlsl.s16 \tmp3, \in3, d4[1] @o1
@ tmp4 = e0 + o0, tmp0 = e0 - o0, tmp2 = e1 + o1, tmp1 = e1 - o1
vadd.s32 \tmp4, \tmp0, \tmp2
vsub.s32 \tmp0, \tmp0, \tmp2
vadd.s32 \tmp2, \tmp1, \tmp3
vsub.s32 \tmp1, \tmp1, \tmp3
@ Narrow to 16 bits: out0 = e0 + o0, out3 = e0 - o0, out1 = e1 + o1, out2 = e1 - o1
vqrshrn.s32 \out0, \tmp4, #\shift
vqrshrn.s32 \out3, \tmp0, #\shift
vqrshrn.s32 \out1, \tmp2, #\shift
vqrshrn.s32 \out2, \tmp1, #\shift
vadd.s32 \tmp4, \tmp0, \tmp2
vsub.s32 \tmp0, \tmp0, \tmp2
vadd.s32 \tmp2, \tmp1, \tmp3
vsub.s32 \tmp1, \tmp1, \tmp3
vqrshrn.s32 \out0, \tmp4, #\shift
vqrshrn.s32 \out3, \tmp0, #\shift
vqrshrn.s32 \out1, \tmp2, #\shift
vqrshrn.s32 \out2, \tmp1, #\shift
.endm
@ tr_4x4_8: same e/o combination as a 4-point pass, but the multipliers are
@ loaded from memory at r1 and the results stay 32 bits wide (no narrowing).
@   in0        input; after it has been widened into tmp0 it is reused as the
@              multiplier register and is therefore clobbered
@   in1-in3    inputs (16-bit lanes)
@   out0-out3  32-bit results
@   tmp0-tmp3  32-bit scratch vectors; all of them are overwritten
@ r1 is advanced by the first vld1 (writeback) and stepped back by 8 at the end,
@ so it leaves the macro with its entry value.
@ NOTE(review): each instruction group is listed twice below. That looks like the
@ removed and re-added copies from the reindent diff rather than a doubled
@ computation - confirm against the real source file.
.macro tr_4x4_8 in0, in1, in2, in3, out0, out1, out2, out3, tmp0, tmp1, tmp2, tmp3
@ tmp0 = in0 << 6 is taken first; only then is in0 overwritten with the
@ four 16-bit multipliers at [r1].
vshll.s16 \tmp0, \in0, #6
vld1.s16 {\in0}, [r1, :64]!
vmov \tmp1, \tmp0
vmull.s16 \tmp2, \in1, \in0[1]
vmull.s16 \tmp3, \in1, \in0[3]
vmlal.s16 \tmp0, \in2, \in0[0] @e0
vmlsl.s16 \tmp1, \in2, \in0[0] @e1
vmlal.s16 \tmp2, \in3, \in0[3] @o0
vmlsl.s16 \tmp3, \in3, \in0[1] @o1
vshll.s16 \tmp0, \in0, #6
vld1.s16 {\in0}, [r1, :64]!
vmov \tmp1, \tmp0
vmull.s16 \tmp2, \in1, \in0[1]
vmull.s16 \tmp3, \in1, \in0[3]
vmlal.s16 \tmp0, \in2, \in0[0] @e0
vmlsl.s16 \tmp1, \in2, \in0[0] @e1
vmlal.s16 \tmp2, \in3, \in0[3] @o0
vmlsl.s16 \tmp3, \in3, \in0[1] @o1
@ Load the following 64 bits into in0 (no writeback); in0 holds this value
@ when the macro ends. NOTE(review): presumably consumed by the code that
@ follows the macro invocation - confirm at the call sites.
vld1.s16 {\in0}, [r1, :64]
vld1.s16 {\in0}, [r1, :64]
@ out0 = e0 + o0, out1 = e1 + o1, out2 = e1 - o1, out3 = e0 - o0
vadd.s32 \out0, \tmp0, \tmp2
vadd.s32 \out1, \tmp1, \tmp3
vsub.s32 \out2, \tmp1, \tmp3
vsub.s32 \out3, \tmp0, \tmp2
vadd.s32 \out0, \tmp0, \tmp2
vadd.s32 \out1, \tmp1, \tmp3
vsub.s32 \out2, \tmp1, \tmp3
vsub.s32 \out3, \tmp0, \tmp2
@ Undo the post-increment of the first load.
sub r1, r1, #8
sub r1, r1, #8
.endm
@ Do a 4x4 transpose, using q registers for the subtransposes that don't
@@ -682,7 +682,7 @@ function func_tr_16x4_\name
mov r4, #-32
store16 d26, d27, d28, d29, d30, d31, d8, d9, r4
.else
store_to_stack (\offset + 64), (\offset + 176), q4, q9, q10, q11, q3, q2, q1, q0
store_to_stack (\offset + 64), (\offset + 176), q4, q9, q10, q11, q3, q2, q1, q0
.endif
bx lr
@@ -900,7 +900,7 @@ function func_tr_32x4_\name
add r3, r11, #(32 + 3 * 64)
scale_store \shift
bx r10
bx r10
endfunc
.endm

View file

@@ -45,26 +45,26 @@ endconst
@ need to address the individual d registers.
@ r0,r1 == rq0, r2,r3 == rq1, etc
@ transpose32_q_2x_4x4: swap 64-bit halves between q registers, then
@ interleave 32-bit elements of q-register pairs.
@   rq0-rq7  the eight vectors, named as q registers
@   r0-r15   d-register names used by the vswp steps; judging by the inline
@            vtrn.64 notes, r(2n) and r(2n+1) are the two halves of rq(n)
@            (NOTE(review): the macro cannot check this - confirm at call sites)
@ Step 1: vswp of d halves stands in for "vtrn.64" on (rq0,rq2), (rq1,rq3),
@         (rq4,rq6), (rq5,rq7), as the inline comments state.
@ Step 2: vtrn.32 on (rq0,rq1), (rq2,rq3), (rq4,rq5), (rq6,rq7).
@ NOTE(review): each instruction group is listed twice below. That looks like the
@ removed and re-added copies from the reindent diff rather than a doubled
@ computation - confirm against the real source file.
.macro transpose32_q_2x_4x4 rq0, rq1, rq2, rq3, rq4, rq5, rq6, rq7, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15
vswp \r1, \r4 @ vtrn.64 \rq0, \rq2
vswp \r3, \r6 @ vtrn.64 \rq1, \rq3
vswp \r9, \r12 @ vtrn.64 \rq4, \rq6
vswp \r11, \r14 @ vtrn.64 \rq5, \rq7
vtrn.32 \rq0, \rq1
vtrn.32 \rq2, \rq3
vtrn.32 \rq4, \rq5
vtrn.32 \rq6, \rq7
vswp \r1, \r4 @ vtrn.64 \rq0, \rq2
vswp \r3, \r6 @ vtrn.64 \rq1, \rq3
vswp \r9, \r12 @ vtrn.64 \rq4, \rq6
vswp \r11, \r14 @ vtrn.64 \rq5, \rq7
vtrn.32 \rq0, \rq1
vtrn.32 \rq2, \rq3
vtrn.32 \rq4, \rq5
vtrn.32 \rq6, \rq7
.endm
@ Do eight 2x2 transposes.
@ transpose32_8x_2x2: apply vtrn.32 to each adjacent register pair
@ (r0,r1), (r2,r3), ... (r14,r15). All sixteen registers are modified in place;
@ no other register is touched.
@ NOTE(review): the eight instructions are listed twice below. That looks like
@ the removed and re-added copies from the reindent diff rather than a doubled
@ computation - confirm against the real source file.
.macro transpose32_8x_2x2 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15
vtrn.32 \r0, \r1
vtrn.32 \r2, \r3
vtrn.32 \r4, \r5
vtrn.32 \r6, \r7
vtrn.32 \r8, \r9
vtrn.32 \r10, \r11
vtrn.32 \r12, \r13
vtrn.32 \r14, \r15
vtrn.32 \r0, \r1
vtrn.32 \r2, \r3
vtrn.32 \r4, \r5
vtrn.32 \r6, \r7
vtrn.32 \r8, \r9
vtrn.32 \r10, \r11
vtrn.32 \r12, \r13
vtrn.32 \r14, \r15
.endm
@ out1 = ((in1 + in2) * d0[0] + (1 << 13)) >> 14

View file

@@ -45,18 +45,18 @@ endconst
@ need to address the individual d registers.
@ r0,r1 == rq0, r2,r3 == rq1, etc
@ transpose16_q_4x_4x4: interleave 32-bit elements of q-register pairs, then
@ 16-bit elements of the register pairs named r0-r15.
@   rq0-rq7  the eight vectors, named as q registers
@   r0-r15   names used for the 16-bit step
@            (NOTE(review): presumably the d-register halves of rq0-rq7 in
@            order; the macro cannot check this - confirm at call sites)
@ Step 1: vtrn.32 on (rq0,rq1), (rq2,rq3), (rq4,rq5), (rq6,rq7).
@ Step 2: vtrn.16 on (r0,r1), (r2,r3), ... (r14,r15).
@ NOTE(review): each instruction group is listed twice below. That looks like the
@ removed and re-added copies from the reindent diff rather than a doubled
@ computation - confirm against the real source file.
.macro transpose16_q_4x_4x4 rq0, rq1, rq2, rq3, rq4, rq5, rq6, rq7, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15
vtrn.32 \rq0, \rq1
vtrn.32 \rq2, \rq3
vtrn.32 \rq4, \rq5
vtrn.32 \rq6, \rq7
vtrn.16 \r0, \r1
vtrn.16 \r2, \r3
vtrn.16 \r4, \r5
vtrn.16 \r6, \r7
vtrn.16 \r8, \r9
vtrn.16 \r10, \r11
vtrn.16 \r12, \r13
vtrn.16 \r14, \r15
vtrn.32 \rq0, \rq1
vtrn.32 \rq2, \rq3
vtrn.32 \rq4, \rq5
vtrn.32 \rq6, \rq7
vtrn.16 \r0, \r1
vtrn.16 \r2, \r3
vtrn.16 \r4, \r5
vtrn.16 \r6, \r7
vtrn.16 \r8, \r9
vtrn.16 \r10, \r11
vtrn.16 \r12, \r13
vtrn.16 \r14, \r15
.endm
@ out1 = ((in1 + in2) * d0[0] + (1 << 13)) >> 14

View file

@@ -21,25 +21,25 @@
#include "libavutil/arm/asm.S"
@ transpose16_q_8x8: three rounds of pairwise exchanges over eight q registers,
@ at 64-bit, 32-bit and 16-bit granularity.
@   rq0-rq7  the eight vectors, named as q registers
@   r0-r15   d-register names used by the vswp steps; judging by the inline
@            vtrn.64 notes, r(2n) and r(2n+1) are the two halves of rq(n)
@            (NOTE(review): the macro cannot check this - confirm at call sites)
@ Step 1: vswp of d halves stands in for "vtrn.64" on (rq0,rq4), (rq1,rq5),
@         (rq2,rq6), (rq3,rq7), as the inline comments state.
@ Step 2: vtrn.32 on (rq0,rq2), (rq1,rq3), (rq4,rq6), (rq5,rq7).
@ Step 3: vtrn.16 on (rq0,rq1), (rq2,rq3), (rq4,rq5), (rq6,rq7).
@ NOTE(review): each instruction group is listed twice below. That looks like the
@ removed and re-added copies from the reindent diff rather than a doubled
@ computation - confirm against the real source file.
.macro transpose16_q_8x8 rq0, rq1, rq2, rq3, rq4, rq5, rq6, rq7, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15
vswp \r1, \r8 @ vtrn.64 \rq0, \rq4
vswp \r3, \r10 @ vtrn.64 \rq1, \rq5
vswp \r5, \r12 @ vtrn.64 \rq2, \rq6
vswp \r7, \r14 @ vtrn.64 \rq3, \rq7
vtrn.32 \rq0, \rq2
vtrn.32 \rq1, \rq3
vtrn.32 \rq4, \rq6
vtrn.32 \rq5, \rq7
vtrn.16 \rq0, \rq1
vtrn.16 \rq2, \rq3
vtrn.16 \rq4, \rq5
vtrn.16 \rq6, \rq7
vswp \r1, \r8 @ vtrn.64 \rq0, \rq4
vswp \r3, \r10 @ vtrn.64 \rq1, \rq5
vswp \r5, \r12 @ vtrn.64 \rq2, \rq6
vswp \r7, \r14 @ vtrn.64 \rq3, \rq7
vtrn.32 \rq0, \rq2
vtrn.32 \rq1, \rq3
vtrn.32 \rq4, \rq6
vtrn.32 \rq5, \rq7
vtrn.16 \rq0, \rq1
vtrn.16 \rq2, \rq3
vtrn.16 \rq4, \rq5
vtrn.16 \rq6, \rq7
.endm
@ transpose16_4x4: exchange elements among four registers in two rounds.
@   r0-r3  the four vectors, modified in place
@ Round 1: vtrn.32 on (r0,r2) and (r1,r3).
@ Round 2: vtrn.16 on (r0,r1) and (r2,r3).
@ NOTE(review): the four instructions are listed twice below. That looks like the
@ removed and re-added copies from the reindent diff rather than a doubled
@ computation - confirm against the real source file.
.macro transpose16_4x4 r0, r1, r2, r3
vtrn.32 \r0, \r2
vtrn.32 \r1, \r3
vtrn.16 \r0, \r1
vtrn.16 \r2, \r3
vtrn.32 \r0, \r2
vtrn.32 \r1, \r3
vtrn.16 \r0, \r1
vtrn.16 \r2, \r3
.endm
@ Do a 4x4 transpose, using q registers for the subtransposes that don't

View file

@@ -148,16 +148,16 @@ endfunc
@ Helper macros for vmull/vmlal with a constant from either d0 or d1 depending on index
@ vmull_lane: widening multiply of src by one 16-bit constant lane.
@   dst  destination of vmull.s16
@   src  16-bit source vector
@   idx  assemble-time constant: values below 4 pick lane idx of d0, other
@        values pick lane (idx - 4) of d1 (presumably idx is 0-7)
@ NOTE(review): the instruction in each branch is listed twice. That looks like
@ the removed and re-added copies from the reindent diff - confirm against the
@ real source file.
.macro vmull_lane dst, src, idx
.if \idx < 4
vmull.s16 \dst, \src, d0[\idx]
vmull.s16 \dst, \src, d0[\idx]
.else
vmull.s16 \dst, \src, d1[\idx - 4]
vmull.s16 \dst, \src, d1[\idx - 4]
.endif
.endm
@ vmlal_lane: widening multiply-accumulate of src by one 16-bit constant lane.
@   dst  accumulator updated by vmlal.s16
@   src  16-bit source vector
@   idx  assemble-time constant: values below 4 pick lane idx of d0, other
@        values pick lane (idx - 4) of d1 (presumably idx is 0-7)
@ NOTE(review): the instruction in each branch is listed twice. That looks like
@ the removed and re-added copies from the reindent diff - confirm against the
@ real source file.
.macro vmlal_lane dst, src, idx
.if \idx < 4
vmlal.s16 \dst, \src, d0[\idx]
vmlal.s16 \dst, \src, d0[\idx]
.else
vmlal.s16 \dst, \src, d1[\idx - 4]
vmlal.s16 \dst, \src, d1[\idx - 4]
.endif
.endm

View file

@@ -193,16 +193,16 @@ endfunc
@ Helper macros for vmul/vmla with a constant from either d0 or d1 depending on index
@ vmul_lane: multiply src by one 16-bit constant lane (vmul.s16, no widening).
@   dst  destination of vmul.s16
@   src  16-bit source vector
@   idx  assemble-time constant: values below 4 pick lane idx of d0, other
@        values pick lane (idx - 4) of d1 (presumably idx is 0-7)
@ NOTE(review): the instruction in each branch is listed twice. That looks like
@ the removed and re-added copies from the reindent diff - confirm against the
@ real source file.
.macro vmul_lane dst, src, idx
.if \idx < 4
vmul.s16 \dst, \src, d0[\idx]
vmul.s16 \dst, \src, d0[\idx]
.else
vmul.s16 \dst, \src, d1[\idx - 4]
vmul.s16 \dst, \src, d1[\idx - 4]
.endif
.endm
@ vmla_lane: multiply-accumulate src by one 16-bit constant lane (vmla.s16,
@ no widening).
@   dst  accumulator updated by vmla.s16
@   src  16-bit source vector
@   idx  assemble-time constant: values below 4 pick lane idx of d0, other
@        values pick lane (idx - 4) of d1 (presumably idx is 0-7)
@ NOTE(review): the instruction in each branch is listed twice. That looks like
@ the removed and re-added copies from the reindent diff - confirm against the
@ real source file.
.macro vmla_lane dst, src, idx
.if \idx < 4
vmla.s16 \dst, \src, d0[\idx]
vmla.s16 \dst, \src, d0[\idx]
.else
vmla.s16 \dst, \src, d1[\idx - 4]
vmla.s16 \dst, \src, d1[\idx - 4]
.endif
.endm