mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-06-04 14:40:26 +00:00
arm: Reindent assembly where it was off by one char
This commit is contained in:
parent
946e80fde7
commit
17765fe831
6 changed files with 85 additions and 85 deletions
|
|
@ -322,44 +322,44 @@ endfunc
|
|||
.endm
|
||||
|
||||
.macro tr_4x4 in0, in1, in2, in3, out0, out1, out2, out3, shift, tmp0, tmp1, tmp2, tmp3, tmp4
|
||||
vshll.s16 \tmp0, \in0, #6
|
||||
vmull.s16 \tmp2, \in1, d4[1]
|
||||
vmov \tmp1, \tmp0
|
||||
vmull.s16 \tmp3, \in1, d4[3]
|
||||
vmlal.s16 \tmp0, \in2, d4[0] @e0
|
||||
vmlsl.s16 \tmp1, \in2, d4[0] @e1
|
||||
vmlal.s16 \tmp2, \in3, d4[3] @o0
|
||||
vmlsl.s16 \tmp3, \in3, d4[1] @o1
|
||||
vshll.s16 \tmp0, \in0, #6
|
||||
vmull.s16 \tmp2, \in1, d4[1]
|
||||
vmov \tmp1, \tmp0
|
||||
vmull.s16 \tmp3, \in1, d4[3]
|
||||
vmlal.s16 \tmp0, \in2, d4[0] @e0
|
||||
vmlsl.s16 \tmp1, \in2, d4[0] @e1
|
||||
vmlal.s16 \tmp2, \in3, d4[3] @o0
|
||||
vmlsl.s16 \tmp3, \in3, d4[1] @o1
|
||||
|
||||
vadd.s32 \tmp4, \tmp0, \tmp2
|
||||
vsub.s32 \tmp0, \tmp0, \tmp2
|
||||
vadd.s32 \tmp2, \tmp1, \tmp3
|
||||
vsub.s32 \tmp1, \tmp1, \tmp3
|
||||
vqrshrn.s32 \out0, \tmp4, #\shift
|
||||
vqrshrn.s32 \out3, \tmp0, #\shift
|
||||
vqrshrn.s32 \out1, \tmp2, #\shift
|
||||
vqrshrn.s32 \out2, \tmp1, #\shift
|
||||
vadd.s32 \tmp4, \tmp0, \tmp2
|
||||
vsub.s32 \tmp0, \tmp0, \tmp2
|
||||
vadd.s32 \tmp2, \tmp1, \tmp3
|
||||
vsub.s32 \tmp1, \tmp1, \tmp3
|
||||
vqrshrn.s32 \out0, \tmp4, #\shift
|
||||
vqrshrn.s32 \out3, \tmp0, #\shift
|
||||
vqrshrn.s32 \out1, \tmp2, #\shift
|
||||
vqrshrn.s32 \out2, \tmp1, #\shift
|
||||
.endm
|
||||
|
||||
.macro tr_4x4_8 in0, in1, in2, in3, out0, out1, out2, out3, tmp0, tmp1, tmp2, tmp3
|
||||
vshll.s16 \tmp0, \in0, #6
|
||||
vld1.s16 {\in0}, [r1, :64]!
|
||||
vmov \tmp1, \tmp0
|
||||
vmull.s16 \tmp2, \in1, \in0[1]
|
||||
vmull.s16 \tmp3, \in1, \in0[3]
|
||||
vmlal.s16 \tmp0, \in2, \in0[0] @e0
|
||||
vmlsl.s16 \tmp1, \in2, \in0[0] @e1
|
||||
vmlal.s16 \tmp2, \in3, \in0[3] @o0
|
||||
vmlsl.s16 \tmp3, \in3, \in0[1] @o1
|
||||
vshll.s16 \tmp0, \in0, #6
|
||||
vld1.s16 {\in0}, [r1, :64]!
|
||||
vmov \tmp1, \tmp0
|
||||
vmull.s16 \tmp2, \in1, \in0[1]
|
||||
vmull.s16 \tmp3, \in1, \in0[3]
|
||||
vmlal.s16 \tmp0, \in2, \in0[0] @e0
|
||||
vmlsl.s16 \tmp1, \in2, \in0[0] @e1
|
||||
vmlal.s16 \tmp2, \in3, \in0[3] @o0
|
||||
vmlsl.s16 \tmp3, \in3, \in0[1] @o1
|
||||
|
||||
vld1.s16 {\in0}, [r1, :64]
|
||||
vld1.s16 {\in0}, [r1, :64]
|
||||
|
||||
vadd.s32 \out0, \tmp0, \tmp2
|
||||
vadd.s32 \out1, \tmp1, \tmp3
|
||||
vsub.s32 \out2, \tmp1, \tmp3
|
||||
vsub.s32 \out3, \tmp0, \tmp2
|
||||
vadd.s32 \out0, \tmp0, \tmp2
|
||||
vadd.s32 \out1, \tmp1, \tmp3
|
||||
vsub.s32 \out2, \tmp1, \tmp3
|
||||
vsub.s32 \out3, \tmp0, \tmp2
|
||||
|
||||
sub r1, r1, #8
|
||||
sub r1, r1, #8
|
||||
.endm
|
||||
|
||||
@ Do a 4x4 transpose, using q registers for the subtransposes that don't
|
||||
|
|
@ -682,7 +682,7 @@ function func_tr_16x4_\name
|
|||
mov r4, #-32
|
||||
store16 d26, d27, d28, d29, d30, d31, d8, d9, r4
|
||||
.else
|
||||
store_to_stack (\offset + 64), (\offset + 176), q4, q9, q10, q11, q3, q2, q1, q0
|
||||
store_to_stack (\offset + 64), (\offset + 176), q4, q9, q10, q11, q3, q2, q1, q0
|
||||
.endif
|
||||
|
||||
bx lr
|
||||
|
|
@ -900,7 +900,7 @@ function func_tr_32x4_\name
|
|||
add r3, r11, #(32 + 3 * 64)
|
||||
scale_store \shift
|
||||
|
||||
bx r10
|
||||
bx r10
|
||||
endfunc
|
||||
.endm
|
||||
|
||||
|
|
|
|||
|
|
@ -45,26 +45,26 @@ endconst
|
|||
@ need to address the individual d registers.
|
||||
@ r0,r1 == rq0, r2,r3 == rq1, etc
|
||||
.macro transpose32_q_2x_4x4 rq0, rq1, rq2, rq3, rq4, rq5, rq6, rq7, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15
|
||||
vswp \r1, \r4 @ vtrn.64 \rq0, \rq2
|
||||
vswp \r3, \r6 @ vtrn.64 \rq1, \rq3
|
||||
vswp \r9, \r12 @ vtrn.64 \rq4, \rq6
|
||||
vswp \r11, \r14 @ vtrn.64 \rq5, \rq7
|
||||
vtrn.32 \rq0, \rq1
|
||||
vtrn.32 \rq2, \rq3
|
||||
vtrn.32 \rq4, \rq5
|
||||
vtrn.32 \rq6, \rq7
|
||||
vswp \r1, \r4 @ vtrn.64 \rq0, \rq2
|
||||
vswp \r3, \r6 @ vtrn.64 \rq1, \rq3
|
||||
vswp \r9, \r12 @ vtrn.64 \rq4, \rq6
|
||||
vswp \r11, \r14 @ vtrn.64 \rq5, \rq7
|
||||
vtrn.32 \rq0, \rq1
|
||||
vtrn.32 \rq2, \rq3
|
||||
vtrn.32 \rq4, \rq5
|
||||
vtrn.32 \rq6, \rq7
|
||||
.endm
|
||||
|
||||
@ Do eight 2x2 transposes.
|
||||
.macro transpose32_8x_2x2 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15
|
||||
vtrn.32 \r0, \r1
|
||||
vtrn.32 \r2, \r3
|
||||
vtrn.32 \r4, \r5
|
||||
vtrn.32 \r6, \r7
|
||||
vtrn.32 \r8, \r9
|
||||
vtrn.32 \r10, \r11
|
||||
vtrn.32 \r12, \r13
|
||||
vtrn.32 \r14, \r15
|
||||
vtrn.32 \r0, \r1
|
||||
vtrn.32 \r2, \r3
|
||||
vtrn.32 \r4, \r5
|
||||
vtrn.32 \r6, \r7
|
||||
vtrn.32 \r8, \r9
|
||||
vtrn.32 \r10, \r11
|
||||
vtrn.32 \r12, \r13
|
||||
vtrn.32 \r14, \r15
|
||||
.endm
|
||||
|
||||
@ out1 = ((in1 + in2) * d0[0] + (1 << 13)) >> 14
|
||||
|
|
|
|||
|
|
@ -45,18 +45,18 @@ endconst
|
|||
@ need to address the individual d registers.
|
||||
@ r0,r1 == rq0, r2,r3 == rq1, etc
|
||||
.macro transpose16_q_4x_4x4 rq0, rq1, rq2, rq3, rq4, rq5, rq6, rq7, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15
|
||||
vtrn.32 \rq0, \rq1
|
||||
vtrn.32 \rq2, \rq3
|
||||
vtrn.32 \rq4, \rq5
|
||||
vtrn.32 \rq6, \rq7
|
||||
vtrn.16 \r0, \r1
|
||||
vtrn.16 \r2, \r3
|
||||
vtrn.16 \r4, \r5
|
||||
vtrn.16 \r6, \r7
|
||||
vtrn.16 \r8, \r9
|
||||
vtrn.16 \r10, \r11
|
||||
vtrn.16 \r12, \r13
|
||||
vtrn.16 \r14, \r15
|
||||
vtrn.32 \rq0, \rq1
|
||||
vtrn.32 \rq2, \rq3
|
||||
vtrn.32 \rq4, \rq5
|
||||
vtrn.32 \rq6, \rq7
|
||||
vtrn.16 \r0, \r1
|
||||
vtrn.16 \r2, \r3
|
||||
vtrn.16 \r4, \r5
|
||||
vtrn.16 \r6, \r7
|
||||
vtrn.16 \r8, \r9
|
||||
vtrn.16 \r10, \r11
|
||||
vtrn.16 \r12, \r13
|
||||
vtrn.16 \r14, \r15
|
||||
.endm
|
||||
|
||||
@ out1 = ((in1 + in2) * d0[0] + (1 << 13)) >> 14
|
||||
|
|
|
|||
|
|
@ -21,25 +21,25 @@
|
|||
#include "libavutil/arm/asm.S"
|
||||
|
||||
.macro transpose16_q_8x8 rq0, rq1, rq2, rq3, rq4, rq5, rq6, rq7, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15
|
||||
vswp \r1, \r8 @ vtrn.64 \rq0, \rq4
|
||||
vswp \r3, \r10 @ vtrn.64 \rq1, \rq5
|
||||
vswp \r5, \r12 @ vtrn.64 \rq2, \rq6
|
||||
vswp \r7, \r14 @ vtrn.64 \rq3, \rq7
|
||||
vtrn.32 \rq0, \rq2
|
||||
vtrn.32 \rq1, \rq3
|
||||
vtrn.32 \rq4, \rq6
|
||||
vtrn.32 \rq5, \rq7
|
||||
vtrn.16 \rq0, \rq1
|
||||
vtrn.16 \rq2, \rq3
|
||||
vtrn.16 \rq4, \rq5
|
||||
vtrn.16 \rq6, \rq7
|
||||
vswp \r1, \r8 @ vtrn.64 \rq0, \rq4
|
||||
vswp \r3, \r10 @ vtrn.64 \rq1, \rq5
|
||||
vswp \r5, \r12 @ vtrn.64 \rq2, \rq6
|
||||
vswp \r7, \r14 @ vtrn.64 \rq3, \rq7
|
||||
vtrn.32 \rq0, \rq2
|
||||
vtrn.32 \rq1, \rq3
|
||||
vtrn.32 \rq4, \rq6
|
||||
vtrn.32 \rq5, \rq7
|
||||
vtrn.16 \rq0, \rq1
|
||||
vtrn.16 \rq2, \rq3
|
||||
vtrn.16 \rq4, \rq5
|
||||
vtrn.16 \rq6, \rq7
|
||||
.endm
|
||||
|
||||
.macro transpose16_4x4 r0, r1, r2, r3
|
||||
vtrn.32 \r0, \r2
|
||||
vtrn.32 \r1, \r3
|
||||
vtrn.16 \r0, \r1
|
||||
vtrn.16 \r2, \r3
|
||||
vtrn.32 \r0, \r2
|
||||
vtrn.32 \r1, \r3
|
||||
vtrn.16 \r0, \r1
|
||||
vtrn.16 \r2, \r3
|
||||
.endm
|
||||
|
||||
@ Do a 4x4 transpose, using q registers for the subtransposes that don't
|
||||
|
|
|
|||
|
|
@ -148,16 +148,16 @@ endfunc
|
|||
@ Helper macros for vmull/vmlal with a constant from either d0 or d1 depending on index
|
||||
.macro vmull_lane dst, src, idx
|
||||
.if \idx < 4
|
||||
vmull.s16 \dst, \src, d0[\idx]
|
||||
vmull.s16 \dst, \src, d0[\idx]
|
||||
.else
|
||||
vmull.s16 \dst, \src, d1[\idx - 4]
|
||||
vmull.s16 \dst, \src, d1[\idx - 4]
|
||||
.endif
|
||||
.endm
|
||||
.macro vmlal_lane dst, src, idx
|
||||
.if \idx < 4
|
||||
vmlal.s16 \dst, \src, d0[\idx]
|
||||
vmlal.s16 \dst, \src, d0[\idx]
|
||||
.else
|
||||
vmlal.s16 \dst, \src, d1[\idx - 4]
|
||||
vmlal.s16 \dst, \src, d1[\idx - 4]
|
||||
.endif
|
||||
.endm
|
||||
|
||||
|
|
|
|||
|
|
@ -193,16 +193,16 @@ endfunc
|
|||
@ Helper macros for vmul/vmla with a constant from either d0 or d1 depending on index
|
||||
.macro vmul_lane dst, src, idx
|
||||
.if \idx < 4
|
||||
vmul.s16 \dst, \src, d0[\idx]
|
||||
vmul.s16 \dst, \src, d0[\idx]
|
||||
.else
|
||||
vmul.s16 \dst, \src, d1[\idx - 4]
|
||||
vmul.s16 \dst, \src, d1[\idx - 4]
|
||||
.endif
|
||||
.endm
|
||||
.macro vmla_lane dst, src, idx
|
||||
.if \idx < 4
|
||||
vmla.s16 \dst, \src, d0[\idx]
|
||||
vmla.s16 \dst, \src, d0[\idx]
|
||||
.else
|
||||
vmla.s16 \dst, \src, d1[\idx - 4]
|
||||
vmla.s16 \dst, \src, d1[\idx - 4]
|
||||
.endif
|
||||
.endm
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue