mirror of
https://github.com/golang/go.git
synced 2025-10-19 11:03:18 +00:00
crypto/internal/fips140/subtle: add assembly implementation of xorBytes for mipsx
goos: linux goarch: mipsle pkg: crypto/subtle │ osubtle │ nsubtle │ │ sec/op │ sec/op vs base │ ConstantTimeByteEq-4 2.785n ± 0% 2.785n ± 0% ~ (p=0.876 n=8) ConstantTimeEq-4 3.342n ± 0% 3.341n ± 0% ~ (p=0.258 n=8) ConstantTimeLessOrEq-4 3.341n ± 0% 3.340n ± 0% ~ (p=0.370 n=8) XORBytes/8Bytes-4 117.80n ± 0% 27.02n ± 2% -77.07% (p=0.000 n=8) XORBytes/128Bytes-4 176.60n ± 0% 58.42n ± 4% -66.92% (p=0.000 n=8) XORBytes/2048Bytes-4 996.5n ± 0% 462.4n ± 0% -53.60% (p=0.000 n=8) XORBytes/8192Bytes-4 3.568µ ± 0% 1.780µ ± 2% -50.13% (p=0.000 n=8) XORBytes/32768Bytes-4 19.34µ ± 6% 10.52µ ± 5% -45.60% (p=0.000 n=8) XORBytesAlignment/8Bytes0Offset-4 127.50n ± 0% 28.31n ± 1% -77.80% (p=0.000 n=8) XORBytesAlignment/8Bytes1Offset-4 105.65n ± 1% 28.20n ± 1% -73.30% (p=0.000 n=8) XORBytesAlignment/8Bytes2Offset-4 105.55n ± 1% 28.34n ± 1% -73.15% (p=0.000 n=8) XORBytesAlignment/8Bytes3Offset-4 105.65n ± 0% 28.45n ± 1% -73.07% (p=0.000 n=8) XORBytesAlignment/8Bytes4Offset-4 127.60n ± 0% 28.19n ± 1% -77.91% (p=0.000 n=8) XORBytesAlignment/8Bytes5Offset-4 105.45n ± 0% 28.38n ± 1% -73.09% (p=0.000 n=8) XORBytesAlignment/8Bytes6Offset-4 105.55n ± 0% 28.27n ± 1% -73.22% (p=0.000 n=8) XORBytesAlignment/8Bytes7Offset-4 105.60n ± 0% 28.24n ± 1% -73.26% (p=0.000 n=8) XORBytesAlignment/128Bytes0Offset-4 178.25n ± 0% 59.57n ± 0% -66.58% (p=0.000 n=8) XORBytesAlignment/128Bytes1Offset-4 313.25n ± 0% 75.32n ± 0% -75.96% (p=0.000 n=8) XORBytesAlignment/128Bytes2Offset-4 313.75n ± 0% 75.34n ± 0% -75.99% (p=0.000 n=8) XORBytesAlignment/128Bytes3Offset-4 314.25n ± 0% 75.31n ± 0% -76.04% (p=0.000 n=8) XORBytesAlignment/128Bytes4Offset-4 178.25n ± 0% 59.57n ± 0% -66.58% (p=0.000 n=8) XORBytesAlignment/128Bytes5Offset-4 314.20n ± 0% 75.80n ± 1% -75.88% (p=0.000 n=8) XORBytesAlignment/128Bytes6Offset-4 313.30n ± 0% 75.56n ± 0% -75.88% (p=0.000 n=8) XORBytesAlignment/128Bytes7Offset-4 313.95n ± 0% 75.45n ± 0% -75.97% (p=0.000 n=8) XORBytesAlignment/2048Bytes0Offset-4 1002.5n ± 0% 455.3n ± 0% -54.58% (p=0.000 n=8) XORBytesAlignment/2048Bytes1Offset-4 3649.5n ± 0% 731.6n ± 0% -79.95% (p=0.000 n=8) XORBytesAlignment/2048Bytes2Offset-4 3645.0n ± 0% 731.5n ± 0% -79.93% (p=0.000 n=8) XORBytesAlignment/2048Bytes3Offset-4 3656.0n ± 0% 731.6n ± 0% -79.99% (p=0.000 n=8) XORBytesAlignment/2048Bytes4Offset-4 1003.0n ± 0% 455.6n ± 0% -54.58% (p=0.000 n=8) XORBytesAlignment/2048Bytes5Offset-4 3651.5n ± 1% 736.6n ± 0% -79.83% (p=0.000 n=8) XORBytesAlignment/2048Bytes6Offset-4 3647.5n ± 0% 736.4n ± 0% -79.81% (p=0.000 n=8) XORBytesAlignment/2048Bytes7Offset-4 3657.0n ± 1% 736.6n ± 0% -79.86% (p=0.000 n=8) geomean 313.1n 96.95n -69.03% │ osubtle │ nsubtle │ │ B/s │ B/s vs base │ XORBytes/8Bytes-4 64.77Mi ± 0% 282.51Mi ± 2% +336.18% (p=0.000 n=8) XORBytes/128Bytes-4 691.3Mi ± 0% 2092.3Mi ± 4% +202.66% (p=0.000 n=8) XORBytes/2048Bytes-4 1.914Gi ± 0% 4.125Gi ± 0% +115.51% (p=0.000 n=8) XORBytes/8192Bytes-4 2.138Gi ± 0% 4.288Gi ± 2% +100.54% (p=0.000 n=8) XORBytes/32768Bytes-4 1.583Gi ± 6% 2.908Gi ± 5% +83.61% (p=0.000 n=8) XORBytesAlignment/8Bytes0Offset-4 59.83Mi ± 0% 269.47Mi ± 1% +350.37% (p=0.000 n=8) XORBytesAlignment/8Bytes1Offset-4 72.22Mi ± 0% 270.51Mi ± 1% +274.56% (p=0.000 n=8) XORBytesAlignment/8Bytes2Offset-4 72.28Mi ± 1% 269.19Mi ± 1% +272.41% (p=0.000 n=8) XORBytesAlignment/8Bytes3Offset-4 72.21Mi ± 0% 268.16Mi ± 1% +271.38% (p=0.000 n=8) XORBytesAlignment/8Bytes4Offset-4 59.79Mi ± 0% 270.67Mi ± 1% +352.74% (p=0.000 n=8) XORBytesAlignment/8Bytes5Offset-4 72.36Mi ± 0% 268.83Mi ± 1% +271.49% (p=0.000 n=8) XORBytesAlignment/8Bytes6Offset-4 72.29Mi ± 0% 269.95Mi ± 1% +273.44% (p=0.000 n=8) XORBytesAlignment/8Bytes7Offset-4 72.27Mi ± 0% 270.14Mi ± 1% +273.79% (p=0.000 n=8) XORBytesAlignment/128Bytes0Offset-4 684.7Mi ± 0% 2049.1Mi ± 0% +199.26% (p=0.000 n=8) XORBytesAlignment/128Bytes1Offset-4 389.7Mi ± 1% 1620.7Mi ± 0% +315.86% (p=0.000 n=8) XORBytesAlignment/128Bytes2Offset-4 389.1Mi ± 0% 1620.3Mi ± 0% +316.41% (p=0.000 n=8) XORBytesAlignment/128Bytes3Offset-4 388.4Mi ± 1% 1620.9Mi ± 0% +317.29% (p=0.000 n=8) XORBytesAlignment/128Bytes4Offset-4 684.8Mi ± 0% 2049.2Mi ± 0% +199.24% (p=0.000 n=8) XORBytesAlignment/128Bytes5Offset-4 388.5Mi ± 0% 1610.3Mi ± 1% +314.47% (p=0.000 n=8) XORBytesAlignment/128Bytes6Offset-4 389.6Mi ± 0% 1615.4Mi ± 0% +314.60% (p=0.000 n=8) XORBytesAlignment/128Bytes7Offset-4 388.9Mi ± 0% 1617.8Mi ± 1% +316.04% (p=0.000 n=8) XORBytesAlignment/2048Bytes0Offset-4 1.903Gi ± 3% 4.189Gi ± 3% +120.18% (p=0.000 n=8) XORBytesAlignment/2048Bytes1Offset-4 535.1Mi ± 0% 2669.7Mi ± 0% +398.88% (p=0.000 n=8) XORBytesAlignment/2048Bytes2Offset-4 535.8Mi ± 0% 2670.1Mi ± 0% +398.34% (p=0.000 n=8) XORBytesAlignment/2048Bytes3Offset-4 534.2Mi ± 0% 2669.6Mi ± 0% +399.71% (p=0.000 n=8) XORBytesAlignment/2048Bytes4Offset-4 1.902Gi ± 0% 4.187Gi ± 0% +120.12% (p=0.000 n=8) XORBytesAlignment/2048Bytes5Offset-4 534.9Mi ± 0% 2651.6Mi ± 0% +395.73% (p=0.000 n=8) XORBytesAlignment/2048Bytes6Offset-4 535.5Mi ± 0% 2652.3Mi ± 0% +395.34% (p=0.000 n=8) XORBytesAlignment/2048Bytes7Offset-4 534.1Mi ± 1% 2651.6Mi ± 0% +396.46% (p=0.000 n=8) geomean 338.6Mi 1.205Gi +264.51% Change-Id: I4d7e759968779cf8470826b8662b9f2018e663bc Reviewed-on: https://go-review.googlesource.com/c/go/+/666275 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Michael Knyszek <mknyszek@google.com> Reviewed-by: Keith Randall <khr@google.com> Auto-Submit: Keith Randall <khr@google.com>
This commit is contained in:
parent
238d273da4
commit
343e486bfd
3 changed files with 214 additions and 2 deletions
|
@ -2,7 +2,7 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build (amd64 || arm64 || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64) && !purego
|
||||
//go:build (amd64 || arm64 || loong64 || mips || mipsle || mips64 || mips64le || ppc64 || ppc64le || riscv64) && !purego
|
||||
|
||||
package subtle
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build (!amd64 && !arm64 && !loong64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !riscv64) || purego
|
||||
//go:build (!amd64 && !arm64 && !loong64 && !mips && !mipsle && !mips64 && !mips64le && !ppc64 && !ppc64le && !riscv64) || purego
|
||||
|
||||
package subtle
|
||||
|
||||
|
|
212
src/crypto/internal/fips140/subtle/xor_mipsx.s
Normal file
212
src/crypto/internal/fips140/subtle/xor_mipsx.s
Normal file
|
@ -0,0 +1,212 @@
|
|||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build (mips || mipsle) && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func xorBytes(dst, a, b *byte, n int)
|
||||
TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
|
||||
MOVW dst+0(FP), R1
|
||||
MOVW a+4(FP), R2
|
||||
MOVW b+8(FP), R3
|
||||
MOVW n+12(FP), R4
|
||||
|
||||
SGTU $64, R4, R5 // R5 = 1 if (64 > R4)
|
||||
BNE R5, xor_32_check
|
||||
xor_64:
|
||||
MOVW (R2), R6
|
||||
MOVW 4(R2), R7
|
||||
MOVW 8(R2), R8
|
||||
MOVW 12(R2), R9
|
||||
MOVW (R3), R10
|
||||
MOVW 4(R3), R11
|
||||
MOVW 8(R3), R12
|
||||
MOVW 12(R3), R13
|
||||
XOR R6, R10
|
||||
XOR R7, R11
|
||||
XOR R8, R12
|
||||
XOR R9, R13
|
||||
MOVW R10, (R1)
|
||||
MOVW R11, 4(R1)
|
||||
MOVW R12, 8(R1)
|
||||
MOVW R13, 12(R1)
|
||||
MOVW 16(R2), R6
|
||||
MOVW 20(R2), R7
|
||||
MOVW 24(R2), R8
|
||||
MOVW 28(R2), R9
|
||||
MOVW 16(R3), R10
|
||||
MOVW 20(R3), R11
|
||||
MOVW 24(R3), R12
|
||||
MOVW 28(R3), R13
|
||||
XOR R6, R10
|
||||
XOR R7, R11
|
||||
XOR R8, R12
|
||||
XOR R9, R13
|
||||
MOVW R10, 16(R1)
|
||||
MOVW R11, 20(R1)
|
||||
MOVW R12, 24(R1)
|
||||
MOVW R13, 28(R1)
|
||||
MOVW 32(R2), R6
|
||||
MOVW 36(R2), R7
|
||||
MOVW 40(R2), R8
|
||||
MOVW 44(R2), R9
|
||||
MOVW 32(R3), R10
|
||||
MOVW 36(R3), R11
|
||||
MOVW 40(R3), R12
|
||||
MOVW 44(R3), R13
|
||||
XOR R6, R10
|
||||
XOR R7, R11
|
||||
XOR R8, R12
|
||||
XOR R9, R13
|
||||
MOVW R10, 32(R1)
|
||||
MOVW R11, 36(R1)
|
||||
MOVW R12, 40(R1)
|
||||
MOVW R13, 44(R1)
|
||||
MOVW 48(R2), R6
|
||||
MOVW 52(R2), R7
|
||||
MOVW 56(R2), R8
|
||||
MOVW 60(R2), R9
|
||||
MOVW 48(R3), R10
|
||||
MOVW 52(R3), R11
|
||||
MOVW 56(R3), R12
|
||||
MOVW 60(R3), R13
|
||||
XOR R6, R10
|
||||
XOR R7, R11
|
||||
XOR R8, R12
|
||||
XOR R9, R13
|
||||
MOVW R10, 48(R1)
|
||||
MOVW R11, 52(R1)
|
||||
MOVW R12, 56(R1)
|
||||
MOVW R13, 60(R1)
|
||||
ADD $64, R2
|
||||
ADD $64, R3
|
||||
ADD $64, R1
|
||||
SUB $64, R4
|
||||
SGTU $64, R4, R5
|
||||
BEQ R0, R5, xor_64
|
||||
BEQ R0, R4, end
|
||||
|
||||
xor_32_check:
|
||||
SGTU $32, R4, R5
|
||||
BNE R5, xor_16_check
|
||||
xor_32:
|
||||
MOVW (R2), R6
|
||||
MOVW 4(R2), R7
|
||||
MOVW 8(R2), R8
|
||||
MOVW 12(R2), R9
|
||||
MOVW (R3), R10
|
||||
MOVW 4(R3), R11
|
||||
MOVW 8(R3), R12
|
||||
MOVW 12(R3), R13
|
||||
XOR R6, R10
|
||||
XOR R7, R11
|
||||
XOR R8, R12
|
||||
XOR R9, R13
|
||||
MOVW R10, (R1)
|
||||
MOVW R11, 4(R1)
|
||||
MOVW R12, 8(R1)
|
||||
MOVW R13, 12(R1)
|
||||
MOVW 16(R2), R6
|
||||
MOVW 20(R2), R7
|
||||
MOVW 24(R2), R8
|
||||
MOVW 28(R2), R9
|
||||
MOVW 16(R3), R10
|
||||
MOVW 20(R3), R11
|
||||
MOVW 24(R3), R12
|
||||
MOVW 28(R3), R13
|
||||
XOR R6, R10
|
||||
XOR R7, R11
|
||||
XOR R8, R12
|
||||
XOR R9, R13
|
||||
MOVW R10, 16(R1)
|
||||
MOVW R11, 20(R1)
|
||||
MOVW R12, 24(R1)
|
||||
MOVW R13, 28(R1)
|
||||
ADD $32, R2
|
||||
ADD $32, R3
|
||||
ADD $32, R1
|
||||
SUB $32, R4
|
||||
BEQ R0, R4, end
|
||||
|
||||
xor_16_check:
|
||||
SGTU $16, R4, R5
|
||||
BNE R5, xor_8_check
|
||||
xor_16:
|
||||
MOVW (R2), R6
|
||||
MOVW 4(R2), R7
|
||||
MOVW 8(R2), R8
|
||||
MOVW 12(R2), R9
|
||||
MOVW (R3), R10
|
||||
MOVW 4(R3), R11
|
||||
MOVW 8(R3), R12
|
||||
MOVW 12(R3), R13
|
||||
XOR R6, R10
|
||||
XOR R7, R11
|
||||
XOR R8, R12
|
||||
XOR R9, R13
|
||||
MOVW R10, (R1)
|
||||
MOVW R11, 4(R1)
|
||||
MOVW R12, 8(R1)
|
||||
MOVW R13, 12(R1)
|
||||
ADD $16, R2
|
||||
ADD $16, R3
|
||||
ADD $16, R1
|
||||
SUB $16, R4
|
||||
BEQ R0, R4, end
|
||||
|
||||
xor_8_check:
|
||||
SGTU $8, R4, R5
|
||||
BNE R5, xor_4_check
|
||||
xor_8:
|
||||
MOVW (R2), R6
|
||||
MOVW 4(R2), R7
|
||||
MOVW (R3), R8
|
||||
MOVW 4(R3), R9
|
||||
XOR R6, R8
|
||||
XOR R7, R9
|
||||
MOVW R8, (R1)
|
||||
MOVW R9, 4(R1)
|
||||
ADD $8, R1
|
||||
ADD $8, R2
|
||||
ADD $8, R3
|
||||
SUB $8, R4
|
||||
BEQ R0, R4, end
|
||||
|
||||
xor_4_check:
|
||||
SGTU $4, R4, R5
|
||||
BNE R5, xor_2_check
|
||||
xor_4:
|
||||
MOVW (R2), R6
|
||||
MOVW (R3), R7
|
||||
XOR R6, R7
|
||||
MOVW R7, (R1)
|
||||
ADD $4, R2
|
||||
ADD $4, R3
|
||||
ADD $4, R1
|
||||
SUB $4, R4
|
||||
BEQ R0, R4, end
|
||||
|
||||
xor_2_check:
|
||||
SGTU $2, R4, R5
|
||||
BNE R5, xor_1
|
||||
xor_2:
|
||||
MOVH (R2), R6
|
||||
MOVH (R3), R7
|
||||
XOR R6, R7
|
||||
MOVH R7, (R1)
|
||||
ADD $2, R2
|
||||
ADD $2, R3
|
||||
ADD $2, R1
|
||||
SUB $2, R4
|
||||
BEQ R0, R4, end
|
||||
|
||||
xor_1:
|
||||
MOVB (R2), R6
|
||||
MOVB (R3), R7
|
||||
XOR R6, R7
|
||||
MOVB R7, (R1)
|
||||
|
||||
end:
|
||||
RET
|
Loading…
Add table
Add a link
Reference in a new issue