crypto/internal/fips140/nistec: avoid some mul64 in p256 calculations

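Two of the constants passed to bits.Mul64 in p256OrdMul and p256OrdSqr have
the form 2^64 - 2^k (k = 32 and k = 0), so the full 64x64 -> 128-bit
multiplication can be replaced with a shift and a borrow-propagating
subtraction (bits.Sub64):

x * 0xffffffff00000000 = x * (2^64 - 2^32)
x * 0xffffffffffffffff = x * (2^64 - 1)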

Benchmark results for crypto/ecdsa, compared to the parent commit:

goos: darwin
goarch: arm64
pkg: crypto/ecdsa
cpu: Apple M4 Max
                    │     OLD     │                NEW                │
                    │   sec/op    │   sec/op     vs base              │
Sign/P256-16          15.83µ ± 1%   15.18µ ± 1%  -4.06% (p=0.002 n=6)
Sign/P384-16          108.0µ ± 0%   107.6µ ± 1%  -0.36% (p=0.041 n=6)
Sign/P521-16          264.5µ ± 0%   262.9µ ± 0%  -0.61% (p=0.002 n=6)
Verify/P256-16        35.89µ ± 0%   35.23µ ± 0%  -1.84% (p=0.002 n=6)
Verify/P384-16        322.4µ ± 0%   321.6µ ± 0%  -0.28% (p=0.041 n=6)
Verify/P521-16        850.3µ ± 0%   847.4µ ± 0%  -0.35% (p=0.002 n=6)
GenerateKey/P256-16   7.241µ ± 0%   7.125µ ± 1%  -1.60% (p=0.002 n=6)
GenerateKey/P384-16   71.03µ ± 0%   70.94µ ± 0%  -0.13% (p=0.041 n=6)
GenerateKey/P521-16   180.6µ ± 0%   180.1µ ± 0%  -0.31% (p=0.002 n=6)
geomean               90.65µ        89.68µ       -1.07%

Change-Id: If8d659a76624dce1c53e23e6ed201189476e3f6b
Reviewed-on: https://go-review.googlesource.com/c/go/+/780640
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Auto-Submit: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Roland Shoemaker <roland@golang.org>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Egon Elbre 2026-05-20 19:55:10 +03:00 committed by Gopher Robot
parent 2c659bb4db
commit 244c8ae4c8

View file

@ -126,8 +126,12 @@ func p256OrdMul(out1 *p256OrdMontElement, arg1 *p256OrdMontElement, arg2 *p256Or
x17, x18 := bits.Add64(x8, x5, x16)
x19 := x18 + x6
_, x20 := bits.Mul64(x11, 0xccd1c8aaee00bc4f)
x23, x22 := bits.Mul64(x20, 0xffffffff00000000)
x25, x24 := bits.Mul64(x20, 0xffffffffffffffff)
// x20 * 0xffffffff00000000 = x20 * (2^64 - 2^32)
x22, b22 := bits.Sub64(0, x20<<32, 0)
x23 := x20 - (x20 >> 32) - b22
// x20 * 0xffffffffffffffff = x20 * (2^64 - 1)
x24, b24 := bits.Sub64(0, x20, 0)
x25 := x20 - b24
x27, x26 := bits.Mul64(x20, 0xbce6faada7179e84)
x29, x28 := bits.Mul64(x20, 0xf3b9cac2fc632551)
x30, x31 := bits.Add64(x29, x26, 0)
@ -153,8 +157,10 @@ func p256OrdMul(out1 *p256OrdMontElement, arg1 *p256OrdMontElement, arg2 *p256Or
x68, x69 := bits.Add64(x45, x59, x67)
x70, x71 := bits.Add64(x46, x61, x69)
_, x72 := bits.Mul64(x62, 0xccd1c8aaee00bc4f)
x75, x74 := bits.Mul64(x72, 0xffffffff00000000)
x77, x76 := bits.Mul64(x72, 0xffffffffffffffff)
x74, b74 := bits.Sub64(0, x72<<32, 0)
x75 := x72 - (x72 >> 32) - b74
x76, b76 := bits.Sub64(0, x72, 0)
x77 := x72 - b76
x79, x78 := bits.Mul64(x72, 0xbce6faada7179e84)
x81, x80 := bits.Mul64(x72, 0xf3b9cac2fc632551)
x82, x83 := bits.Add64(x81, x78, 0)
@ -181,8 +187,10 @@ func p256OrdMul(out1 *p256OrdMontElement, arg1 *p256OrdMontElement, arg2 *p256Or
x121, x122 := bits.Add64(x97, x112, x120)
x123, x124 := bits.Add64(x99, x114, x122)
_, x125 := bits.Mul64(x115, 0xccd1c8aaee00bc4f)
x128, x127 := bits.Mul64(x125, 0xffffffff00000000)
x130, x129 := bits.Mul64(x125, 0xffffffffffffffff)
x127, b127 := bits.Sub64(0, x125<<32, 0)
x128 := x125 - (x125 >> 32) - b127
x129, b129 := bits.Sub64(0, x125, 0)
x130 := x125 - b129
x132, x131 := bits.Mul64(x125, 0xbce6faada7179e84)
x134, x133 := bits.Mul64(x125, 0xf3b9cac2fc632551)
x135, x136 := bits.Add64(x134, x131, 0)
@ -209,8 +217,10 @@ func p256OrdMul(out1 *p256OrdMontElement, arg1 *p256OrdMontElement, arg2 *p256Or
x174, x175 := bits.Add64(x150, x165, x173)
x176, x177 := bits.Add64(x152, x167, x175)
_, x178 := bits.Mul64(x168, 0xccd1c8aaee00bc4f)
x181, x180 := bits.Mul64(x178, 0xffffffff00000000)
x183, x182 := bits.Mul64(x178, 0xffffffffffffffff)
x180, b180 := bits.Sub64(0, x178<<32, 0)
x181 := x178 - (x178 >> 32) - b180
x182, b182 := bits.Sub64(0, x178, 0)
x183 := x178 - b182
x185, x184 := bits.Mul64(x178, 0xbce6faada7179e84)
x187, x186 := bits.Mul64(x178, 0xf3b9cac2fc632551)
x188, x189 := bits.Add64(x187, x184, 0)
@ -251,8 +261,10 @@ func p256OrdSqr(out1 *p256OrdMontElement, arg1 *p256OrdMontElement, n int) {
x17, x18 := bits.Add64(x8, x5, x16)
x19 := x18 + x6
_, x20 := bits.Mul64(x11, 0xccd1c8aaee00bc4f)
x23, x22 := bits.Mul64(x20, 0xffffffff00000000)
x25, x24 := bits.Mul64(x20, 0xffffffffffffffff)
x22, b22 := bits.Sub64(0, x20<<32, 0)
x23 := x20 - (x20 >> 32) - b22
x24, b24 := bits.Sub64(0, x20, 0)
x25 := x20 - b24
x27, x26 := bits.Mul64(x20, 0xbce6faada7179e84)
x29, x28 := bits.Mul64(x20, 0xf3b9cac2fc632551)
x30, x31 := bits.Add64(x29, x26, 0)
@ -278,8 +290,10 @@ func p256OrdSqr(out1 *p256OrdMontElement, arg1 *p256OrdMontElement, n int) {
x68, x69 := bits.Add64(x45, x59, x67)
x70, x71 := bits.Add64(x46, x61, x69)
_, x72 := bits.Mul64(x62, 0xccd1c8aaee00bc4f)
x75, x74 := bits.Mul64(x72, 0xffffffff00000000)
x77, x76 := bits.Mul64(x72, 0xffffffffffffffff)
x74, b74 := bits.Sub64(0, x72<<32, 0)
x75 := x72 - (x72 >> 32) - b74
x76, b76 := bits.Sub64(0, x72, 0)
x77 := x72 - b76
x79, x78 := bits.Mul64(x72, 0xbce6faada7179e84)
x81, x80 := bits.Mul64(x72, 0xf3b9cac2fc632551)
x82, x83 := bits.Add64(x81, x78, 0)
@ -306,8 +320,10 @@ func p256OrdSqr(out1 *p256OrdMontElement, arg1 *p256OrdMontElement, n int) {
x121, x122 := bits.Add64(x97, x112, x120)
x123, x124 := bits.Add64(x99, x114, x122)
_, x125 := bits.Mul64(x115, 0xccd1c8aaee00bc4f)
x128, x127 := bits.Mul64(x125, 0xffffffff00000000)
x130, x129 := bits.Mul64(x125, 0xffffffffffffffff)
x127, b127 := bits.Sub64(0, x125<<32, 0)
x128 := x125 - (x125 >> 32) - b127
x129, b129 := bits.Sub64(0, x125, 0)
x130 := x125 - b129
x132, x131 := bits.Mul64(x125, 0xbce6faada7179e84)
x134, x133 := bits.Mul64(x125, 0xf3b9cac2fc632551)
x135, x136 := bits.Add64(x134, x131, 0)
@ -334,8 +350,10 @@ func p256OrdSqr(out1 *p256OrdMontElement, arg1 *p256OrdMontElement, n int) {
x174, x175 := bits.Add64(x150, x165, x173)
x176, x177 := bits.Add64(x152, x167, x175)
_, x178 := bits.Mul64(x168, 0xccd1c8aaee00bc4f)
x181, x180 := bits.Mul64(x178, 0xffffffff00000000)
x183, x182 := bits.Mul64(x178, 0xffffffffffffffff)
x180, b180 := bits.Sub64(0, x178<<32, 0)
x181 := x178 - (x178 >> 32) - b180
x182, b182 := bits.Sub64(0, x178, 0)
x183 := x178 - b182
x185, x184 := bits.Mul64(x178, 0xbce6faada7179e84)
x187, x186 := bits.Mul64(x178, 0xf3b9cac2fc632551)
x188, x189 := bits.Add64(x187, x184, 0)