mirror of
https://github.com/golang/go.git
synced 2026-02-06 09:50:02 +00:00
crypto/internal/fips140/sha512: interleave scheduling with rounds for 10.3% speed-up
goos: linux
goarch: amd64
pkg: crypto/sha512
cpu: AMD EPYC 7B13
│ before │ after │
│ sec/op │ sec/op vs base │
Hash8Bytes/New 486.7n ± 1% 440.3n ± 0% -9.53% (p=0.001 n=7)
Hash8Bytes/New-2 487.3n ± 1% 442.6n ± 1% -9.17% (p=0.001 n=7)
Hash8Bytes/New-4 488.0n ± 1% 442.3n ± 0% -9.36% (p=0.001 n=7)
Hash8Bytes/Sum384 495.1n ± 0% 451.2n ± 1% -8.87% (p=0.001 n=7)
Hash8Bytes/Sum384-2 495.0n ± 1% 450.8n ± 1% -8.93% (p=0.001 n=7)
Hash8Bytes/Sum384-4 500.2n ± 2% 453.6n ± 2% -9.32% (p=0.001 n=7)
Hash8Bytes/Sum512 497.3n ± 1% 452.1n ± 0% -9.09% (p=0.001 n=7)
Hash8Bytes/Sum512-2 496.0n ± 1% 453.5n ± 0% -8.57% (p=0.001 n=7)
Hash8Bytes/Sum512-4 498.5n ± 0% 452.3n ± 1% -9.27% (p=0.001 n=7)
Hash1K/New 3.985µ ± 1% 3.543µ ± 1% -11.09% (p=0.001 n=7)
Hash1K/New-2 4.004µ ± 2% 3.558µ ± 1% -11.14% (p=0.001 n=7)
Hash1K/New-4 3.997µ ± 0% 3.563µ ± 1% -10.86% (p=0.001 n=7)
Hash1K/Sum384 3.996µ ± 1% 3.560µ ± 1% -10.91% (p=0.001 n=7)
Hash1K/Sum384-2 4.011µ ± 1% 3.576µ ± 1% -10.85% (p=0.001 n=7)
Hash1K/Sum384-4 4.004µ ± 0% 3.564µ ± 0% -10.99% (p=0.001 n=7)
Hash1K/Sum512 3.998µ ± 1% 3.555µ ± 1% -11.08% (p=0.001 n=7)
Hash1K/Sum512-2 3.996µ ± 0% 3.560µ ± 1% -10.91% (p=0.001 n=7)
Hash1K/Sum512-4 4.004µ ± 1% 3.573µ ± 1% -10.76% (p=0.001 n=7)
Hash8K/New 28.34µ ± 1% 25.29µ ± 1% -10.77% (p=0.001 n=7)
Hash8K/New-2 28.45µ ± 1% 25.48µ ± 1% -10.44% (p=0.001 n=7)
Hash8K/New-4 28.42µ ± 1% 25.37µ ± 1% -10.71% (p=0.001 n=7)
Hash8K/Sum384 28.38µ ± 0% 25.18µ ± 0% -11.28% (p=0.001 n=7)
Hash8K/Sum384-2 28.50µ ± 1% 25.35µ ± 1% -11.07% (p=0.001 n=7)
Hash8K/Sum384-4 28.51µ ± 1% 25.35µ ± 0% -11.07% (p=0.001 n=7)
Hash8K/Sum512 28.34µ ± 1% 25.23µ ± 0% -10.96% (p=0.001 n=7)
Hash8K/Sum512-2 28.33µ ± 1% 25.23µ ± 1% -10.95% (p=0.001 n=7)
Hash8K/Sum512-4 28.48µ ± 1% 25.31µ ± 1% -11.13% (p=0.001 n=7)
geomean 3.828µ 3.433µ -10.34%
│ before │ after │
│ B/s │ B/s vs base │
Hash8Bytes/New 15.68Mi ± 1% 17.33Mi ± 0% +10.52% (p=0.001 n=7)
Hash8Bytes/New-2 15.66Mi ± 1% 17.23Mi ± 1% +10.05% (p=0.001 n=7)
Hash8Bytes/New-4 15.63Mi ± 0% 17.25Mi ± 0% +10.37% (p=0.001 n=7)
Hash8Bytes/Sum384 15.41Mi ± 0% 16.91Mi ± 1% +9.72% (p=0.001 n=7)
Hash8Bytes/Sum384-2 15.41Mi ± 1% 16.93Mi ± 1% +9.84% (p=0.001 n=7)
Hash8Bytes/Sum384-4 15.25Mi ± 2% 16.82Mi ± 2% +10.32% (p=0.001 n=7)
Hash8Bytes/Sum512 15.34Mi ± 1% 16.87Mi ± 0% +9.94% (p=0.001 n=7)
Hash8Bytes/Sum512-2 15.38Mi ± 1% 16.82Mi ± 0% +9.36% (p=0.001 n=7)
Hash8Bytes/Sum512-4 15.31Mi ± 0% 16.87Mi ± 1% +10.22% (p=0.001 n=7)
Hash1K/New 245.0Mi ± 1% 275.6Mi ± 1% +12.47% (p=0.001 n=7)
Hash1K/New-2 243.9Mi ± 2% 274.5Mi ± 1% +12.55% (p=0.001 n=7)
Hash1K/New-4 244.3Mi ± 0% 274.1Mi ± 1% +12.21% (p=0.001 n=7)
Hash1K/Sum384 244.4Mi ± 0% 274.3Mi ± 1% +12.24% (p=0.001 n=7)
Hash1K/Sum384-2 243.5Mi ± 1% 273.1Mi ± 1% +12.16% (p=0.001 n=7)
Hash1K/Sum384-4 243.9Mi ± 0% 274.0Mi ± 0% +12.35% (p=0.001 n=7)
Hash1K/Sum512 244.3Mi ± 1% 274.7Mi ± 1% +12.46% (p=0.001 n=7)
Hash1K/Sum512-2 244.4Mi ± 0% 274.3Mi ± 1% +12.25% (p=0.001 n=7)
Hash1K/Sum512-4 243.9Mi ± 1% 273.3Mi ± 1% +12.08% (p=0.001 n=7)
Hash8K/New 275.7Mi ± 1% 309.0Mi ± 1% +12.07% (p=0.001 n=7)
Hash8K/New-2 274.6Mi ± 1% 306.6Mi ± 1% +11.67% (p=0.001 n=7)
Hash8K/New-4 274.9Mi ± 1% 307.9Mi ± 1% +11.99% (p=0.001 n=7)
Hash8K/Sum384 275.3Mi ± 0% 310.3Mi ± 0% +12.71% (p=0.001 n=7)
Hash8K/Sum384-2 274.1Mi ± 1% 308.2Mi ± 1% +12.45% (p=0.001 n=7)
Hash8K/Sum384-4 274.1Mi ± 1% 308.2Mi ± 0% +12.44% (p=0.001 n=7)
Hash8K/Sum512 275.7Mi ± 1% 309.6Mi ± 0% +12.31% (p=0.001 n=7)
Hash8K/Sum512-2 275.8Mi ± 1% 309.7Mi ± 1% +12.29% (p=0.001 n=7)
Hash8K/Sum512-4 274.3Mi ± 1% 308.7Mi ± 1% +12.52% (p=0.001 n=7)
geomean 101.2Mi 112.9Mi +11.53%
│ before │ after │
│ B/op │ B/op vs base │
Hash8Bytes/New 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/New-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/New-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/Sum384 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/Sum384-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/Sum384-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/Sum512 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/Sum512-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/Sum512-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/New 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/New-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/New-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/Sum384 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/Sum384-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/Sum384-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/Sum512 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/Sum512-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/Sum512-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/New 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/New-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/New-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/Sum384 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/Sum384-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/Sum384-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/Sum512 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/Sum512-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/Sum512-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
geomean ² +0.00% ²
¹ all samples are equal
² summaries must be >0 to compute geomean
│ before │ after │
│ allocs/op │ allocs/op vs base │
Hash8Bytes/New 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/New-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/New-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/Sum384 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/Sum384-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/Sum384-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/Sum512 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/Sum512-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8Bytes/Sum512-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/New 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/New-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/New-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/Sum384 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/Sum384-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/Sum384-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/Sum512 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/Sum512-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash1K/Sum512-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/New 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/New-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/New-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/Sum384 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/Sum384-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/Sum384-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/Sum512 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/Sum512-2 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
Hash8K/Sum512-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=7) ¹
geomean ² +0.00% ²
¹ all samples are equal
² summaries must be >0 to compute geomean
Change-Id: I3791244b3f69e093203f6aa46dc59428afcb9223
Reviewed-on: https://go-review.googlesource.com/c/go/+/711844
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Roland Shoemaker <roland@golang.org>
This commit is contained in:
parent
5b34354bd3
commit
2c7c62b972
1 changed files with 14 additions and 15 deletions
|
|
@ -97,23 +97,22 @@ func blockGeneric(dig *Digest, p []byte) {
|
|||
var w [80]uint64
|
||||
h0, h1, h2, h3, h4, h5, h6, h7 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7]
|
||||
for len(p) >= chunk {
|
||||
for i := 0; i < 16; i++ {
|
||||
j := i * 8
|
||||
w[i] = uint64(p[j])<<56 | uint64(p[j+1])<<48 | uint64(p[j+2])<<40 | uint64(p[j+3])<<32 |
|
||||
uint64(p[j+4])<<24 | uint64(p[j+5])<<16 | uint64(p[j+6])<<8 | uint64(p[j+7])
|
||||
}
|
||||
for i := 16; i < 80; i++ {
|
||||
v1 := w[i-2]
|
||||
t1 := bits.RotateLeft64(v1, -19) ^ bits.RotateLeft64(v1, -61) ^ (v1 >> 6)
|
||||
v2 := w[i-15]
|
||||
t2 := bits.RotateLeft64(v2, -1) ^ bits.RotateLeft64(v2, -8) ^ (v2 >> 7)
|
||||
|
||||
w[i] = t1 + w[i-7] + t2 + w[i-16]
|
||||
}
|
||||
|
||||
a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7
|
||||
|
||||
for i := 0; i < 80; i++ {
|
||||
for i := range 80 {
|
||||
if i < 16 {
|
||||
j := i * 8
|
||||
w[i] = uint64(p[j])<<56 | uint64(p[j+1])<<48 | uint64(p[j+2])<<40 | uint64(p[j+3])<<32 |
|
||||
uint64(p[j+4])<<24 | uint64(p[j+5])<<16 | uint64(p[j+6])<<8 | uint64(p[j+7])
|
||||
} else {
|
||||
v1 := w[i-2]
|
||||
t1 := bits.RotateLeft64(v1, -19) ^ bits.RotateLeft64(v1, -61) ^ (v1 >> 6)
|
||||
v2 := w[i-15]
|
||||
t2 := bits.RotateLeft64(v2, -1) ^ bits.RotateLeft64(v2, -8) ^ (v2 >> 7)
|
||||
|
||||
w[i] = t1 + w[i-7] + t2 + w[i-16]
|
||||
}
|
||||
|
||||
t1 := h + (bits.RotateLeft64(e, -14) ^ bits.RotateLeft64(e, -18) ^ bits.RotateLeft64(e, -41)) + ((e & f) ^ (^e & g)) + _K[i] + w[i]
|
||||
|
||||
t2 := (bits.RotateLeft64(a, -28) ^ bits.RotateLeft64(a, -34) ^ bits.RotateLeft64(a, -39)) + ((a & b) ^ (a & c) ^ (b & c))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue