mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
crypto/sha3: reduce cSHAKE allocations
Consider a hypothetical SumCSHAKE256 function:
// SumCSHAKE256 allocates a fixed 64-byte buffer and hands it, together with
// N, S, data and length, to sumCSHAKE256, returning whatever that returns.
// NOTE(review): presumably the buffer is created in this thin wrapper so that
// it can stay on the caller's stack once the wrapper is inlined — confirm.
func SumCSHAKE256(N, S, data []byte, length int) []byte {
out := make([]byte, 64)
return sumCSHAKE256(out, N, S, data, length)
}
// sumCSHAKE256 writes data to a cSHAKE256 instance created with N and S and
// reads length bytes of its output. The out buffer is reused (resliced to
// length) when it is at least length bytes long; otherwise a new slice of
// length bytes is allocated. The filled slice is returned.
func sumCSHAKE256(out, N, S, data []byte, length int) []byte {
if len(out) < length {
// The supplied buffer is too small for the requested output.
out = make([]byte, length)
} else {
out = out[:length]
}
h := sha3.NewCSHAKE256(N, S)
h.Write(data)
h.Read(out)
return out
}
Currently this has 4 allocations:
- one for out (unless stack allocated),
- one for the SHAKE result of crypto/internal/fips140/sha3.newCShake,
- one for the initBlock allocation in crypto/internal/fips140/sha3.newCShake,
- one for the result of crypto/internal/fips140/sha3.bytepad.
We eliminate the SHAKE allocation by outlining the SHAKE allocation in
crypto/internal/fips140/sha3.NewCShake128 and NewCShake256. As
crypto/sha3.NewCSHAKE128 and NewCSHAKE256 immediately dereference this
result, this allocation is eliminated.
We eliminate the bytepad allocation by instead writing the various
values directly with SHAKE.Write. Values passed to Write don't escape
and, with the exception of data (which is initBlock), all our Writes are
of fixed size allocations. We can't simply modify bytepad to return a
fixed size byte-slice as the length of data is not constant nor does it
have a reasonable upper bound.
We're stuck with the initBlock allocation because of the API (Reset and
the various marshallers), but we still net a substantial improvement.
benchstat output using the following benchmark:
// BenchmarkSumCSHAKE256 measures SumCSHAKE256 with fixed N, S and data inputs,
// requesting 64 bytes of output on every iteration.
func BenchmarkSumCSHAKE256(b *testing.B) {
N := []byte("N")
S := []byte("S")
data := []byte("testdata")
// 64 matches the output length requested below, so the reported
// throughput is in terms of output bytes produced per operation.
b.SetBytes(64)
for b.Loop() {
SumCSHAKE256(N, S, data, 64)
}
}
name old time/op new time/op delta
SumCSHAKE256-12 1.09µs ±20% 0.79µs ± 1% -27.41% (p=0.000 n=10+9)
name old speed new speed delta
SumCSHAKE256-12 59.8MB/s ±18% 81.0MB/s ± 1% +35.33% (p=0.000 n=10+9)
name old alloc/op new alloc/op delta
SumCSHAKE256-12 536B ± 0% 88B ± 0% -83.58% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
SumCSHAKE256-12 4.00 ± 0% 2.00 ± 0% -50.00% (p=0.000 n=10+10)
Updates #69982
Change-Id: If426ea8127c58f5ef062cf74712ec70fd26a7372
Reviewed-on: https://go-review.googlesource.com/c/go/+/636255
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Roland Shoemaker <roland@golang.org>
This commit is contained in:
parent
e3088d6eb8
commit
bd9222b525
1 changed files with 31 additions and 17 deletions
|
|
@ -23,14 +23,14 @@ type SHAKE struct {
|
||||||
initBlock []byte
|
initBlock []byte
|
||||||
}
|
}
|
||||||
|
|
||||||
func bytepad(data []byte, rate int) []byte {
|
func bytepadWrite(c *SHAKE, data []byte, rate int) {
|
||||||
out := make([]byte, 0, 9+len(data)+rate-1)
|
rateEnc := leftEncode(uint64(rate))
|
||||||
out = append(out, leftEncode(uint64(rate))...)
|
c.Write(rateEnc)
|
||||||
out = append(out, data...)
|
c.Write(data)
|
||||||
if padlen := rate - len(out)%rate; padlen < rate {
|
if padlen := rate - (len(rateEnc)+len(data))%rate; padlen < rate {
|
||||||
out = append(out, make([]byte, padlen)...)
|
const maxRate = rateK256
|
||||||
|
c.Write(make([]byte, padlen, maxRate)) // explicit cap to allow stack allocation
|
||||||
}
|
}
|
||||||
return out
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func leftEncode(x uint64) []byte {
|
func leftEncode(x uint64) []byte {
|
||||||
|
|
@ -47,14 +47,14 @@ func leftEncode(x uint64) []byte {
|
||||||
return b
|
return b
|
||||||
}
|
}
|
||||||
|
|
||||||
func newCShake(N, S []byte, rate, outputLen int, dsbyte byte) *SHAKE {
|
func newCShake(c *SHAKE, N, S []byte, rate, outputLen int, dsbyte byte) *SHAKE {
|
||||||
c := &SHAKE{d: Digest{rate: rate, outputLen: outputLen, dsbyte: dsbyte}}
|
c.d = Digest{rate: rate, outputLen: outputLen, dsbyte: dsbyte}
|
||||||
c.initBlock = make([]byte, 0, 9+len(N)+9+len(S)) // leftEncode returns max 9 bytes
|
c.initBlock = make([]byte, 0, 9+len(N)+9+len(S)) // leftEncode returns max 9 bytes
|
||||||
c.initBlock = append(c.initBlock, leftEncode(uint64(len(N))*8)...)
|
c.initBlock = append(c.initBlock, leftEncode(uint64(len(N))*8)...)
|
||||||
c.initBlock = append(c.initBlock, N...)
|
c.initBlock = append(c.initBlock, N...)
|
||||||
c.initBlock = append(c.initBlock, leftEncode(uint64(len(S))*8)...)
|
c.initBlock = append(c.initBlock, leftEncode(uint64(len(S))*8)...)
|
||||||
c.initBlock = append(c.initBlock, S...)
|
c.initBlock = append(c.initBlock, S...)
|
||||||
c.Write(bytepad(c.initBlock, c.d.rate))
|
bytepadWrite(c, c.initBlock, c.d.rate)
|
||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -82,7 +82,7 @@ func (s *SHAKE) Read(out []byte) (n int, err error) {
|
||||||
func (s *SHAKE) Reset() {
|
func (s *SHAKE) Reset() {
|
||||||
s.d.Reset()
|
s.d.Reset()
|
||||||
if len(s.initBlock) != 0 {
|
if len(s.initBlock) != 0 {
|
||||||
s.Write(bytepad(s.initBlock, s.d.rate))
|
bytepadWrite(s, s.initBlock, s.d.rate)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -132,10 +132,17 @@ func NewShake256() *SHAKE {
|
||||||
// cSHAKE is desired. S is a customization byte string used for domain
|
// cSHAKE is desired. S is a customization byte string used for domain
|
||||||
// separation. When N and S are both empty, this is equivalent to NewShake128.
|
// separation. When N and S are both empty, this is equivalent to NewShake128.
|
||||||
func NewCShake128(N, S []byte) *SHAKE {
|
func NewCShake128(N, S []byte) *SHAKE {
|
||||||
if len(N) == 0 && len(S) == 0 {
|
// The actual logic is in a separate function to outline this allocation.
|
||||||
return NewShake128()
|
c := &SHAKE{}
|
||||||
|
return newCShake128(c, N, S)
|
||||||
}
|
}
|
||||||
return newCShake(N, S, rateK256, 32, dsbyteCShake)
|
|
||||||
|
func newCShake128(c *SHAKE, N, S []byte) *SHAKE {
|
||||||
|
if len(N) == 0 && len(S) == 0 {
|
||||||
|
*c = *NewShake128()
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
return newCShake(c, N, S, rateK256, 32, dsbyteCShake)
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewCShake256 creates a new cSHAKE256 XOF.
|
// NewCShake256 creates a new cSHAKE256 XOF.
|
||||||
|
|
@ -144,8 +151,15 @@ func NewCShake128(N, S []byte) *SHAKE {
|
||||||
// cSHAKE is desired. S is a customization byte string used for domain
|
// cSHAKE is desired. S is a customization byte string used for domain
|
||||||
// separation. When N and S are both empty, this is equivalent to NewShake256.
|
// separation. When N and S are both empty, this is equivalent to NewShake256.
|
||||||
func NewCShake256(N, S []byte) *SHAKE {
|
func NewCShake256(N, S []byte) *SHAKE {
|
||||||
|
// The actual logic is in a separate function to outline this allocation.
|
||||||
|
c := &SHAKE{}
|
||||||
|
return newCShake256(c, N, S)
|
||||||
|
}
|
||||||
|
|
||||||
|
func newCShake256(c *SHAKE, N, S []byte) *SHAKE {
|
||||||
if len(N) == 0 && len(S) == 0 {
|
if len(N) == 0 && len(S) == 0 {
|
||||||
return NewShake256()
|
*c = *NewShake256()
|
||||||
|
return c
|
||||||
}
|
}
|
||||||
return newCShake(N, S, rateK512, 64, dsbyteCShake)
|
return newCShake(c, N, S, rateK512, 64, dsbyteCShake)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue