crypto/mldsa: don't precompute PublicKey

This is a difficult tradeoff, but precomputing the PublicKey makes it
very large (68KB), which is a significant downside for a type that is
often used for one-off signature verification and can stay around in
memory for a long time, for example as part of a TLS connection's
PeerCertificates.

fips140: off
goos: darwin
goarch: arm64
pkg: crypto/mldsa
cpu: Apple M2
                               │ 2c4fe1660f  │           d35f184725-dirty            │
                               │   sec/op    │    sec/op     vs base                 │
Verify/ML-DSA-44/Whole-8         74.74µ ± 0%    68.61µ ± 0%    -8.21% (p=0.000 n=20)
Verify/ML-DSA-44/Precomputed-8   36.43µ ± 1%    66.91µ ± 0%   +83.65% (p=0.000 n=20)
Verify/ML-DSA-65/Whole-8         113.4µ ± 0%    107.2µ ± 0%    -5.51% (p=0.000 n=20)
Verify/ML-DSA-65/Precomputed-8   51.56µ ± 0%   104.70µ ± 0%  +103.05% (p=0.000 n=20)
Verify/ML-DSA-87/Whole-8         178.0µ ± 0%    171.3µ ± 0%    -3.78% (p=0.000 n=20)
Verify/ML-DSA-87/Precomputed-8   76.84µ ± 0%   167.95µ ± 0%  +118.58% (p=0.000 n=20)

Updates #77626

Change-Id: Ib71867fcfb710f9f2f92c3df4312e2ef6a6a6964
Reviewed-on: https://go-review.googlesource.com/c/go/+/776707
Reviewed-by: Roland Shoemaker <roland@golang.org>
Reviewed-by: Daniel McCarney <daniel@binaryparadox.net>
TryBot-Bypass: Filippo Valsorda <filippo@golang.org>
Auto-Submit: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
This commit is contained in:
Filippo Valsorda 2026-05-07 14:33:34 +02:00 committed by Gopher Robot
parent 7bc111c6eb
commit c888fd67f0
4 changed files with 48 additions and 32 deletions

View file

@ -58,6 +58,8 @@ const maxPubKeySize = PublicKeySize87
type PrivateKey struct {
seed [32]byte
pub PublicKey
a [maxK * maxL]nttElement
t1 [maxK]nttElement // NTT(t₁ ⋅ 2ᵈ)
s1 [maxL]nttElement
s2 [maxK]nttElement
t0 [maxK]nttElement
@ -82,9 +84,7 @@ func (priv *PrivateKey) PublicKey() *PublicKey {
// PublicKey is an ML-DSA public key: its encoding, the parameter set it was
// created for, and the hash of the encoding.
//
// NOTE(review): this hunk is shown without +/- markers, so lines from before
// and after the change appear together. Going by the hunk header (9 lines
// before, 7 after) and the commit message, a and t1 are the fields being
// dropped, and tr is listed twice only because its line was rewritten.
type PublicKey struct {
// raw holds the encoded key; readers slice it to pubKeySize(p) bytes.
raw [maxPubKeySize]byte
// p is the parameter set selected when the key was constructed.
p parameters
// a is the k×l matrix A as filled in by computeMatrixA (first k*l entries).
a [maxK * maxL]nttElement
t1 [maxK]nttElement // NTT(t₁ ⋅ 2ᵈ)
tr [64]byte // public key hash
tr [64]byte // public key hash
}
func (pub *PublicKey) Equal(x *PublicKey) bool {
@ -183,7 +183,7 @@ func newPrivateKey(seed *[32]byte, p parameters) *PrivateKey {
ξ.Read(ρs)
ξ.Read(priv.k[:])
A := priv.pub.a[:k*l]
A := priv.a[:k*l]
computeMatrixA(A, ρ, p)
s1 := priv.s1[:l]
@ -219,13 +219,9 @@ func newPrivateKey(seed *[32]byte, p parameters) *PrivateKey {
t0[i] = ntt(w)
}
// The computations below (and their storage in the PrivateKey struct) are
// not strictly necessary and could be deferred to PrivateKey.PublicKey().
// That would require keeping or re-deriving ρ and t/t1, though.
pk := pkEncode(priv.pub.raw[:0], ρ, t1, p)
priv.pub.tr = computePublicKeyHash(pk)
computeT1Hat(priv.pub.t1[:k], t1) // NTT(t₁ ⋅ 2ᵈ)
computeT1Hat(priv.t1[:k], t1) // NTT(t₁ ⋅ 2ᵈ)
return priv
}
@ -302,31 +298,32 @@ func pkDecode(pk []byte, t1 [][n]uint16, p parameters) (ρ []byte, err error) {
var errInvalidPublicKeyLength = errors.New("mldsa: invalid public key length")
// NewPublicKey44 builds a PublicKey from the encoding pk using the params44
// parameter set, and returns the result of newPublicKey unchanged.
//
// NOTE(review): the two return lines are the before and after forms of one
// statement (this view has no +/- markers). The second form, which hands
// newPublicKey a freshly allocated &PublicKey{} to fill in, is the one that
// matches the three-argument newPublicKey(pub, pk, p) signature.
func NewPublicKey44(pk []byte) (*PublicKey, error) {
return newPublicKey(pk, params44)
return newPublicKey(&PublicKey{}, pk, params44)
}
// NewPublicKey65 builds a PublicKey from the encoding pk using the params65
// parameter set, and returns the result of newPublicKey unchanged.
//
// NOTE(review): the two return lines are the before and after forms of one
// statement (this view has no +/- markers); the second passes a freshly
// allocated &PublicKey{} for newPublicKey to fill in.
func NewPublicKey65(pk []byte) (*PublicKey, error) {
return newPublicKey(pk, params65)
return newPublicKey(&PublicKey{}, pk, params65)
}
// NewPublicKey87 builds a PublicKey from the encoding pk using the params87
// parameter set, and returns the result of newPublicKey unchanged.
//
// NOTE(review): the two return lines are the before and after forms of one
// statement (this view has no +/- markers); the second passes a freshly
// allocated &PublicKey{} for newPublicKey to fill in.
func NewPublicKey87(pk []byte) (*PublicKey, error) {
return newPublicKey(pk, params87)
return newPublicKey(&PublicKey{}, pk, params87)
}
func newPublicKey(pk []byte, p parameters) (*PublicKey, error) {
k, l := p.k, p.l
t1 := make([][n]uint16, k, maxK)
ρ, err := pkDecode(pk, t1, p)
if err != nil {
return nil, err
func newPublicKey(pub *PublicKey, pk []byte, p parameters) (*PublicKey, error) {
if len(pk) != pubKeySize(p) {
return nil, errInvalidPublicKeyLength
}
pub := &PublicKey{p: p}
// We don't precompute A and t1Hat here, because they would make the
// PublicKey over 68KB. Unlike private keys, public keys are often used to
// verify a signature only once, so precomputation doesn't help as often,
// but they can stay around in memory, for example as part of a TLS
// connection's PeerCertificates, so their size is more of a concern.
// Instead, we compute A and t1Hat on demand in Verify.
pub.p = p
copy(pub.raw[:], pk)
computeMatrixA(pub.a[:k*l], ρ, p)
pub.tr = computePublicKeyHash(pk)
computeT1Hat(pub.t1[:k], t1) // NTT(t₁ ⋅ 2ᵈ)
return pub, nil
}
@ -423,7 +420,7 @@ func computeMessageHash(tr []byte, msg []byte, context string) ([64]byte, error)
func signInternal(priv *PrivateKey, μ *[64]byte, random *[32]byte) []byte {
p, k, l := priv.pub.p, priv.pub.p.k, priv.pub.p.l
A, s1, s2, t0 := priv.pub.a[:k*l], priv.s1[:l], priv.s2[:k], priv.t0[:k]
A, s1, s2, t0 := priv.a[:k*l], priv.s1[:l], priv.s2[:k], priv.t0[:k]
β := p.τ * p.η
γ1 := uint32(1 << p.γ1)
@ -654,12 +651,21 @@ func VerifyExternalMu(pub *PublicKey, μ []byte, sig []byte) error {
func verifyInternal(pub *PublicKey, μ *[64]byte, sig []byte) error {
p, k, l := pub.p, pub.p.k, pub.p.l
t1, A := pub.t1[:k], pub.a[:k*l]
β := p.τ * p.η
γ1 := uint32(1 << p.γ1)
γ := γ1 - uint32(β)
t1 := make([][n]uint16, k, maxK)
ρ, err := pkDecode(pub.raw[:pubKeySize(pub.p)], t1, p)
if err != nil {
return err
}
A := make([]nttElement, k*l, maxK*maxL)
computeMatrixA(A, ρ, p)
t1Hat := make([]nttElement, k, maxK)
computeT1Hat(t1Hat, t1) // NTT(t₁ ⋅ 2ᵈ)
z := make([]ringElement, l, maxL)
h := make([][n]byte, k, maxK)
ch, err := sigDecode(sig, z, h, p)
@ -680,7 +686,7 @@ func verifyInternal(pub *PublicKey, μ *[64]byte, sig []byte) error {
for j := range l {
wHat = polyAdd(wHat, nttMul(A[i*l+j], zHat[j]))
}
wHat = polySub(wHat, nttMul(c, t1[i]))
wHat = polySub(wHat, nttMul(c, t1Hat[i]))
w[i] = inverseNTT(wHat)
}

View file

@ -57,7 +57,7 @@ func TestingOnlyNewPrivateKeyFromSemiExpanded(sk []byte) (*PrivateKey, error) {
priv := &PrivateKey{pub: PublicKey{p: p}}
priv.k = K
priv.pub.tr = tr
A := priv.pub.a[:k*l]
A := priv.a[:k*l]
computeMatrixA(A, ρ[:], p)
for r := range l {
priv.s1[r] = ntt(s1[r])
@ -110,7 +110,7 @@ func TestingOnlyNewPrivateKeyFromSemiExpanded(sk []byte) (*PrivateKey, error) {
if computePublicKeyHash(pk) != tr {
return nil, errors.New("mldsa: semi-expanded private key inconsistent with public key hash")
}
computeT1Hat(priv.pub.t1[:k], t1) // NTT(t₁ ⋅ 2ᵈ)
computeT1Hat(priv.t1[:k], t1) // NTT(t₁ ⋅ 2ᵈ)
return priv, nil
}

View file

@ -150,6 +150,10 @@ type PublicKey struct {
// NewPublicKey creates a new ML-DSA public key from the given encoding.
//
// It hands a fresh, zero-valued PublicKey to newPublicKey, which fills it in
// for the requested parameter set and returns it, or returns nil and an error.
func NewPublicKey(params Parameters, encoding []byte) (*PublicKey, error) {
return newPublicKey(&PublicKey{}, params, encoding)
}
func newPublicKey(pub *PublicKey, params Parameters, encoding []byte) (*PublicKey, error) {
var err error
var pk *mldsa.PublicKey
switch params {
@ -165,7 +169,8 @@ func NewPublicKey(params Parameters, encoding []byte) (*PublicKey, error) {
if err != nil {
return nil, err
}
return &PublicKey{p: *pk}, nil
pub.p = *pk
return pub, nil
}
// Bytes returns the public key encoding.

View file

@ -157,21 +157,26 @@ func testGenerateKey(t *testing.T, params Parameters) {
func TestAllocations(t *testing.T) {
// We allocate
//
// - the PrivateKey (k and kk) and PublicKey (pk) structs
// - their temporary inner structs (3x)
// - the PrivateKey (k and kk) structs
// - their temporary inner structs (2x)
// - the public key (pkBytes) and signature (sig) byte slices
// - the k.PublicKey() return value
// - the Options argument to Sign
//
// on the heap. The structs are too large for the stack, the byte slices are
// variable-sized, and Options is cast into an interface.
//
// Still, check we are not slipping more allocations in.
var expected float64 = 10
var expected float64 = 7
if fips140.Enabled() {
// The PCT does a sign/verify cycle, which allocates a signature slice.
expected += 1
}
if fips140.Version() == "v1.26.0" {
// The v1.26.0 implementation precomputes PublicKey, making it large
// enough to require heap allocation. Add pk, its inner struct, and the
// return value of k.PublicKey().
expected += 3
}
cryptotest.SkipTestAllocations(t)
if allocs := testing.AllocsPerRun(100, func() {
k, err := GenerateKey(MLDSA44())