From 592775ec7d8bbc99ee0a1ada56c4490c855f9385 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Mon, 27 Oct 2025 18:58:52 +0100 Subject: [PATCH] crypto/mlkem: avoid a few unnecessary inverse NTT calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were mistakenly doing NTT⁻¹ inside the inner loop, on the components of the inner product instead of the sum, leading to k² = 9 inverse NTT calls instead of k = 3 inverse NTT calls. Surprisingly large speedup as a result. fips140: off goos: darwin goarch: arm64 pkg: crypto/mlkem cpu: Apple M2 │ 4c285e0988 │ 4c285e0988-dirty │ │ sec/op │ sec/op vs base │ KeyGen-2 28.95µ ± 3% 28.64µ ± 4% ~ (p=0.699 n=6) Encaps-2 43.13µ ± 3% 35.02µ ± 1% -18.81% (p=0.002 n=6) Decaps-2 43.80µ ± 1% 35.49µ ± 1% -18.97% (p=0.002 n=6) RoundTrip/Alice-2 77.27µ ± 7% 69.12µ ± 3% -10.55% (p=0.002 n=6) RoundTrip/Bob-2 43.08µ ± 2% 35.14µ ± 3% -18.44% (p=0.002 n=6) geomean 44.88µ 38.67µ -13.84% Change-Id: I6a6a69649c1378411c9aca75d473fd5b9984a609 Reviewed-on: https://go-review.googlesource.com/c/go/+/715381 Reviewed-by: Junyang Shao Reviewed-by: Mark Freeman LUCI-TryBot-Result: Go LUCI Auto-Submit: Filippo Valsorda Reviewed-by: Daniel McCarney --- src/crypto/internal/fips140/mlkem/mlkem1024.go | 5 +++-- src/crypto/internal/fips140/mlkem/mlkem768.go | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/crypto/internal/fips140/mlkem/mlkem1024.go b/src/crypto/internal/fips140/mlkem/mlkem1024.go index edde161422c..953eea9bc24 100644 --- a/src/crypto/internal/fips140/mlkem/mlkem1024.go +++ b/src/crypto/internal/fips140/mlkem/mlkem1024.go @@ -369,11 +369,12 @@ func pkeEncrypt1024(cc *[CiphertextSize1024]byte, ex *encryptionKey1024, m *[mes u := make([]ringElement, k1024) // NTT⁻¹(AT ◦ r) + e1 for i := range u { - u[i] = e1[i] + var uHat nttElement for j := range r { // Note that i and j are inverted, as we need the transposed of A. 
- u[i] = polyAdd(u[i], inverseNTT(nttMul(ex.a[j*k1024+i], r[j]))) + uHat = polyAdd(uHat, nttMul(ex.a[j*k1024+i], r[j])) } + u[i] = polyAdd(e1[i], inverseNTT(uHat)) } μ := ringDecodeAndDecompress1(m) diff --git a/src/crypto/internal/fips140/mlkem/mlkem768.go b/src/crypto/internal/fips140/mlkem/mlkem768.go index 088c2954de6..c4c3a9deaf4 100644 --- a/src/crypto/internal/fips140/mlkem/mlkem768.go +++ b/src/crypto/internal/fips140/mlkem/mlkem768.go @@ -428,11 +428,12 @@ func pkeEncrypt(cc *[CiphertextSize768]byte, ex *encryptionKey, m *[messageSize] u := make([]ringElement, k) // NTT⁻¹(AT ◦ r) + e1 for i := range u { - u[i] = e1[i] + var uHat nttElement for j := range r { // Note that i and j are inverted, as we need the transposed of A. - u[i] = polyAdd(u[i], inverseNTT(nttMul(ex.a[j*k+i], r[j]))) + uHat = polyAdd(uHat, nttMul(ex.a[j*k+i], r[j])) } + u[i] = polyAdd(e1[i], inverseNTT(uHat)) } μ := ringDecodeAndDecompress1(m)