mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
Restore the generic addMulVVW for wasm (and therefore for all
architectures). Apply the wasm-specific implementation only to the
explicitly sized functions (addMulVVW1024 etc.).
Also, for the sized functions, use unsafe pointer calculations
directly, without converting them back to slices. (This is what
the assembly code does on other architectures.) This results in a
bit more speedup for crypto/rsa benchmarks on Wasm:
pkg: crypto/rsa
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
DecryptPKCS1v15/2048 4.906m ± 0% 4.221m ± 1% -13.96% (p=0.000 n=25)
DecryptPKCS1v15/3072 15.18m ± 0% 13.57m ± 0% -10.64% (p=0.000 n=25)
DecryptPKCS1v15/4096 35.49m ± 0% 32.64m ± 1% -8.04% (p=0.000 n=25)
EncryptPKCS1v15/2048 177.1µ ± 0% 162.3µ ± 0% -8.35% (p=0.000 n=25)
DecryptOAEP/2048 4.900m ± 1% 4.233m ± 0% -13.61% (p=0.000 n=25)
EncryptOAEP/2048 181.8µ ± 0% 166.8µ ± 0% -8.24% (p=0.000 n=25)
SignPKCS1v15/2048 5.026m ± 1% 4.341m ± 0% -13.63% (p=0.000 n=25)
VerifyPKCS1v15/2048 177.2µ ± 0% 161.3µ ± 1% -8.97% (p=0.000 n=25)
SignPSS/2048 5.020m ± 0% 4.344m ± 1% -13.47% (p=0.000 n=25)
VerifyPSS/2048 182.2µ ± 1% 166.6µ ± 0% -8.52% (p=0.000 n=25)
geomean 1.791m 1.598m -10.78%
Change-Id: I89775c46a0bbe29380889047ba393c6cfc093ff1
Reviewed-on: https://go-review.googlesource.com/c/go/+/628255
Reviewed-by: Filippo Valsorda <filippo@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
61 lines
1.4 KiB
Go
61 lines
1.4 KiB
Go
// Copyright 2024 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
//go:build !purego
|
|
|
|
package bigmod
|
|
|
|
import "unsafe"
|
|
|
|
// The generic implementation relies on 64x64->128 bit multiplication and
|
|
// 64-bit add-with-carry, which are compiler intrinsics on many architectures.
|
|
// Wasm doesn't support those. Here we implement it with 32x32->64 bit
|
|
// operations, which is more efficient on Wasm.
|
|
|
|
// idx returns a pointer to the word located i words past x, treating each
// word as 8 bytes wide. No bounds checking is performed; the caller must
// ensure the resulting pointer stays within the memory x points into.
func idx(x *uint, i uintptr) *uint {
	const wordBytes = 8
	base := unsafe.Pointer(x)
	return (*uint)(unsafe.Add(base, i*wordBytes))
}
|
|
|
|
func addMulVVWWasm(z, x *uint, y uint, n uintptr) (carry uint) {
|
|
const mask32 = 1<<32 - 1
|
|
y0 := y & mask32
|
|
y1 := y >> 32
|
|
for i := range n {
|
|
xi := *idx(x, i)
|
|
x0 := xi & mask32
|
|
x1 := xi >> 32
|
|
zi := *idx(z, i)
|
|
z0 := zi & mask32
|
|
z1 := zi >> 32
|
|
c0 := carry & mask32
|
|
c1 := carry >> 32
|
|
|
|
w00 := x0*y0 + z0 + c0
|
|
l00 := w00 & mask32
|
|
h00 := w00 >> 32
|
|
|
|
w01 := x0*y1 + z1 + h00
|
|
l01 := w01 & mask32
|
|
h01 := w01 >> 32
|
|
|
|
w10 := x1*y0 + c1 + l01
|
|
h10 := w10 >> 32
|
|
|
|
carry = x1*y1 + h10 + h01
|
|
*idx(z, i) = w10<<32 + l00
|
|
}
|
|
return carry
|
|
}
|
|
|
|
func addMulVVW1024(z, x *uint, y uint) (c uint) {
|
|
return addMulVVWWasm(z, x, y, 1024/_W)
|
|
}
|
|
|
|
func addMulVVW1536(z, x *uint, y uint) (c uint) {
|
|
return addMulVVWWasm(z, x, y, 1536/_W)
|
|
}
|
|
|
|
func addMulVVW2048(z, x *uint, y uint) (c uint) {
|
|
return addMulVVWWasm(z, x, y, 2048/_W)
|
|
}
|