cmd/compile: implement Avg64u, Hmul64, Hmul64u for wasm

This lets us remove useAvg and useHmul from the division rules.
The compiler is simpler and the generated code is faster.

goos: wasip1
goarch: wasm
pkg: internal/strconv
                               │   old.txt   │               new.txt               │
                               │   sec/op    │   sec/op     vs base                │
AppendFloat/Decimal              192.8n ± 1%   194.6n ± 0%   +0.91% (p=0.000 n=10)
AppendFloat/Float                328.6n ± 0%   279.6n ± 0%  -14.93% (p=0.000 n=10)
AppendFloat/Exp                  335.6n ± 1%   289.2n ± 1%  -13.80% (p=0.000 n=10)
AppendFloat/NegExp               336.0n ± 0%   289.1n ± 1%  -13.97% (p=0.000 n=10)
AppendFloat/LongExp              332.4n ± 0%   285.2n ± 1%  -14.20% (p=0.000 n=10)
AppendFloat/Big                  348.2n ± 0%   300.1n ± 0%  -13.83% (p=0.000 n=10)
AppendFloat/BinaryExp            137.4n ± 0%   138.2n ± 0%   +0.55% (p=0.001 n=10)
AppendFloat/32Integer            193.3n ± 1%   196.5n ± 0%   +1.66% (p=0.000 n=10)
AppendFloat/32ExactFraction      283.3n ± 0%   268.9n ± 1%   -5.08% (p=0.000 n=10)
AppendFloat/32Point              279.9n ± 0%   266.5n ± 0%   -4.80% (p=0.000 n=10)
AppendFloat/32Exp                300.1n ± 0%   288.3n ± 1%   -3.90% (p=0.000 n=10)
AppendFloat/32NegExp             288.2n ± 1%   277.9n ± 1%   -3.59% (p=0.000 n=10)
AppendFloat/32Shortest           261.7n ± 0%   250.2n ± 0%   -4.39% (p=0.000 n=10)
AppendFloat/32Fixed8Hard         173.3n ± 1%   158.9n ± 1%   -8.31% (p=0.000 n=10)
AppendFloat/32Fixed9Hard         180.0n ± 0%   167.9n ± 2%   -6.70% (p=0.000 n=10)
AppendFloat/64Fixed1             167.1n ± 0%   149.6n ± 1%  -10.50% (p=0.000 n=10)
AppendFloat/64Fixed2             162.4n ± 1%   146.5n ± 0%   -9.73% (p=0.000 n=10)
AppendFloat/64Fixed2.5           165.5n ± 0%   149.4n ± 1%   -9.70% (p=0.000 n=10)
AppendFloat/64Fixed3             166.4n ± 1%   150.2n ± 0%   -9.74% (p=0.000 n=10)
AppendFloat/64Fixed4             163.7n ± 0%   149.6n ± 1%   -8.62% (p=0.000 n=10)
AppendFloat/64Fixed5Hard         182.8n ± 1%   167.1n ± 1%   -8.61% (p=0.000 n=10)
AppendFloat/64Fixed12            222.2n ± 0%   208.8n ± 0%   -6.05% (p=0.000 n=10)
AppendFloat/64Fixed16            197.6n ± 1%   181.7n ± 0%   -8.02% (p=0.000 n=10)
AppendFloat/64Fixed12Hard        194.5n ± 0%   181.0n ± 0%   -6.99% (p=0.000 n=10)
AppendFloat/64Fixed17Hard        205.1n ± 1%   191.9n ± 0%   -6.44% (p=0.000 n=10)
AppendFloat/64Fixed18Hard        6.269µ ± 0%   6.643µ ± 0%   +5.97% (p=0.000 n=10)
AppendFloat/64FixedF1            211.7n ± 1%   197.0n ± 0%   -6.95% (p=0.000 n=10)
AppendFloat/64FixedF2            189.4n ± 0%   174.2n ± 0%   -8.08% (p=0.000 n=10)
AppendFloat/64FixedF3            169.0n ± 0%   154.9n ± 0%   -8.32% (p=0.000 n=10)
AppendFloat/Slowpath64           321.2n ± 0%   274.2n ± 1%  -14.63% (p=0.000 n=10)
AppendFloat/SlowpathDenormal64   307.4n ± 1%   261.2n ± 0%  -15.03% (p=0.000 n=10)
AppendInt                        3.367µ ± 1%   3.376µ ± 0%        ~ (p=0.517 n=10)
AppendUint                       675.5n ± 0%   676.9n ± 0%        ~ (p=0.196 n=10)
AppendIntSmall                   28.13n ± 1%   28.17n ± 0%   +0.14% (p=0.015 n=10)
AppendUintVarlen/digits=1        20.70n ± 0%   20.51n ± 1%   -0.89% (p=0.018 n=10)
AppendUintVarlen/digits=2        20.43n ± 0%   20.27n ± 0%   -0.81% (p=0.001 n=10)
AppendUintVarlen/digits=3        38.48n ± 0%   37.93n ± 0%   -1.43% (p=0.000 n=10)
AppendUintVarlen/digits=4        41.10n ± 0%   38.78n ± 1%   -5.62% (p=0.000 n=10)
AppendUintVarlen/digits=5        42.25n ± 1%   42.11n ± 0%   -0.32% (p=0.041 n=10)
AppendUintVarlen/digits=6        45.40n ± 1%   43.14n ± 0%   -4.98% (p=0.000 n=10)
AppendUintVarlen/digits=7        46.81n ± 1%   46.03n ± 0%   -1.66% (p=0.000 n=10)
AppendUintVarlen/digits=8        48.88n ± 1%   46.59n ± 1%   -4.68% (p=0.000 n=10)
AppendUintVarlen/digits=9        49.94n ± 2%   49.41n ± 1%   -1.06% (p=0.000 n=10)
AppendUintVarlen/digits=10       57.28n ± 1%   56.92n ± 1%   -0.62% (p=0.045 n=10)
AppendUintVarlen/digits=11       60.09n ± 1%   58.11n ± 2%   -3.30% (p=0.000 n=10)
AppendUintVarlen/digits=12       62.22n ± 0%   61.85n ± 0%   -0.59% (p=0.000 n=10)
AppendUintVarlen/digits=13       64.94n ± 0%   62.92n ± 0%   -3.10% (p=0.000 n=10)
AppendUintVarlen/digits=14       65.42n ± 1%   65.19n ± 1%   -0.34% (p=0.005 n=10)
AppendUintVarlen/digits=15       68.17n ± 0%   66.13n ± 0%   -2.99% (p=0.000 n=10)
AppendUintVarlen/digits=16       70.21n ± 1%   70.09n ± 1%        ~ (p=0.517 n=10)
AppendUintVarlen/digits=17       72.93n ± 0%   70.49n ± 0%   -3.34% (p=0.000 n=10)
AppendUintVarlen/digits=18       73.01n ± 0%   72.75n ± 0%   -0.35% (p=0.000 n=10)
AppendUintVarlen/digits=19       79.27n ± 1%   79.49n ± 1%        ~ (p=0.671 n=10)
AppendUintVarlen/digits=20       82.18n ± 0%   80.43n ± 1%   -2.14% (p=0.000 n=10)
geomean                          143.4n        136.0n        -5.20%


Change-Id: I8245814a0259ad13cf9225f57db8e9fe3d2e4267
Reviewed-on: https://go-review.googlesource.com/c/go/+/717407
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Russ Cox 2025-11-03 22:09:48 -05:00
parent 9f6590f333
commit 6e165b4d17
8 changed files with 322 additions and 94 deletions

View file

@ -124,6 +124,7 @@ func div7_int8(i int8) int8 {
// arm64: "MULW"
// arm64: "SBFX [$]10, R[0-9]+, [$]22,"
// arm64: "SUB R[0-9]+->31,"
// wasm: "I64Const [$]147"
return i / 7
}
@ -136,6 +137,7 @@ func div7_int16(i int16) int16 {
// arm64: "MULW"
// arm64: "SBFX [$]18, R[0-9]+, [$]14,"
// arm64: "SUB R[0-9]+->31,"
// wasm: "I64Const [$]37450"
return i / 7
}
@ -145,6 +147,7 @@ func div7_int32(i int32) int32 {
// arm64: "MUL "
// arm64: "ASR [$]34,"
// arm64: "SUB R[0-9]+->63,"
// wasm: "I64Const [$]2454267027"
return i / 7
}
@ -160,6 +163,7 @@ func div9_int32(i int32) int32 {
// arm64: "MUL "
// arm64: "ASR [$]35,"
// arm64: "SUB R[0-9]+->63,"
// wasm: "I64Const [$]3817748708"
return i / 9
}
@ -170,6 +174,8 @@ func div7_int64(i int64) int64 {
// arm64: "SMULH"
// arm64: "ASR [$]1,"
// arm64: "SUB R[0-9]+->63,"
// wasm: "I64Const [$]613566757"
// wasm: "I64Const [$]1227133513"
return i / 7
}
@ -185,6 +191,7 @@ func div3_int32(i int32) int32 {
// arm64: "MUL"
// arm64: "ASR [$]33,"
// arm64: "SUB R[0-9]+->63,"
// wasm: "I64Const [$]2863311531"
return i / 3
}
@ -195,6 +202,8 @@ func div3_int64(i int64) int64 {
// arm64: "ADD"
// arm64: "ASR [$]1,"
// arm64: "SUB R[0-9]+->63,"
// wasm: "I64Const [$]-1431655766"
// wasm: "I64Const [$]2863311531"
return i / 3
}
@ -211,6 +220,8 @@ func div7_int16u(i int16) int16 {
// arm64: "MULW"
// arm64: "UBFX [$]18, R[0-9]+, [$]14,"
// arm64: -"SUB"
// wasm: "I64Const [$]37450"
// wasm: -"I64Sub"
return i / 7
}
@ -226,6 +237,8 @@ func div7_int32u(i int32) int32 {
// arm64: "MUL"
// arm64: "LSR [$]34,"
// arm64: -"SUB"
// wasm: "I64Const [$]2454267027"
// wasm: -"I64Sub"
return i / 7
}
@ -238,6 +251,9 @@ func div7_int64u(i int64) int64 {
// arm64: "UMULH"
// arm64: "LSR [$]2,"
// arm64: -"SUB"
// wasm: "I64Const [$]1227133514"
// wasm: "I64Const [$]2454267026"
// wasm: -"I64Sub"
return i / 7
}
@ -249,6 +265,7 @@ func div7_uint8(i uint8) uint8 {
// arm64: "MOVD [$]293,"
// arm64: "MULW"
// arm64: "UBFX [$]11, R[0-9]+, [$]21,"
// wasm: "I64Const [$]293"
return i / 7
}
@ -257,6 +274,7 @@ func div7_uint16(i uint16) uint16 {
// arm64: "MOVD [$]74899,"
// arm64: "MUL"
// arm64: "LSR [$]19,"
// wasm: "I64Const [$]74899"
return i / 7
}
@ -267,6 +285,7 @@ func div3_uint16(i uint16) uint16 {
// arm64: "MOVD [$]87382,"
// arm64: "MUL"
// arm64: "LSR [$]18,"
// wasm: "I64Const [$]87382"
return i / 3
}
@ -275,6 +294,7 @@ func div3_uint32(i uint32) uint32 {
// arm64: "MOVD [$]2863311531,"
// arm64: "MUL"
// arm64: "LSR [$]33,"
// wasm: "I64Const [$]2863311531"
return i / 3
}
@ -286,6 +306,8 @@ func div3_uint64(i uint64) uint64 {
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "UMULH"
// arm64: "LSR [$]1,"
// wasm: "I64Const [$]2863311530"
// wasm: "I64Const [$]2863311531"
return i / 3
}
@ -307,6 +329,7 @@ func div14_uint32(i uint32) uint32 {
// arm64: "MOVD [$]2454267027,"
// arm64: "MUL"
// arm64: "LSR [$]34,"
// wasm: "I64Const [$]2454267027"
return i / 14
}
@ -318,6 +341,8 @@ func div14_uint64(i uint64) uint64 {
// arm64: "MOVD [$]-7905747460161236406,"
// arm64: "UMULH"
// arm64: "LSR [$]2,"
// wasm: "I64Const [$]1227133514"
// wasm: "I64Const [$]2454267026"
return i / 14
}
@ -345,6 +370,7 @@ func div7_uint32(i uint32) uint32 {
// arm64: "SUB"
// arm64: "ADD R[0-9]+>>1,"
// arm64: "LSR [$]34,"
// wasm: "I64Const [$]613566757"
return i / 7
}
@ -358,6 +384,8 @@ func div7_uint64(i uint64) uint64 {
// arm64: "SUB"
// arm64: "ADD R[0-9]+>>1,"
// arm64: "LSR [$]2,"
// wasm: "I64Const [$]613566756"
// wasm: "I64Const [$]2454267027"
return i / 7
}
@ -370,6 +398,8 @@ func div12345_uint64(i uint64) uint64 {
// arm64: "MOVD [$]-6205696892516465602,"
// arm64: "UMULH"
// arm64: "LSR [$]13,"
// wasm: "I64Const [$]835683390"
// wasm: "I64Const [$]2850090894"
return i / 12345
}
@ -480,7 +510,7 @@ func div_divis32_uint8(i uint8) (uint8, bool) {
// arm64: "UBFX [$]5, R[0-9]+, [$]3"
// arm64: "TSTW [$]31,"
// arm64: "CSET EQ"
return i/32, i%32 == 0
return i / 32, i%32 == 0
}
func div_ndivis32_uint8(i uint8) (uint8, bool) {
@ -490,7 +520,7 @@ func div_ndivis32_uint8(i uint8) (uint8, bool) {
// arm64: "UBFX [$]5, R[0-9]+, [$]3"
// arm64: "TSTW [$]31,"
// arm64: "CSET NE"
return i/32, i%32 != 0
return i / 32, i%32 != 0
}
func div_divis32_uint16(i uint16) (uint16, bool) {
@ -500,7 +530,7 @@ func div_divis32_uint16(i uint16) (uint16, bool) {
// arm64: "UBFX [$]5, R[0-9]+, [$]11"
// arm64: "TSTW [$]31,"
// arm64: "CSET EQ"
return i/32, i%32 == 0
return i / 32, i%32 == 0
}
func div_ndivis32_uint16(i uint16) (uint16, bool) {
@ -510,7 +540,7 @@ func div_ndivis32_uint16(i uint16) (uint16, bool) {
// arm64: "UBFX [$]5, R[0-9]+, [$]11,"
// arm64: "TSTW [$]31,"
// arm64: "CSET NE"
return i/32, i%32 != 0
return i / 32, i%32 != 0
}
func div_divis32_uint32(i uint32) (uint32, bool) {
@ -520,7 +550,7 @@ func div_divis32_uint32(i uint32) (uint32, bool) {
// arm64: "UBFX [$]5, R[0-9]+, [$]27,"
// arm64: "TSTW [$]31,"
// arm64: "CSET EQ"
return i/32, i%32 == 0
return i / 32, i%32 == 0
}
func div_ndivis32_uint32(i uint32) (uint32, bool) {
@ -530,7 +560,7 @@ func div_ndivis32_uint32(i uint32) (uint32, bool) {
// arm64: "UBFX [$]5, R[0-9]+, [$]27,"
// arm64: "TSTW [$]31,"
// arm64: "CSET NE"
return i/32, i%32 != 0
return i / 32, i%32 != 0
}
func div_divis32_uint64(i uint64) (uint64, bool) {
@ -541,7 +571,7 @@ func div_divis32_uint64(i uint64) (uint64, bool) {
// arm64: "LSR [$]5,"
// arm64: "TST [$]31,"
// arm64: "CSET EQ"
return i/32, i%32 == 0
return i / 32, i%32 == 0
}
func div_ndivis32_uint64(i uint64) (uint64, bool) {
@ -552,7 +582,7 @@ func div_ndivis32_uint64(i uint64) (uint64, bool) {
// arm64: "LSR [$]5,"
// arm64: "TST [$]31,"
// arm64: "CSET NE"
return i/32, i%32 != 0
return i / 32, i%32 != 0
}
func div_divis32_int8(i int8) (int8, bool) {
@ -566,7 +596,7 @@ func div_divis32_int8(i int8) (int8, bool) {
// arm64: "SBFX [$]5, R[0-9]+, [$]3,"
// arm64: "TSTW [$]31,"
// arm64: "CSET EQ"
return i/32, i%32 == 0
return i / 32, i%32 == 0
}
func div_ndivis32_int8(i int8) (int8, bool) {
@ -580,7 +610,7 @@ func div_ndivis32_int8(i int8) (int8, bool) {
// arm64: "SBFX [$]5, R[0-9]+, [$]3,"
// arm64: "TSTW [$]31,"
// arm64: "CSET NE"
return i/32, i%32 != 0
return i / 32, i%32 != 0
}
func div_divis32_int16(i int16) (int16, bool) {
@ -594,7 +624,7 @@ func div_divis32_int16(i int16) (int16, bool) {
// arm64: "SBFX [$]5, R[0-9]+, [$]11,"
// arm64: "TSTW [$]31,"
// arm64: "CSET EQ"
return i/32, i%32 == 0
return i / 32, i%32 == 0
}
func div_ndivis32_int16(i int16) (int16, bool) {
@ -608,7 +638,7 @@ func div_ndivis32_int16(i int16) (int16, bool) {
// arm64: "SBFX [$]5, R[0-9]+, [$]11,"
// arm64: "TSTW [$]31,"
// arm64: "CSET NE"
return i/32, i%32 != 0
return i / 32, i%32 != 0
}
func div_divis32_int32(i int32) (int32, bool) {
@ -622,7 +652,7 @@ func div_divis32_int32(i int32) (int32, bool) {
// arm64: "SBFX [$]5, R[0-9]+, [$]27,"
// arm64: "TSTW [$]31,"
// arm64: "CSET EQ"
return i/32, i%32 == 0
return i / 32, i%32 == 0
}
func div_ndivis32_int32(i int32) (int32, bool) {
@ -636,7 +666,7 @@ func div_ndivis32_int32(i int32) (int32, bool) {
// arm64: "SBFX [$]5, R[0-9]+, [$]27,"
// arm64: "TSTW [$]31,"
// arm64: "CSET NE"
return i/32, i%32 != 0
return i / 32, i%32 != 0
}
func div_divis32_int64(i int64) (int64, bool) {
@ -651,7 +681,7 @@ func div_divis32_int64(i int64) (int64, bool) {
// arm64: "ASR [$]5,"
// arm64: "TST [$]31,"
// arm64: "CSET EQ"
return i/32, i%32 == 0
return i / 32, i%32 == 0
}
func div_ndivis32_int64(i int64) (int64, bool) {
@ -666,7 +696,7 @@ func div_ndivis32_int64(i int64) (int64, bool) {
// arm64: "ASR [$]5,"
// arm64: "TST [$]31,"
// arm64: "CSET NE"
return i/32, i%32 != 0
return i / 32, i%32 != 0
}
// Divisibility and non-divisibility by non-power-of-two.
@ -923,7 +953,7 @@ func div_divis6_uint8(i uint8) (uint8, bool) {
// arm64: "UBFX [$]11, R[0-9]+, [$]21,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
return i / 6, i%6 == 0
}
func div_ndivis6_uint8(i uint8) (uint8, bool) {
@ -936,7 +966,7 @@ func div_ndivis6_uint8(i uint8) (uint8, bool) {
// arm64: "UBFX [$]11, R[0-9]+, [$]21,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
return i / 6, i%6 != 0
}
func div_divis6_uint16(i uint16) (uint16, bool) {
@ -950,7 +980,7 @@ func div_divis6_uint16(i uint16) (uint16, bool) {
// arm64: "LSR [$]19,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
return i / 6, i%6 == 0
}
func div_ndivis6_uint16(i uint16) (uint16, bool) {
@ -964,7 +994,7 @@ func div_ndivis6_uint16(i uint16) (uint16, bool) {
// arm64: "LSR [$]19,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
return i / 6, i%6 != 0
}
func div_divis6_uint32(i uint32) (uint32, bool) {
@ -978,7 +1008,7 @@ func div_divis6_uint32(i uint32) (uint32, bool) {
// arm64: "LSR [$]34,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
return i / 6, i%6 == 0
}
func div_ndivis6_uint32(i uint32) (uint32, bool) {
@ -992,7 +1022,7 @@ func div_ndivis6_uint32(i uint32) (uint32, bool) {
// arm64: "LSR [$]34,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
return i / 6, i%6 != 0
}
func div_divis6_uint64(i uint64) (uint64, bool) {
@ -1009,7 +1039,7 @@ func div_divis6_uint64(i uint64) (uint64, bool) {
// arm64: "LSR [$]2,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
return i / 6, i%6 == 0
}
func div_ndivis6_uint64(i uint64) (uint64, bool) {
@ -1026,7 +1056,7 @@ func div_ndivis6_uint64(i uint64) (uint64, bool) {
// arm64: "LSR [$]2,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
return i / 6, i%6 != 0
}
func div_divis6_int8(i int8) (int8, bool) {
@ -1042,7 +1072,7 @@ func div_divis6_int8(i int8) (int8, bool) {
// arm64: "SUB R[0-9]+->31,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
return i / 6, i%6 == 0
}
func div_ndivis6_int8(i int8) (int8, bool) {
@ -1058,7 +1088,7 @@ func div_ndivis6_int8(i int8) (int8, bool) {
// arm64: "SUB R[0-9]+->31,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
return i / 6, i%6 != 0
}
func div_divis6_int16(i int16) (int16, bool) {
@ -1074,7 +1104,7 @@ func div_divis6_int16(i int16) (int16, bool) {
// arm64: "SUB R[0-9]+->31,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
return i / 6, i%6 == 0
}
func div_ndivis6_int16(i int16) (int16, bool) {
@ -1090,7 +1120,7 @@ func div_ndivis6_int16(i int16) (int16, bool) {
// arm64: "SUB R[0-9]+->31,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
return i / 6, i%6 != 0
}
func div_divis6_int32(i int32) (int32, bool) {
@ -1107,7 +1137,7 @@ func div_divis6_int32(i int32) (int32, bool) {
// arm64: "SUB R[0-9]+->63,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
return i / 6, i%6 == 0
}
func div_ndivis6_int32(i int32) (int32, bool) {
@ -1124,7 +1154,7 @@ func div_ndivis6_int32(i int32) (int32, bool) {
// arm64: "SUB R[0-9]+->63,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
return i / 6, i%6 != 0
}
func div_divis6_int64(i int64) (int64, bool) {
@ -1145,7 +1175,7 @@ func div_divis6_int64(i int64) (int64, bool) {
// arm64: "SUB R[0-9]+->63,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
return i / 6, i%6 == 0
}
func div_ndivis6_int64(i int64) (int64, bool) {
@ -1166,5 +1196,5 @@ func div_ndivis6_int64(i int64) (int64, bool) {
// arm64: "SUB R[0-9]+->63,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
return i / 6, i%6 != 0
}