mirror of
https://github.com/golang/go.git
synced 2026-06-27 03:11:23 +00:00
cmd/compile: remove flags → bool → flags roundtrips on amd64
Fixes #76056 Fixes #76060 If we modify the issue's fieldReduceOnce2 function to: // fieldReduceOnce reduces a value a < 2q. func fieldReduceOnce2(a uint32) fieldElement { x, b := bits.Sub(uint(a), uint(q), 0) return fieldElement(subtle.ConstantTimeSelect(int(b), int(a), int(x))) } We get the wanted assembly*: MOVL AX, CX MOVL AX, DX SUBQ $8380417, CX CMOVQCS DX, CX MOVQ CX, AX ; not ideal code size but handled by the register renaming unit RET Changes made to fieldReduceOnce2: - fixed a bug where the a and x arguments to subtle.ConstantTimeSelect were swapped. We should use a when the sub underflows and x otherwise. - use bits.Sub, which is intrinsified, rather than bits.Sub32, which is not. *We use CMOVQCS + MOVQ because the CMOV randomly gets generated backward; I believe this would be fixed if we teach regalloc to commute CMOV (by swapping the two register args and inverting the condition). Change-Id: I01eca545d3c5c8a1c1f5a107e0089f715359dfc6 Reviewed-on: https://go-review.googlesource.com/c/go/+/778141 LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Keith Randall <khr@google.com> Auto-Submit: Jorropo <jorropo.pgm@gmail.com> Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
This commit is contained in:
parent
c6eaf03788
commit
9e0467b174
4 changed files with 14087 additions and 10 deletions
|
|
@ -1812,3 +1812,55 @@
|
|||
(VPMOVMToVec32x16 (VCMPPS512 [3] x y))
|
||||
(VPORD512 (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) (VPMOVMToVec64x8 (VCMPPD512 [3] y y))) =>
|
||||
(VPMOVMToVec64x8 (VCMPPD512 [3] x y))
|
||||
|
||||
// remove flags → bool → flags roundtrip
|
||||
// Only do it if the flag-generating instruction is in the same block; otherwise flagalloc will most likely undo this optimization and make things worse.
|
||||
// Branch on a SETcc boolean that is immediately re-tested (optionally through MOVBQZX): branch on the original flags instead.
// The pattern and result alternations are paired by position. SETGF/SETGEF must pair with UGT/UGE in that order,
// mirroring the GF→GTF and GEF→GEF pairing used by the CMOV rules in this file; swapping them would make a
// float "greater than" branch also fire on equality.
(NE t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x) yes no) && t.Block == s.Block => ((EQ|NE|LT|GT|LE|GE|UGT|ULT|UGE|ULE|EQF|NEF|UGT|UGE) flags yes no)
|
||||
(NE t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x) yes no) && t.Block == s.Block => ((EQ|NE|LT|GT|LE|GE|UGT|ULT|UGE|ULE|EQF|NEF|UGT|UGE) flags yes no)
|
||||
(NE t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x) yes no) && t.Block == s.Block => ((EQ|NE|LT|GT|LE|GE|UGT|ULT|UGE|ULE|EQF|NEF|UGT|UGE) flags yes no)
|
||||
(NE t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags) s) yes no) && t.Block == s.Block => ((EQ|NE|LT|GT|LE|GE|UGT|ULT|UGE|ULE|EQF|NEF|UGT|UGE) flags yes no)
|
||||
|
||||
// Conditional moves (64-, 32- and 16-bit) keyed on a SETcc boolean that is immediately re-tested,
// optionally through MOVBQZX: use the CMOV variant for the original condition directly on flags.
// Pattern and result alternations are paired by position (A→HI, B→CS, AE→CC, BE→LS, GF→GTF, GEF→GEF).
// Each rule only fires when the TEST and the SETcc are in the same block.
(CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags)
|
||||
(CMOVQNE yes no t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags)
|
||||
(CMOVQNE yes no t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags)
|
||||
(CMOVQNE yes no t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags) s)) && t.Block == s.Block => (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags)
|
||||
|
||||
(CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags)
|
||||
(CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags)
|
||||
(CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags)
|
||||
(CMOVLNE yes no t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags) s)) && t.Block == s.Block => (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags)
|
||||
|
||||
(CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags)
|
||||
(CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags)
|
||||
(CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags)
|
||||
(CMOVWNE yes no t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags) s)) && t.Block == s.Block => (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags)
|
||||
|
||||
// SETNE of a SETcc boolean tested against itself just reproduces that boolean,
// so the rule replaces the whole expression with the existing SETcc value s.
// Each rule only fires when the TEST and the SETcc are in the same block.
(SETNE t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => s
|
||||
(SETNE t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => s
|
||||
(SETNE t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => s
|
||||
(SETNE t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags) s)) && t.Block == s.Block => s
|
||||
|
||||
// EQ branches when the re-tested SETcc boolean is zero, so the replacement branches on the
// inverse of the SETcc condition (EQ→NE, L→GE, G→LE, A→ULE, B→UGE, ...), paired by position.
// Only the integer SETcc forms are listed; the floating-point forms (EQF/NEF/GF/GEF) are not handled here.
// NOTE(review): presumably because inverting a float condition must account for unordered (NaN) results — confirm.
(EQ t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x) yes no) && t.Block == s.Block => ((NE|EQ|GE|LE|GT|LT|ULE|UGE|ULT|UGT) flags yes no)
|
||||
(EQ t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x) yes no) && t.Block == s.Block => ((NE|EQ|GE|LE|GT|LT|ULE|UGE|ULT|UGT) flags yes no)
|
||||
(EQ t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x) yes no) && t.Block == s.Block => ((NE|EQ|GE|LE|GT|LT|ULE|UGE|ULT|UGT) flags yes no)
|
||||
(EQ t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags) s) yes no) && t.Block == s.Block => ((NE|EQ|GE|LE|GT|LT|ULE|UGE|ULT|UGT) flags yes no)
|
||||
|
||||
// Conditional moves (64-, 32- and 16-bit) keyed on a re-tested SETcc boolean being zero:
// use the CMOV variant for the inverse of the SETcc condition directly on flags
// (EQ→NE, L→GE, G→LE, A→LS, B→CC, AE→CS, BE→HI, ...), paired by position.
// Only the integer SETcc forms are listed. Each rule only fires when the TEST and the SETcc are in the same block.
(CMOVQEQ yes no t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVQ(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags)
|
||||
(CMOVQEQ yes no t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVQ(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags)
|
||||
(CMOVQEQ yes no t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVQ(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags)
|
||||
(CMOVQEQ yes no t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags) s)) && t.Block == s.Block => (CMOVQ(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags)
|
||||
|
||||
(CMOVLEQ yes no t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVL(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags)
|
||||
(CMOVLEQ yes no t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVL(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags)
|
||||
(CMOVLEQ yes no t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVL(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags)
|
||||
(CMOVLEQ yes no t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags) s)) && t.Block == s.Block => (CMOVL(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags)
|
||||
|
||||
(CMOVWEQ yes no t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVW(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags)
|
||||
(CMOVWEQ yes no t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVW(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags)
|
||||
(CMOVWEQ yes no t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVW(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags)
|
||||
(CMOVWEQ yes no t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags) s)) && t.Block == s.Block => (CMOVW(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags)
|
||||
|
||||
// SETEQ of a SETcc boolean tested against itself is the negation of that boolean,
// so the rule emits the SETcc for the inverse condition directly on the original flags
// (EQ→NE, L→GE, G→LE, A→BE, B→AE, ...), paired by position. Only the integer SETcc forms are listed.
// Each rule only fires when the TEST and the SETcc are in the same block.
(SETEQ t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (SET(NE|EQ|GE|LE|G|L|BE|AE|B|A) flags)
|
||||
(SETEQ t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (SET(NE|EQ|GE|LE|G|L|BE|AE|B|A) flags)
|
||||
(SETEQ t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (SET(NE|EQ|GE|LE|G|L|BE|AE|B|A) flags)
|
||||
(SETEQ t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags) s)) && t.Block == s.Block => (SET(NE|EQ|GE|LE|G|L|BE|AE|B|A) flags)
|
||||
|
|
|
|||
|
|
@ -2720,7 +2720,7 @@ var invertEqNeqOp = map[Op]Op{
|
|||
// simplifyBlock simplifies some constant values in b and evaluates
|
||||
// branches to non-uniquely dominated successors of b.
|
||||
func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) {
|
||||
for iv, v := range b.Values {
|
||||
for _, v := range b.Values {
|
||||
switch v.Op {
|
||||
case OpStaticLECall:
|
||||
if b.Func.pass.debug > 0 && len(v.Args) == 2 {
|
||||
|
|
@ -2874,14 +2874,6 @@ func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) {
|
|||
v.reset(OpCondSelect)
|
||||
v.AddArg3(y, zero, check)
|
||||
|
||||
// FIXME: workaround for go.dev/issues/76060
|
||||
// we need to schedule the Neq before the CondSelect even tho
|
||||
// scheduling is meaningless until we reach the schedule pass.
|
||||
if b.Values[len(b.Values)-1] != check {
|
||||
panic("unreachable; failed sanity check, new value isn't at the end of the block")
|
||||
}
|
||||
b.Values[iv], b.Values[len(b.Values)-1] = b.Values[len(b.Values)-1], b.Values[iv]
|
||||
|
||||
if b.Func.pass.debug > 0 {
|
||||
b.Func.Warnl(v.Pos, "Rewrote Mul %v into CondSelect; %v is bool", v, x)
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -6,7 +6,10 @@
|
|||
|
||||
package codegen
|
||||
|
||||
import "crypto/subtle"
|
||||
import (
|
||||
"crypto/subtle"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
func cmovint(c int) int {
|
||||
x := c + 4
|
||||
|
|
@ -808,3 +811,33 @@ func constantTimeSelect(v, x, y int) int {
|
|||
// riscv64/rva23u64:"CZERONEZ" "CZEROEQZ" "OR" -"SNEZ" -"NEG" -"AND"
|
||||
return subtle.ConstantTimeSelect(v, x, y)
|
||||
}
|
||||
|
||||
// issue76056fieldReduceOnceSub32 subtracts q from a with bits.Sub32 and adds q
// back when the subtraction borrows: inputs below q come back unchanged and
// inputs of at least q come back reduced by q. It is the multiply-by-borrow
// form of the reduction, kept as a codegen case for issue 76056.
func issue76056fieldReduceOnceSub32(a uint32) uint32 {
|
||||
const q = 8380417 // 2²³ - 2¹³ + 1
|
||||
// FIXME: the compiler struggles with Sub32 since it's not intrinsified.
|
||||
x, b := bits.Sub32(a, q, 0)
|
||||
// b is the borrow out of the subtraction, so b*q undoes it when a < q.
// FIXME: prove doesn't rewrite this multiply to a condselect because it doesn't know that b is always 0 or 1.
|
||||
return x + b*q
|
||||
}
|
||||
|
||||
// issue76056fieldReduceOnce2Sub32 subtracts q from a with bits.Sub32 and uses
// subtle.ConstantTimeSelect on the borrow to pick the result: the original a
// when the subtraction borrowed, the difference x otherwise.
func issue76056fieldReduceOnce2Sub32(a uint32) uint32 {
|
||||
const q = 8380417 // 2²³ - 2¹³ + 1
|
||||
// FIXME: the compiler struggles with Sub32 since it's not intrinsified.
|
||||
x, b := bits.Sub32(a, q, 0)
|
||||
return uint32(subtle.ConstantTimeSelect(int(b), int(a), int(x)))
|
||||
}
|
||||
|
||||
// issue76056fieldReduceOnceSub64 is the 64-bit variant of the multiply-by-borrow
// reduction: it widens a to uint64, subtracts q with bits.Sub64, and adds q
// back (after truncating to 32 bits) when the subtraction borrowed.
func issue76056fieldReduceOnceSub64(a uint32) uint32 {
|
||||
const q = 8380417 // 2²³ - 2¹³ + 1
|
||||
x, b := bits.Sub64(uint64(a), q, 0)
|
||||
// b is the borrow out of the subtraction, so uint32(b)*q undoes it when a < q.
// FIXME: prove doesn't rewrite this multiply to a condselect because it doesn't know that b is always 0 or 1.
|
||||
return uint32(x) + uint32(b)*q
|
||||
}
|
||||
|
||||
// issue76056fieldReduceOnce2Sub64 widens a to uint64, subtracts q with
// bits.Sub64, and uses subtle.ConstantTimeSelect on the borrow to return a when
// the subtraction borrowed and the difference otherwise. The amd64 directives
// below require a SUB and a CMOV and forbid TEST and SBB on the annotated lines.
func issue76056fieldReduceOnce2Sub64(a uint32) uint32 {
|
||||
const q = 8380417 // 2²³ - 2¹³ + 1
|
||||
// amd64:"SUB" -"TEST" -"SBB"
|
||||
x, b := bits.Sub64(uint64(a), q, 0)
|
||||
// amd64:"CMOV" -"TEST" -"SBB"
|
||||
return uint32(subtle.ConstantTimeSelect(int(b), int(a), int(x)))
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue