cmd/compile: shuffle bits.Sub intrinsic generation on amd64

Assuming the CPU recognizes SBB RX, RX as a dependency break,
this is a no-op; however, SET is much more canonical and easier
to match on.

Updates #76056

Change-Id: Icc590dbcc76a8ed2fca7b167cfb66a2d33d4d2d5
Reviewed-on: https://go-review.googlesource.com/c/go/+/778140
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Jorropo <jorropo.pgm@gmail.com>
This commit is contained in:
Jorropo 2026-05-15 09:38:16 +02:00 committed by Gopher Robot
parent 8bd95ae848
commit 212065c922
3 changed files with 36 additions and 45 deletions

View file

@ -27,15 +27,10 @@
(Div8u x y) => (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
(Div(32|64)F ...) => (DIVS(S|D) ...)
(Select0 (Add64carry x y c)) =>
(Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
(Select1 (Add64carry x y c)) =>
(NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
(Select0 (Sub64borrow x y c)) =>
(Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
(Select1 (Sub64borrow x y c)) =>
(NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
(Select0 (Add64carry x y c)) => (Select0 (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
(Select1 (Add64carry x y c)) => (MOVBQZX (SETB <types.Types[types.TUINT8]> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
(Select0 (Sub64borrow x y c)) => (Select0 (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
(Select1 (Sub64borrow x y c)) => (MOVBQZX (SETB <types.Types[types.TUINT8]> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
// Optimize ADCQ and friends
(ADCQ x (MOVQconst [c]) carry) && is32Bit(c) => (ADCQconst x [int32(c)] carry)
(ADCQ x y (FlagEQ)) => (ADDQcarry x y)
@ -46,7 +41,7 @@
(SBBQconst x [c] (FlagEQ)) => (SUBQconstborrow x [c])
(SUBQborrow x (MOVQconst [c])) && is32Bit(c) => (SUBQconstborrow x [int32(c)])
(Select1 (NEGLflags (MOVQconst [0]))) => (FlagEQ)
(Select1 (NEGLflags (NEGQ (SBBQcarrymask x)))) => x
(Select1 (NEGLflags (MOVBQZX (SETB x)))) => x
(Mul64uhilo ...) => (MULQU2 ...)

View file

@ -75874,7 +75874,7 @@ func rewriteValueAMD64_OpSelect0(v *Value) bool {
return true
}
// match: (Select0 (Add64carry x y c))
// result: (Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
// result: (Select0 (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
for {
if v_0.Op != OpAdd64carry {
break
@ -75883,7 +75883,6 @@ func rewriteValueAMD64_OpSelect0(v *Value) bool {
x := v_0.Args[0]
y := v_0.Args[1]
v.reset(OpSelect0)
v.Type = typ.UInt64
v0 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags))
v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
@ -75894,7 +75893,7 @@ func rewriteValueAMD64_OpSelect0(v *Value) bool {
return true
}
// match: (Select0 (Sub64borrow x y c))
// result: (Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
// result: (Select0 (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
for {
if v_0.Op != OpSub64borrow {
break
@ -75903,7 +75902,6 @@ func rewriteValueAMD64_OpSelect0(v *Value) bool {
x := v_0.Args[0]
y := v_0.Args[1]
v.reset(OpSelect0)
v.Type = typ.UInt64
v0 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags))
v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
@ -76018,7 +76016,7 @@ func rewriteValueAMD64_OpSelect1(v *Value) bool {
return true
}
// match: (Select1 (Add64carry x y c))
// result: (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
// result: (MOVBQZX (SETB <types.Types[types.TUINT8]> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
for {
if v_0.Op != OpAdd64carry {
break
@ -76026,9 +76024,8 @@ func rewriteValueAMD64_OpSelect1(v *Value) bool {
c := v_0.Args[2]
x := v_0.Args[0]
y := v_0.Args[1]
v.reset(OpAMD64NEGQ)
v.Type = typ.UInt64
v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64)
v.reset(OpAMD64MOVBQZX)
v0 := b.NewValue0(v.Pos, OpAMD64SETB, types.Types[types.TUINT8])
v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v2 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags))
v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
@ -76042,7 +76039,7 @@ func rewriteValueAMD64_OpSelect1(v *Value) bool {
return true
}
// match: (Select1 (Sub64borrow x y c))
// result: (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
// result: (MOVBQZX (SETB <types.Types[types.TUINT8]> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
for {
if v_0.Op != OpSub64borrow {
break
@ -76050,9 +76047,8 @@ func rewriteValueAMD64_OpSelect1(v *Value) bool {
c := v_0.Args[2]
x := v_0.Args[0]
y := v_0.Args[1]
v.reset(OpAMD64NEGQ)
v.Type = typ.UInt64
v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64)
v.reset(OpAMD64MOVBQZX)
v0 := b.NewValue0(v.Pos, OpAMD64SETB, types.Types[types.TUINT8])
v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v2 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags))
v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
@ -76078,18 +76074,18 @@ func rewriteValueAMD64_OpSelect1(v *Value) bool {
v.reset(OpAMD64FlagEQ)
return true
}
// match: (Select1 (NEGLflags (NEGQ (SBBQcarrymask x))))
// match: (Select1 (NEGLflags (MOVBQZX (SETB x))))
// result: x
for {
if v_0.Op != OpAMD64NEGLflags {
break
}
v_0_0 := v_0.Args[0]
if v_0_0.Op != OpAMD64NEGQ {
if v_0_0.Op != OpAMD64MOVBQZX {
break
}
v_0_0_0 := v_0_0.Args[0]
if v_0_0_0.Op != OpAMD64SBBQcarrymask {
if v_0_0_0.Op != OpAMD64SETB {
break
}
x := v_0_0_0.Args[0]

View file

@ -535,7 +535,7 @@ func IterateBits8(n uint8) int {
func Add(x, y, ci uint) (r, co uint) {
// arm64:"ADDS" "ADCS" "ADC" -"ADD " -"CMP"
// amd64:"NEGL" "ADCQ" "SBBQ" "NEGQ"
// amd64:"NEGL" "ADCQ" "SETCS" "MOVBLZX"
// ppc64x: "ADDC" "ADDE" "ADDZE"
// s390x:"ADDE" "ADDC [$]-1,"
// riscv64: "ADD" "SLTU"
@ -544,7 +544,7 @@ func Add(x, y, ci uint) (r, co uint) {
func AddC(x, ci uint) (r, co uint) {
// arm64:"ADDS" "ADCS" "ADC" -"ADD " -"CMP"
// amd64:"NEGL" "ADCQ" "SBBQ" "NEGQ"
// amd64:"NEGL" "ADCQ" "SETCS" "MOVBLZX"
// loong64: "ADDV" "SGTU"
// ppc64x: "ADDC" "ADDE" "ADDZE"
// s390x:"ADDE" "ADDC [$]-1,"
@ -555,7 +555,7 @@ func AddC(x, ci uint) (r, co uint) {
func AddZ(x, y uint) (r, co uint) {
// arm64:"ADDS" "ADC" -"ADCS" -"ADD " -"CMP"
// amd64:"ADDQ" "SBBQ" "NEGQ" -"NEGL" -"ADCQ"
// amd64:"ADDQ" "SETCS" "MOVBLZX" -"NEGL" -"ADCQ"
// loong64: "ADDV" "SGTU"
// ppc64x: "ADDC" -"ADDE" "ADDZE"
// s390x:"ADDC" -"ADDC [$]-1,"
@ -566,7 +566,7 @@ func AddZ(x, y uint) (r, co uint) {
func AddR(x, y, ci uint) uint {
// arm64:"ADDS" "ADCS" -"ADD " -"CMP"
// amd64:"NEGL" "ADCQ" -"SBBQ" -"NEGQ"
// amd64:"NEGL" "ADCQ" -"SETCS" -"MOVBLZX"
// loong64: "ADDV" -"SGTU"
// ppc64x: "ADDC" "ADDE" -"ADDZE"
// s390x:"ADDE" "ADDC [$]-1,"
@ -580,7 +580,7 @@ func AddM(p, q, r *[3]uint) {
var c uint
r[0], c = bits.Add(p[0], q[0], c)
// arm64:"ADCS" -"ADD " -"CMP"
// amd64:"ADCQ" -"NEGL" -"SBBQ" -"NEGQ"
// amd64:"ADCQ" -"NEGL" -"SETCS" -"MOVBLZX"
// s390x:"ADDE" -"ADDC [$]-1,"
r[1], c = bits.Add(p[1], q[1], c)
r[2], c = bits.Add(p[2], q[2], c)
@ -588,7 +588,7 @@ func AddM(p, q, r *[3]uint) {
func Add64(x, y, ci uint64) (r, co uint64) {
// arm64:"ADDS" "ADCS" "ADC" -"ADD " -"CMP"
// amd64:"NEGL" "ADCQ" "SBBQ" "NEGQ"
// amd64:"NEGL" "ADCQ" "SETCS" "MOVBLZX"
// loong64: "ADDV" "SGTU"
// ppc64x: "ADDC" "ADDE" "ADDZE"
// s390x:"ADDE" "ADDC [$]-1,"
@ -599,7 +599,7 @@ func Add64(x, y, ci uint64) (r, co uint64) {
func Add64C(x, ci uint64) (r, co uint64) {
// arm64:"ADDS" "ADCS" "ADC" -"ADD " -"CMP"
// amd64:"NEGL" "ADCQ" "SBBQ" "NEGQ"
// amd64:"NEGL" "ADCQ" "SETCS" "MOVBLZX"
// loong64: "ADDV" "SGTU"
// ppc64x: "ADDC" "ADDE" "ADDZE"
// s390x:"ADDE" "ADDC [$]-1,"
@ -610,7 +610,7 @@ func Add64C(x, ci uint64) (r, co uint64) {
func Add64Z(x, y uint64) (r, co uint64) {
// arm64:"ADDS" "ADC" -"ADCS" -"ADD " -"CMP"
// amd64:"ADDQ" "SBBQ" "NEGQ" -"NEGL" -"ADCQ"
// amd64:"ADDQ" "SETCS" "MOVBLZX" -"NEGL" -"ADCQ"
// loong64: "ADDV" "SGTU"
// ppc64x: "ADDC" -"ADDE" "ADDZE"
// s390x:"ADDC" -"ADDC [$]-1,"
@ -621,7 +621,7 @@ func Add64Z(x, y uint64) (r, co uint64) {
func Add64R(x, y, ci uint64) uint64 {
// arm64:"ADDS" "ADCS" -"ADD " -"CMP"
// amd64:"NEGL" "ADCQ" -"SBBQ" -"NEGQ"
// amd64:"NEGL" "ADCQ" -"SETCS" -"MOVBLZX"
// loong64: "ADDV" -"SGTU"
// ppc64x: "ADDC" "ADDE" -"ADDZE"
// s390x:"ADDE" "ADDC [$]-1,"
@ -635,7 +635,7 @@ func Add64M(p, q, r *[3]uint64) {
var c uint64
r[0], c = bits.Add64(p[0], q[0], c)
// arm64:"ADCS" -"ADD " -"CMP"
// amd64:"ADCQ" -"NEGL" -"SBBQ" -"NEGQ"
// amd64:"ADCQ" -"NEGL" -"SETCS" -"MOVBLZX"
// ppc64x: -"ADDC" "ADDE" -"ADDZE"
// s390x:"ADDE" -"ADDC [$]-1,"
r[1], c = bits.Add64(p[1], q[1], c)
@ -756,7 +756,7 @@ func Add64MultipleChains(a, b, c, d [2]uint64) [2]uint64 {
// --------------- //
func Sub(x, y, ci uint) (r, co uint) {
// amd64:"NEGL" "SBBQ" "NEGQ"
// amd64:"NEGL" "SETCS" "MOVBLZX"
// arm64:"NEGS" "SBCS" "NGC" "NEG" -"ADD" -"SUB" -"CMP"
// loong64:"SUBV" "SGTU"
// ppc64x:"SUBC" "SUBE" "SUBZE" "NEG"
@ -767,7 +767,7 @@ func Sub(x, y, ci uint) (r, co uint) {
}
func SubC(x, ci uint) (r, co uint) {
// amd64:"NEGL" "SBBQ" "NEGQ"
// amd64:"NEGL" "SBBQ" "SETCS" "MOVBLZX"
// arm64:"NEGS" "SBCS" "NGC" "NEG" -"ADD" -"SUB" -"CMP"
// loong64:"SUBV" "SGTU"
// ppc64x:"SUBC" "SUBE" "SUBZE" "NEG"
@ -778,7 +778,7 @@ func SubC(x, ci uint) (r, co uint) {
}
func SubZ(x, y uint) (r, co uint) {
// amd64:"SUBQ" "SBBQ" "NEGQ" -"NEGL"
// amd64:"SUBQ" "SETCS" "MOVBLZX" -"NEGL"
// arm64:"SUBS" "NGC" "NEG" -"SBCS" -"ADD" -"SUB " -"CMP"
// loong64:"SUBV" "SGTU"
// ppc64x:"SUBC" -"SUBE" "SUBZE" "NEG"
@ -789,7 +789,7 @@ func SubZ(x, y uint) (r, co uint) {
}
func SubR(x, y, ci uint) uint {
// amd64:"NEGL" "SBBQ" -"NEGQ"
// amd64:"NEGL" "SBBQ" -"SETCS" -"MOVBLZX"
// arm64:"NEGS" "SBCS" -"NGC" -"NEG " -"ADD" -"SUB" -"CMP"
// loong64:"SUBV" -"SGTU"
// ppc64x:"SUBC" "SUBE" -"SUBZE" -"NEG"
@ -801,7 +801,7 @@ func SubR(x, y, ci uint) uint {
func SubM(p, q, r *[3]uint) {
var c uint
r[0], c = bits.Sub(p[0], q[0], c)
// amd64:"SBBQ" -"NEGL" -"NEGQ"
// amd64:"SBBQ" -"NEGL" -"SETCS" -"MOVBLZX"
// arm64:"SBCS" -"NEGS" -"NGC" -"NEG" -"ADD" -"SUB" -"CMP"
// ppc64x:-"SUBC" "SUBE" -"SUBZE" -"NEG"
// s390x:"SUBE"
@ -810,7 +810,7 @@ func SubM(p, q, r *[3]uint) {
}
func Sub64(x, y, ci uint64) (r, co uint64) {
// amd64:"NEGL" "SBBQ" "NEGQ"
// amd64:"NEGL" "SBBQ" "SETCS" "MOVBLZX"
// arm64:"NEGS" "SBCS" "NGC" "NEG" -"ADD" -"SUB" -"CMP"
// loong64:"SUBV" "SGTU"
// ppc64x:"SUBC" "SUBE" "SUBZE" "NEG"
@ -821,7 +821,7 @@ func Sub64(x, y, ci uint64) (r, co uint64) {
}
func Sub64C(x, ci uint64) (r, co uint64) {
// amd64:"NEGL" "SBBQ" "NEGQ"
// amd64:"NEGL" "SBBQ" "SETCS" "MOVBLZX"
// arm64:"NEGS" "SBCS" "NGC" "NEG" -"ADD" -"SUB" -"CMP"
// loong64:"SUBV" "SGTU"
// ppc64x:"SUBC" "SUBE" "SUBZE" "NEG"
@ -832,7 +832,7 @@ func Sub64C(x, ci uint64) (r, co uint64) {
}
func Sub64Z(x, y uint64) (r, co uint64) {
// amd64:"SUBQ" "SBBQ" "NEGQ" -"NEGL"
// amd64:"SUBQ" "SETCS" "MOVBLZX" -"NEGL"
// arm64:"SUBS" "NGC" "NEG" -"SBCS" -"ADD" -"SUB " -"CMP"
// loong64:"SUBV" "SGTU"
// ppc64x:"SUBC" -"SUBE" "SUBZE" "NEG"
@ -843,7 +843,7 @@ func Sub64Z(x, y uint64) (r, co uint64) {
}
func Sub64R(x, y, ci uint64) uint64 {
// amd64:"NEGL" "SBBQ" -"NEGQ"
// amd64:"NEGL" "SBBQ" -"SETCS" -"MOVBLZX"
// arm64:"NEGS" "SBCS" -"NGC" -"NEG " -"ADD" -"SUB" -"CMP"
// loong64:"SUBV" -"SGTU"
// ppc64x:"SUBC" "SUBE" -"SUBZE" -"NEG"
@ -855,7 +855,7 @@ func Sub64R(x, y, ci uint64) uint64 {
func Sub64M(p, q, r *[3]uint64) {
var c uint64
r[0], c = bits.Sub64(p[0], q[0], c)
// amd64:"SBBQ" -"NEGL" -"NEGQ"
// amd64:"SBBQ" -"NEGL" -"SETCS" -"MOVBLZX"
// arm64:"SBCS" -"NEGS" -"NGC" -"NEG" -"ADD" -"SUB" -"CMP"
// s390x:"SUBE"
r[1], c = bits.Sub64(p[1], q[1], c)