mirror of
https://github.com/golang/go.git
synced 2026-06-27 03:11:23 +00:00
cmd/compile: shuffle bits.Sub intrinsic generation on amd64
Assuming the CPU recognizes SBB RX, RX as a dependency break, this is a no-op; however, SET is much more canonical and easier to match. Updates #76056 Change-Id: Icc590dbcc76a8ed2fca7b167cfb66a2d33d4d2d5 Reviewed-on: https://go-review.googlesource.com/c/go/+/778140 LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: Keith Randall <khr@golang.org> Auto-Submit: Jorropo <jorropo.pgm@gmail.com>
This commit is contained in:
parent
8bd95ae848
commit
212065c922
3 changed files with 36 additions and 45 deletions
|
|
@ -27,15 +27,10 @@
|
|||
(Div8u x y) => (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
|
||||
(Div(32|64)F ...) => (DIVS(S|D) ...)
|
||||
|
||||
(Select0 (Add64carry x y c)) =>
|
||||
(Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
|
||||
(Select1 (Add64carry x y c)) =>
|
||||
(NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
|
||||
(Select0 (Sub64borrow x y c)) =>
|
||||
(Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
|
||||
(Select1 (Sub64borrow x y c)) =>
|
||||
(NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
|
||||
|
||||
(Select0 (Add64carry x y c)) => (Select0 (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
|
||||
(Select1 (Add64carry x y c)) => (MOVBQZX (SETB <types.Types[types.TUINT8]> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
|
||||
(Select0 (Sub64borrow x y c)) => (Select0 (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
|
||||
(Select1 (Sub64borrow x y c)) => (MOVBQZX (SETB <types.Types[types.TUINT8]> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
|
||||
// Optimize ADCQ and friends
|
||||
(ADCQ x (MOVQconst [c]) carry) && is32Bit(c) => (ADCQconst x [int32(c)] carry)
|
||||
(ADCQ x y (FlagEQ)) => (ADDQcarry x y)
|
||||
|
|
@ -46,7 +41,7 @@
|
|||
(SBBQconst x [c] (FlagEQ)) => (SUBQconstborrow x [c])
|
||||
(SUBQborrow x (MOVQconst [c])) && is32Bit(c) => (SUBQconstborrow x [int32(c)])
|
||||
(Select1 (NEGLflags (MOVQconst [0]))) => (FlagEQ)
|
||||
(Select1 (NEGLflags (NEGQ (SBBQcarrymask x)))) => x
|
||||
(Select1 (NEGLflags (MOVBQZX (SETB x)))) => x
|
||||
|
||||
|
||||
(Mul64uhilo ...) => (MULQU2 ...)
|
||||
|
|
|
|||
|
|
@ -75874,7 +75874,7 @@ func rewriteValueAMD64_OpSelect0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
// match: (Select0 (Add64carry x y c))
|
||||
// result: (Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
|
||||
// result: (Select0 (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
|
||||
for {
|
||||
if v_0.Op != OpAdd64carry {
|
||||
break
|
||||
|
|
@ -75883,7 +75883,6 @@ func rewriteValueAMD64_OpSelect0(v *Value) bool {
|
|||
x := v_0.Args[0]
|
||||
y := v_0.Args[1]
|
||||
v.reset(OpSelect0)
|
||||
v.Type = typ.UInt64
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags))
|
||||
v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
|
||||
v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
|
||||
|
|
@ -75894,7 +75893,7 @@ func rewriteValueAMD64_OpSelect0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
// match: (Select0 (Sub64borrow x y c))
|
||||
// result: (Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
|
||||
// result: (Select0 (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
|
||||
for {
|
||||
if v_0.Op != OpSub64borrow {
|
||||
break
|
||||
|
|
@ -75903,7 +75902,6 @@ func rewriteValueAMD64_OpSelect0(v *Value) bool {
|
|||
x := v_0.Args[0]
|
||||
y := v_0.Args[1]
|
||||
v.reset(OpSelect0)
|
||||
v.Type = typ.UInt64
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags))
|
||||
v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
|
||||
v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
|
||||
|
|
@ -76018,7 +76016,7 @@ func rewriteValueAMD64_OpSelect1(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
// match: (Select1 (Add64carry x y c))
|
||||
// result: (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
|
||||
// result: (MOVBQZX (SETB <types.Types[types.TUINT8]> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
|
||||
for {
|
||||
if v_0.Op != OpAdd64carry {
|
||||
break
|
||||
|
|
@ -76026,9 +76024,8 @@ func rewriteValueAMD64_OpSelect1(v *Value) bool {
|
|||
c := v_0.Args[2]
|
||||
x := v_0.Args[0]
|
||||
y := v_0.Args[1]
|
||||
v.reset(OpAMD64NEGQ)
|
||||
v.Type = typ.UInt64
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64)
|
||||
v.reset(OpAMD64MOVBQZX)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64SETB, types.Types[types.TUINT8])
|
||||
v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
|
||||
v2 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags))
|
||||
v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
|
||||
|
|
@ -76042,7 +76039,7 @@ func rewriteValueAMD64_OpSelect1(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
// match: (Select1 (Sub64borrow x y c))
|
||||
// result: (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
|
||||
// result: (MOVBQZX (SETB <types.Types[types.TUINT8]> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
|
||||
for {
|
||||
if v_0.Op != OpSub64borrow {
|
||||
break
|
||||
|
|
@ -76050,9 +76047,8 @@ func rewriteValueAMD64_OpSelect1(v *Value) bool {
|
|||
c := v_0.Args[2]
|
||||
x := v_0.Args[0]
|
||||
y := v_0.Args[1]
|
||||
v.reset(OpAMD64NEGQ)
|
||||
v.Type = typ.UInt64
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64)
|
||||
v.reset(OpAMD64MOVBQZX)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64SETB, types.Types[types.TUINT8])
|
||||
v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
|
||||
v2 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags))
|
||||
v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
|
||||
|
|
@ -76078,18 +76074,18 @@ func rewriteValueAMD64_OpSelect1(v *Value) bool {
|
|||
v.reset(OpAMD64FlagEQ)
|
||||
return true
|
||||
}
|
||||
// match: (Select1 (NEGLflags (NEGQ (SBBQcarrymask x))))
|
||||
// match: (Select1 (NEGLflags (MOVBQZX (SETB x))))
|
||||
// result: x
|
||||
for {
|
||||
if v_0.Op != OpAMD64NEGLflags {
|
||||
break
|
||||
}
|
||||
v_0_0 := v_0.Args[0]
|
||||
if v_0_0.Op != OpAMD64NEGQ {
|
||||
if v_0_0.Op != OpAMD64MOVBQZX {
|
||||
break
|
||||
}
|
||||
v_0_0_0 := v_0_0.Args[0]
|
||||
if v_0_0_0.Op != OpAMD64SBBQcarrymask {
|
||||
if v_0_0_0.Op != OpAMD64SETB {
|
||||
break
|
||||
}
|
||||
x := v_0_0_0.Args[0]
|
||||
|
|
|
|||
|
|
@ -535,7 +535,7 @@ func IterateBits8(n uint8) int {
|
|||
|
||||
func Add(x, y, ci uint) (r, co uint) {
|
||||
// arm64:"ADDS" "ADCS" "ADC" -"ADD " -"CMP"
|
||||
// amd64:"NEGL" "ADCQ" "SBBQ" "NEGQ"
|
||||
// amd64:"NEGL" "ADCQ" "SETCS" "MOVBLZX"
|
||||
// ppc64x: "ADDC" "ADDE" "ADDZE"
|
||||
// s390x:"ADDE" "ADDC [$]-1,"
|
||||
// riscv64: "ADD" "SLTU"
|
||||
|
|
@ -544,7 +544,7 @@ func Add(x, y, ci uint) (r, co uint) {
|
|||
|
||||
func AddC(x, ci uint) (r, co uint) {
|
||||
// arm64:"ADDS" "ADCS" "ADC" -"ADD " -"CMP"
|
||||
// amd64:"NEGL" "ADCQ" "SBBQ" "NEGQ"
|
||||
// amd64:"NEGL" "ADCQ" "SETCS" "MOVBLZX"
|
||||
// loong64: "ADDV" "SGTU"
|
||||
// ppc64x: "ADDC" "ADDE" "ADDZE"
|
||||
// s390x:"ADDE" "ADDC [$]-1,"
|
||||
|
|
@ -555,7 +555,7 @@ func AddC(x, ci uint) (r, co uint) {
|
|||
|
||||
func AddZ(x, y uint) (r, co uint) {
|
||||
// arm64:"ADDS" "ADC" -"ADCS" -"ADD " -"CMP"
|
||||
// amd64:"ADDQ" "SBBQ" "NEGQ" -"NEGL" -"ADCQ"
|
||||
// amd64:"ADDQ" "SETCS" "MOVBLZX" -"NEGL" -"ADCQ"
|
||||
// loong64: "ADDV" "SGTU"
|
||||
// ppc64x: "ADDC" -"ADDE" "ADDZE"
|
||||
// s390x:"ADDC" -"ADDC [$]-1,"
|
||||
|
|
@ -566,7 +566,7 @@ func AddZ(x, y uint) (r, co uint) {
|
|||
|
||||
func AddR(x, y, ci uint) uint {
|
||||
// arm64:"ADDS" "ADCS" -"ADD " -"CMP"
|
||||
// amd64:"NEGL" "ADCQ" -"SBBQ" -"NEGQ"
|
||||
// amd64:"NEGL" "ADCQ" -"SETCS" -"MOVBLZX"
|
||||
// loong64: "ADDV" -"SGTU"
|
||||
// ppc64x: "ADDC" "ADDE" -"ADDZE"
|
||||
// s390x:"ADDE" "ADDC [$]-1,"
|
||||
|
|
@ -580,7 +580,7 @@ func AddM(p, q, r *[3]uint) {
|
|||
var c uint
|
||||
r[0], c = bits.Add(p[0], q[0], c)
|
||||
// arm64:"ADCS" -"ADD " -"CMP"
|
||||
// amd64:"ADCQ" -"NEGL" -"SBBQ" -"NEGQ"
|
||||
// amd64:"ADCQ" -"NEGL" -"SETCS" -"MOVBLZX"
|
||||
// s390x:"ADDE" -"ADDC [$]-1,"
|
||||
r[1], c = bits.Add(p[1], q[1], c)
|
||||
r[2], c = bits.Add(p[2], q[2], c)
|
||||
|
|
@ -588,7 +588,7 @@ func AddM(p, q, r *[3]uint) {
|
|||
|
||||
func Add64(x, y, ci uint64) (r, co uint64) {
|
||||
// arm64:"ADDS" "ADCS" "ADC" -"ADD " -"CMP"
|
||||
// amd64:"NEGL" "ADCQ" "SBBQ" "NEGQ"
|
||||
// amd64:"NEGL" "ADCQ" "SETCS" "MOVBLZX"
|
||||
// loong64: "ADDV" "SGTU"
|
||||
// ppc64x: "ADDC" "ADDE" "ADDZE"
|
||||
// s390x:"ADDE" "ADDC [$]-1,"
|
||||
|
|
@ -599,7 +599,7 @@ func Add64(x, y, ci uint64) (r, co uint64) {
|
|||
|
||||
func Add64C(x, ci uint64) (r, co uint64) {
|
||||
// arm64:"ADDS" "ADCS" "ADC" -"ADD " -"CMP"
|
||||
// amd64:"NEGL" "ADCQ" "SBBQ" "NEGQ"
|
||||
// amd64:"NEGL" "ADCQ" "SETCS" "MOVBLZX"
|
||||
// loong64: "ADDV" "SGTU"
|
||||
// ppc64x: "ADDC" "ADDE" "ADDZE"
|
||||
// s390x:"ADDE" "ADDC [$]-1,"
|
||||
|
|
@ -610,7 +610,7 @@ func Add64C(x, ci uint64) (r, co uint64) {
|
|||
|
||||
func Add64Z(x, y uint64) (r, co uint64) {
|
||||
// arm64:"ADDS" "ADC" -"ADCS" -"ADD " -"CMP"
|
||||
// amd64:"ADDQ" "SBBQ" "NEGQ" -"NEGL" -"ADCQ"
|
||||
// amd64:"ADDQ" "SETCS" "MOVBLZX" -"NEGL" -"ADCQ"
|
||||
// loong64: "ADDV" "SGTU"
|
||||
// ppc64x: "ADDC" -"ADDE" "ADDZE"
|
||||
// s390x:"ADDC" -"ADDC [$]-1,"
|
||||
|
|
@ -621,7 +621,7 @@ func Add64Z(x, y uint64) (r, co uint64) {
|
|||
|
||||
func Add64R(x, y, ci uint64) uint64 {
|
||||
// arm64:"ADDS" "ADCS" -"ADD " -"CMP"
|
||||
// amd64:"NEGL" "ADCQ" -"SBBQ" -"NEGQ"
|
||||
// amd64:"NEGL" "ADCQ" -"SETCS" -"MOVBLZX"
|
||||
// loong64: "ADDV" -"SGTU"
|
||||
// ppc64x: "ADDC" "ADDE" -"ADDZE"
|
||||
// s390x:"ADDE" "ADDC [$]-1,"
|
||||
|
|
@ -635,7 +635,7 @@ func Add64M(p, q, r *[3]uint64) {
|
|||
var c uint64
|
||||
r[0], c = bits.Add64(p[0], q[0], c)
|
||||
// arm64:"ADCS" -"ADD " -"CMP"
|
||||
// amd64:"ADCQ" -"NEGL" -"SBBQ" -"NEGQ"
|
||||
// amd64:"ADCQ" -"NEGL" -"SETCS" -"MOVBLZX"
|
||||
// ppc64x: -"ADDC" "ADDE" -"ADDZE"
|
||||
// s390x:"ADDE" -"ADDC [$]-1,"
|
||||
r[1], c = bits.Add64(p[1], q[1], c)
|
||||
|
|
@ -756,7 +756,7 @@ func Add64MultipleChains(a, b, c, d [2]uint64) [2]uint64 {
|
|||
// --------------- //
|
||||
|
||||
func Sub(x, y, ci uint) (r, co uint) {
|
||||
// amd64:"NEGL" "SBBQ" "NEGQ"
|
||||
// amd64:"NEGL" "SETCS" "MOVBLZX"
|
||||
// arm64:"NEGS" "SBCS" "NGC" "NEG" -"ADD" -"SUB" -"CMP"
|
||||
// loong64:"SUBV" "SGTU"
|
||||
// ppc64x:"SUBC" "SUBE" "SUBZE" "NEG"
|
||||
|
|
@ -767,7 +767,7 @@ func Sub(x, y, ci uint) (r, co uint) {
|
|||
}
|
||||
|
||||
func SubC(x, ci uint) (r, co uint) {
|
||||
// amd64:"NEGL" "SBBQ" "NEGQ"
|
||||
// amd64:"NEGL" "SBBQ" "SETCS" "MOVBLZX"
|
||||
// arm64:"NEGS" "SBCS" "NGC" "NEG" -"ADD" -"SUB" -"CMP"
|
||||
// loong64:"SUBV" "SGTU"
|
||||
// ppc64x:"SUBC" "SUBE" "SUBZE" "NEG"
|
||||
|
|
@ -778,7 +778,7 @@ func SubC(x, ci uint) (r, co uint) {
|
|||
}
|
||||
|
||||
func SubZ(x, y uint) (r, co uint) {
|
||||
// amd64:"SUBQ" "SBBQ" "NEGQ" -"NEGL"
|
||||
// amd64:"SUBQ" "SETCS" "MOVBLZX" -"NEGL"
|
||||
// arm64:"SUBS" "NGC" "NEG" -"SBCS" -"ADD" -"SUB " -"CMP"
|
||||
// loong64:"SUBV" "SGTU"
|
||||
// ppc64x:"SUBC" -"SUBE" "SUBZE" "NEG"
|
||||
|
|
@ -789,7 +789,7 @@ func SubZ(x, y uint) (r, co uint) {
|
|||
}
|
||||
|
||||
func SubR(x, y, ci uint) uint {
|
||||
// amd64:"NEGL" "SBBQ" -"NEGQ"
|
||||
// amd64:"NEGL" "SBBQ" -"SETCS" -"MOVBLZX"
|
||||
// arm64:"NEGS" "SBCS" -"NGC" -"NEG " -"ADD" -"SUB" -"CMP"
|
||||
// loong64:"SUBV" -"SGTU"
|
||||
// ppc64x:"SUBC" "SUBE" -"SUBZE" -"NEG"
|
||||
|
|
@ -801,7 +801,7 @@ func SubR(x, y, ci uint) uint {
|
|||
func SubM(p, q, r *[3]uint) {
|
||||
var c uint
|
||||
r[0], c = bits.Sub(p[0], q[0], c)
|
||||
// amd64:"SBBQ" -"NEGL" -"NEGQ"
|
||||
// amd64:"SBBQ" -"NEGL" -"SETCS" -"MOVBLZX"
|
||||
// arm64:"SBCS" -"NEGS" -"NGC" -"NEG" -"ADD" -"SUB" -"CMP"
|
||||
// ppc64x:-"SUBC" "SUBE" -"SUBZE" -"NEG"
|
||||
// s390x:"SUBE"
|
||||
|
|
@ -810,7 +810,7 @@ func SubM(p, q, r *[3]uint) {
|
|||
}
|
||||
|
||||
func Sub64(x, y, ci uint64) (r, co uint64) {
|
||||
// amd64:"NEGL" "SBBQ" "NEGQ"
|
||||
// amd64:"NEGL" "SBBQ" "SETCS" "MOVBLZX"
|
||||
// arm64:"NEGS" "SBCS" "NGC" "NEG" -"ADD" -"SUB" -"CMP"
|
||||
// loong64:"SUBV" "SGTU"
|
||||
// ppc64x:"SUBC" "SUBE" "SUBZE" "NEG"
|
||||
|
|
@ -821,7 +821,7 @@ func Sub64(x, y, ci uint64) (r, co uint64) {
|
|||
}
|
||||
|
||||
func Sub64C(x, ci uint64) (r, co uint64) {
|
||||
// amd64:"NEGL" "SBBQ" "NEGQ"
|
||||
// amd64:"NEGL" "SBBQ" "SETCS" "MOVBLZX"
|
||||
// arm64:"NEGS" "SBCS" "NGC" "NEG" -"ADD" -"SUB" -"CMP"
|
||||
// loong64:"SUBV" "SGTU"
|
||||
// ppc64x:"SUBC" "SUBE" "SUBZE" "NEG"
|
||||
|
|
@ -832,7 +832,7 @@ func Sub64C(x, ci uint64) (r, co uint64) {
|
|||
}
|
||||
|
||||
func Sub64Z(x, y uint64) (r, co uint64) {
|
||||
// amd64:"SUBQ" "SBBQ" "NEGQ" -"NEGL"
|
||||
// amd64:"SUBQ" "SETCS" "MOVBLZX" -"NEGL"
|
||||
// arm64:"SUBS" "NGC" "NEG" -"SBCS" -"ADD" -"SUB " -"CMP"
|
||||
// loong64:"SUBV" "SGTU"
|
||||
// ppc64x:"SUBC" -"SUBE" "SUBZE" "NEG"
|
||||
|
|
@ -843,7 +843,7 @@ func Sub64Z(x, y uint64) (r, co uint64) {
|
|||
}
|
||||
|
||||
func Sub64R(x, y, ci uint64) uint64 {
|
||||
// amd64:"NEGL" "SBBQ" -"NEGQ"
|
||||
// amd64:"NEGL" "SBBQ" -"SETCS" -"MOVBLZX"
|
||||
// arm64:"NEGS" "SBCS" -"NGC" -"NEG " -"ADD" -"SUB" -"CMP"
|
||||
// loong64:"SUBV" -"SGTU"
|
||||
// ppc64x:"SUBC" "SUBE" -"SUBZE" -"NEG"
|
||||
|
|
@ -855,7 +855,7 @@ func Sub64R(x, y, ci uint64) uint64 {
|
|||
func Sub64M(p, q, r *[3]uint64) {
|
||||
var c uint64
|
||||
r[0], c = bits.Sub64(p[0], q[0], c)
|
||||
// amd64:"SBBQ" -"NEGL" -"NEGQ"
|
||||
// amd64:"SBBQ" -"NEGL" -"SETCS" -"MOVBLZX"
|
||||
// arm64:"SBCS" -"NEGS" -"NGC" -"NEG" -"ADD" -"SUB" -"CMP"
|
||||
// s390x:"SUBE"
|
||||
r[1], c = bits.Sub64(p[1], q[1], c)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue