cmd/compile: on amd64 use 32-bit copies for 64-bit copies of 32-bit values
This saves a single byte (the REX prefix) per OpCopy it triggers on.

Fixes #76449

Change-Id: I1eab364d07354555ba2f23ffd2f9c522d4a04bd0
Reviewed-on: https://go-review.googlesource.com/c/go/+/731640
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Jorropo <jorropo.pgm@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
parent 8f739162e6
commit 478d86446e
7 changed files with 51 additions and 23 deletions
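In short: when the amd64 backend must copy a 64-bit value between general-purpose registers and can prove the value's upper 32 bits are already zero, it now emits the 32-bit form of the move, dropping the one-byte REX.W prefix. A minimal illustration of the kind of Go code affected (my example, not taken from the CL):

package main

// The uint32 -> uint64 conversion zero-extends, so the compiler knows
// bits 32-63 of v are zero. A register-to-register copy of v that the
// backend inserts later (for example, to populate both result
// registers) may now use the one-byte-shorter 32-bit MOV.
func copyZext(x uint32) (uint64, uint64) {
	v := uint64(x)
	return v, v
}

func main() {
	a, b := copyZext(0xFFFFFFFF)
	_, _ = a, b
}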
src/cmd/compile/internal/amd64/ssa.go

@@ -43,6 +43,10 @@ func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
 	}
 }
 
+func isGPReg(r int16) bool {
+	return x86.REG_AL <= r && r <= x86.REG_R15
+}
+
 func isFPReg(r int16) bool {
 	return x86.REG_X0 <= r && r <= x86.REG_Z31
 }
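The new isGPReg mirrors the existing isFPReg helper; as the next hunk shows, the narrowing trick is gated on the destination being a general-purpose register, since SIMD and mask registers have their own copy widths.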
@@ -1225,14 +1229,23 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		if v.Type.IsMemory() {
 			return
 		}
-		x := v.Args[0].Reg()
+		arg := v.Args[0]
+		x := arg.Reg()
 		y := v.Reg()
 		if v.Type.IsSIMD() {
-			x = simdOrMaskReg(v.Args[0])
+			x = simdOrMaskReg(arg)
 			y = simdOrMaskReg(v)
 		}
 		if x != y {
-			opregreg(s, moveByRegsWidth(y, x, v.Type.Size()), y, x)
+			width := v.Type.Size()
+			if width == 8 && isGPReg(y) && ssa.ZeroUpper32Bits(arg, 3) {
+				// The source was naturally zext-ed from 32 to 64 bits,
+				// but we are asked to do a full 64-bit copy.
+				// Save the REX prefix byte in I-CACHE by using a 32-bit move,
+				// since it zeroes the upper 32 bits anyway.
+				width = 4
+			}
+			opregreg(s, moveByRegsWidth(y, x, width), y, x)
 		}
 	case ssa.OpLoadReg:
 		if v.Type.IsFlags() {
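The comment in the new branch states the whole trick: on amd64, any write to a 32-bit register clears bits 32-63 of the full 64-bit register, so a 32-bit MOV is a correct 64-bit copy whenever the source's upper half is known zero. The byte saved is exactly the REX.W prefix: mov rbx, rax encodes as 48 89 C3, while mov ebx, eax encodes as 89 C3. A runnable sketch of the zero-extension property the branch relies on:

package main

import "fmt"

func main() {
	x := uint64(0xFFFFFFFF_FFFFFFFF)
	lo := uint32(x)        // a 32-bit move of the low half...
	y := uint64(lo)        // ...already yields a fully zero-extended 64-bit value
	fmt.Printf("%#x\n", y) // 0xffffffff
}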
src/cmd/compile/internal/ssa/_gen/AMD64latelower.rules

@@ -8,6 +8,6 @@
 (SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y)
 
 // See comments in ARM64latelower.rules for why these are here.
-(MOVLQZX x) && zeroUpper32Bits(x,3) => x
-(MOVWQZX x) && zeroUpper48Bits(x,3) => x
-(MOVBQZX x) && zeroUpper56Bits(x,3) => x
+(MOVLQZX x) && ZeroUpper32Bits(x,3) => x
+(MOVWQZX x) && ZeroUpper48Bits(x,3) => x
+(MOVBQZX x) && ZeroUpper56Bits(x,3) => x
src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules

@@ -29,7 +29,7 @@
 (MOVBUreg x:((Equal|NotEqual|LessThan|LessThanU|LessThanF|LessEqual|LessEqualU|LessEqualF|GreaterThan|GreaterThanU|GreaterThanF|GreaterEqual|GreaterEqualU|GreaterEqualF) _)) => x
 
 // omit unsigned extension
-(MOVWUreg x) && zeroUpper32Bits(x, 3) => x
+(MOVWUreg x) && ZeroUpper32Bits(x, 3) => x
 
 // don't extend after proper load
 (MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
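For readers unfamiliar with these files: each line of a .rules file is a rewrite of the form (pattern) && condition => result, and the helpers named in the conditions are ordinary Go functions in the ssa package. Renaming zeroUpper32Bits and friends to exported names is what lets the amd64 backend above call ssa.ZeroUpper32Bits directly; the rules and their generated code merely track the rename.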
|
@ -1351,7 +1351,7 @@ func overlap(offset1, size1, offset2, size2 int64) bool {
|
|||
// check if value zeroes out upper 32-bit of 64-bit register.
|
||||
// depth limits recursion depth. In AMD64.rules 3 is used as limit,
|
||||
// because it catches same amount of cases as 4.
|
||||
func zeroUpper32Bits(x *Value, depth int) bool {
|
||||
func ZeroUpper32Bits(x *Value, depth int) bool {
|
||||
if x.Type.IsSigned() && x.Type.Size() < 8 {
|
||||
// If the value is signed, it might get re-sign-extended
|
||||
// during spill and restore. See issue 68227.
|
||||
|
|
@@ -1368,6 +1368,8 @@ func zeroUpper32Bits(x *Value, depth int) bool {
 		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
 		OpAMD64SHLL, OpAMD64SHLLconst:
 		return true
+	case OpAMD64MOVQconst:
+		return uint64(uint32(x.AuxInt)) == uint64(x.AuxInt)
 	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
 		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
 		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
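The new MOVQconst case asks whether a 64-bit constant survives truncation to its low 32 bits, i.e. whether its upper 32 bits are zero. A standalone check of the exact expression used (helper name mine):

package main

import "fmt"

func fitsIn32(auxInt int64) bool {
	return uint64(uint32(auxInt)) == uint64(auxInt)
}

func main() {
	fmt.Println(fitsIn32(5))       // true: upper 32 bits are zero
	fmt.Println(fitsIn32(1 << 40)) // false: the value needs the upper bits
	fmt.Println(fitsIn32(-1))      // false: sign bits fill the upper half
}

Note that negative constants fail even when small in magnitude, because their sign extension occupies the upper 32 bits.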
@@ -1383,7 +1385,7 @@ func zeroUpper32Bits(x *Value, depth int) bool {
 		return false
 	}
 	for i := range x.Args {
-		if !zeroUpper32Bits(x.Args[i], depth-1) {
+		if !ZeroUpper32Bits(x.Args[i], depth-1) {
 			return false
 		}
 	}
@@ -1393,14 +1395,16 @@ func zeroUpper32Bits(x *Value, depth int) bool {
 	return false
 }
 
-// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
-func zeroUpper48Bits(x *Value, depth int) bool {
+// ZeroUpper48Bits is similar to ZeroUpper32Bits, but for upper 48 bits.
+func ZeroUpper48Bits(x *Value, depth int) bool {
 	if x.Type.IsSigned() && x.Type.Size() < 8 {
 		return false
 	}
 	switch x.Op {
 	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
 		return true
+	case OpAMD64MOVQconst, OpAMD64MOVLconst:
+		return uint64(uint16(x.AuxInt)) == uint64(x.AuxInt)
 	case OpArg: // note: but not ArgIntReg
 		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
 	case OpPhi, OpSelect0, OpSelect1:
@@ -1410,7 +1414,7 @@ func zeroUpper48Bits(x *Value, depth int) bool {
 		return false
 	}
 	for i := range x.Args {
-		if !zeroUpper48Bits(x.Args[i], depth-1) {
+		if !ZeroUpper48Bits(x.Args[i], depth-1) {
 			return false
 		}
 	}
@@ -1420,14 +1424,16 @@ func zeroUpper48Bits(x *Value, depth int) bool {
 	return false
 }
 
-// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
-func zeroUpper56Bits(x *Value, depth int) bool {
+// ZeroUpper56Bits is similar to ZeroUpper32Bits, but for upper 56 bits.
+func ZeroUpper56Bits(x *Value, depth int) bool {
 	if x.Type.IsSigned() && x.Type.Size() < 8 {
 		return false
 	}
 	switch x.Op {
 	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
 		return true
+	case OpAMD64MOVQconst, OpAMD64MOVLconst:
+		return uint64(uint8(x.AuxInt)) == uint64(x.AuxInt)
 	case OpArg: // note: but not ArgIntReg
 		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
 	case OpPhi, OpSelect0, OpSelect1:
@@ -1437,7 +1443,7 @@ func zeroUpper56Bits(x *Value, depth int) bool {
 		return false
 	}
 	for i := range x.Args {
-		if !zeroUpper56Bits(x.Args[i], depth-1) {
+		if !ZeroUpper56Bits(x.Args[i], depth-1) {
 			return false
 		}
 	}
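All three helpers share the shape described by the doc comment above: an op either directly guarantees zeroed upper bits (a narrow load, zero-extension, 32-bit shift, or now a small enough constant), or is transparent (Phi, Select) and qualifies only if every argument does, checked up to a fixed recursion depth. A minimal standalone sketch of that walk (all names hypothetical):

package main

import "fmt"

type opKind int

const (
	opLoad32 opKind = iota // directly zeroes the upper 32 bits
	opZext32               // likewise
	opPhi                  // transparent: all inputs must qualify
	opOther                // no guarantee
)

type node struct {
	op   opKind
	args []*node
}

// zeroUpper32 reports whether n's upper 32 bits are provably zero,
// looking through transparent ops at most depth levels deep.
func zeroUpper32(n *node, depth int) bool {
	switch n.op {
	case opLoad32, opZext32:
		return true
	case opPhi:
		if depth <= 0 {
			return false // recursion budget exhausted: assume the worst
		}
		for _, a := range n.args {
			if !zeroUpper32(a, depth-1) {
				return false
			}
		}
		return true
	}
	return false
}

func main() {
	phi := &node{op: opPhi, args: []*node{{op: opLoad32}, {op: opZext32}}}
	fmt.Println(zeroUpper32(phi, 3))                // true
	fmt.Println(zeroUpper32(&node{op: opOther}, 3)) // false
}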
src/cmd/compile/internal/ssa/rewriteAMD64latelower.go

@@ -30,11 +30,11 @@ func rewriteValueAMD64latelower(v *Value) bool {
 func rewriteValueAMD64latelower_OpAMD64MOVBQZX(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (MOVBQZX x)
-	// cond: zeroUpper56Bits(x,3)
+	// cond: ZeroUpper56Bits(x,3)
 	// result: x
 	for {
 		x := v_0
-		if !(zeroUpper56Bits(x, 3)) {
+		if !(ZeroUpper56Bits(x, 3)) {
 			break
 		}
 		v.copyOf(x)
@@ -45,11 +45,11 @@ func rewriteValueAMD64latelower_OpAMD64MOVBQZX(v *Value) bool {
 func rewriteValueAMD64latelower_OpAMD64MOVLQZX(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (MOVLQZX x)
-	// cond: zeroUpper32Bits(x,3)
+	// cond: ZeroUpper32Bits(x,3)
 	// result: x
 	for {
 		x := v_0
-		if !(zeroUpper32Bits(x, 3)) {
+		if !(ZeroUpper32Bits(x, 3)) {
 			break
 		}
 		v.copyOf(x)
@@ -60,11 +60,11 @@ func rewriteValueAMD64latelower_OpAMD64MOVLQZX(v *Value) bool {
 func rewriteValueAMD64latelower_OpAMD64MOVWQZX(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (MOVWQZX x)
-	// cond: zeroUpper48Bits(x,3)
+	// cond: ZeroUpper48Bits(x,3)
 	// result: x
 	for {
 		x := v_0
-		if !(zeroUpper48Bits(x, 3)) {
+		if !(ZeroUpper48Bits(x, 3)) {
 			break
 		}
 		v.copyOf(x)
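The rewrite*.go files are generated from the corresponding .rules files (each carries a "Code generated ... DO NOT EDIT." header), so the changes in this file and the next are mechanical fallout of the rename in rewrite.go.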
src/cmd/compile/internal/ssa/rewriteARM64latelower.go

@@ -653,11 +653,11 @@ func rewriteValueARM64latelower_OpARM64MOVHreg(v *Value) bool {
 func rewriteValueARM64latelower_OpARM64MOVWUreg(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (MOVWUreg x)
-	// cond: zeroUpper32Bits(x, 3)
+	// cond: ZeroUpper32Bits(x, 3)
 	// result: x
 	for {
 		x := v_0
-		if !(zeroUpper32Bits(x, 3)) {
+		if !(ZeroUpper32Bits(x, 3)) {
 			break
 		}
 		v.copyOf(x)
@@ -33,3 +33,12 @@ func contiguousMaskConstants() (out [64]uint64) {
 	out[3] = 0xFFFFFFFE00000001
 	return
 }
+
+func issue76449_1() (_, _, _ uint64) {
+	// amd64:-"MOVQ"
+	return 0, 0, 0
+}
+func issue76449_2() (_, _, _ uint64) {
+	// amd64:-"MOVQ"
+	return 1, 2, 1
+}
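These two functions are test/codegen assertions: each // amd64:-"MOVQ" comment is matched against the assembly the compiler produces for the enclosing function, and the leading minus requires that the pattern not appear. Both functions return small constants whose upper 32 bits are zero, so every register move they need can use the 32-bit form; the directives verify that no 64-bit MOVQ survives after this CL.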