cmd/compile: optimize slice bounds checking with SUB/SUBconst comparisons

Optimize ARM64 code generation for slice bounds checking by recognizing
patterns where comparisons to zero involve SUB or SUBconst operations.
This change adds SSA opt rules to simplify:
 (CMPconst [0] (SUB x y)) => (CMP x y)

The optimizations apply to EQ, NE, ULE, and UGT comparisons, enabling
more efficient bounds checking for slice operations.
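As an illustration (a sketch of my own, not code from this CL), the pattern this
targets is a bounds check on a slice that has just been shortened: after a = a[1:],
the new length is a SUB/SUBconst of the original length, and the check that the next
index is in range tests that difference against zero. A minimal, hypothetical example:

// Hypothetical example: with these rules, the check guarding a[0] below can be
// emitted as a direct CMP of the original length against the constant 1 instead
// of a subtract feeding a compare-with-zero branch.
func advanceAndStore(a []byte, c byte) []byte {
	a = a[1:] // bounds check: 1 <= len(a)
	a[0] = c  // bounds check: 0 < len(a), where len(a) is the original length minus 1
	return a
}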

Code size improvement:
compile: .text:    9088004  ->  9065988 (-0.24%)
etcd:    .text:    10500276 -> 10497092 (-0.03%)
Change-Id: I467cb27674351652bcacc52b87e1f19677bd46a8
Reviewed-on: https://go-review.googlesource.com/c/go/+/679915
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Alexander Musman 2025-06-07 13:18:58 +03:00 committed by Gopher Robot
parent f11599b0b9
commit dcb479c2f9
3 changed files with 206 additions and 0 deletions

File: src/cmd/compile/internal/ssa/_gen/ARM64.rules

@@ -683,6 +683,14 @@
((EQ|NE) (CMPconst [0] x) yes no) => ((Z|NZ) x yes no)
((EQ|NE) (CMPWconst [0] x) yes no) => ((ZW|NZW) x yes no)
((ULE|UGT) (CMPconst [0] x)) => ((EQ|NE) (CMPconst [0] x))
((ULE|UGT) (CMPWconst [0] x)) => ((EQ|NE) (CMPWconst [0] x))
((Z|NZ) sub:(SUB x y)) && sub.Uses == 1 => ((EQ|NE) (CMP x y))
((ZW|NZW) sub:(SUB x y)) && sub.Uses == 1 => ((EQ|NE) (CMPW x y))
((Z|NZ) sub:(SUBconst [c] y)) && sub.Uses == 1 => ((EQ|NE) (CMPconst [c] y))
((ZW|NZW) sub:(SUBconst [c] y)) && sub.Uses == 1 => ((EQ|NE) (CMPWconst [int32(c)] y))
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMN a (MUL <x.Type> x y)) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMP a (MUL <x.Type> x y)) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNW a (MULW <x.Type> x y)) yes no)
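
The first two new rules rely on an unsigned identity: against zero, "unsigned <="
can only hold when the value is zero, and "unsigned >" means it is nonzero, so
ULE/UGT of a zero compare collapse to EQ/NE. The SUB/SUBconst rules use the fact
that a zero test of x-y is the same as comparing x and y directly; the
sub.Uses == 1 condition restricts the rewrite to cases where the subtraction exists
only to feed the test, so it can be folded into the compare rather than kept as a
separate instruction. A quick standalone check of those identities (my own sketch,
not part of the CL):

// Sanity-check program for the identities behind the rules: for unsigned x,
// x <= 0 iff x == 0 and x > 0 iff x != 0, and (x - y) == 0 iff x == y (this
// holds under wraparound as well).
package main

import "fmt"

func main() {
	for _, x := range []uint64{0, 1, 2, 1 << 63} {
		fmt.Println((x <= 0) == (x == 0), (x > 0) == (x != 0))
	}
	for _, p := range [][2]uint64{{3, 3}, {3, 5}, {0, 1}} {
		x, y := p[0], p[1]
		fmt.Println(((x - y) == 0) == (x == y))
	}
}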

File: src/cmd/compile/internal/ssa/rewriteARM64.go

@@ -25045,6 +25045,37 @@ func rewriteBlockARM64(b *Block) bool {
			b.resetWithControl(BlockARM64FGE, cc)
			return true
		}
		// match: (NZ sub:(SUB x y))
		// cond: sub.Uses == 1
		// result: (NE (CMP x y))
		for b.Controls[0].Op == OpARM64SUB {
			sub := b.Controls[0]
			y := sub.Args[1]
			x := sub.Args[0]
			if !(sub.Uses == 1) {
				break
			}
			v0 := b.NewValue0(sub.Pos, OpARM64CMP, types.TypeFlags)
			v0.AddArg2(x, y)
			b.resetWithControl(BlockARM64NE, v0)
			return true
		}
		// match: (NZ sub:(SUBconst [c] y))
		// cond: sub.Uses == 1
		// result: (NE (CMPconst [c] y))
		for b.Controls[0].Op == OpARM64SUBconst {
			sub := b.Controls[0]
			c := auxIntToInt64(sub.AuxInt)
			y := sub.Args[0]
			if !(sub.Uses == 1) {
				break
			}
			v0 := b.NewValue0(sub.Pos, OpARM64CMPconst, types.TypeFlags)
			v0.AuxInt = int64ToAuxInt(c)
			v0.AddArg(y)
			b.resetWithControl(BlockARM64NE, v0)
			return true
		}
		// match: (NZ (ANDconst [c] x) yes no)
		// cond: oneBit(c)
		// result: (TBNZ [int64(ntz64(c))] x yes no)
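
The matcher code in this file is generated from ARM64.rules by the SSA rule
generator; each block rule becomes a guarded rewrite of the block kind and its
control value. A toy model of that shape (simplified types of my own, not the real
ssa package):

// Toy model, for illustration only: an "NZ" block whose control value is a
// single-use "SUB" is rewritten into an "NE" block over a fresh "CMP", mirroring
// the structure of the generated matcher above.
package main

import "fmt"

type value struct {
	op   string
	args []*value
	uses int
}

type block struct {
	kind    string
	control *value
}

func rewriteNZSub(b *block) bool {
	if b.kind != "NZ" {
		return false
	}
	sub := b.control
	if sub.op != "SUB" || sub.uses != 1 {
		return false
	}
	x, y := sub.args[0], sub.args[1]
	b.kind = "NE"
	b.control = &value{op: "CMP", args: []*value{x, y}, uses: 1}
	return true
}

func main() {
	x, y := &value{op: "arg"}, &value{op: "arg"}
	b := &block{kind: "NZ", control: &value{op: "SUB", args: []*value{x, y}, uses: 1}}
	fmt.Println(rewriteNZSub(b), b.kind, b.control.op) // true NE CMP
}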
@@ -25083,6 +25114,37 @@ func rewriteBlockARM64(b *Block) bool {
			return true
		}
	case BlockARM64NZW:
		// match: (NZW sub:(SUB x y))
		// cond: sub.Uses == 1
		// result: (NE (CMPW x y))
		for b.Controls[0].Op == OpARM64SUB {
			sub := b.Controls[0]
			y := sub.Args[1]
			x := sub.Args[0]
			if !(sub.Uses == 1) {
				break
			}
			v0 := b.NewValue0(sub.Pos, OpARM64CMPW, types.TypeFlags)
			v0.AddArg2(x, y)
			b.resetWithControl(BlockARM64NE, v0)
			return true
		}
		// match: (NZW sub:(SUBconst [c] y))
		// cond: sub.Uses == 1
		// result: (NE (CMPWconst [int32(c)] y))
		for b.Controls[0].Op == OpARM64SUBconst {
			sub := b.Controls[0]
			c := auxIntToInt64(sub.AuxInt)
			y := sub.Args[0]
			if !(sub.Uses == 1) {
				break
			}
			v0 := b.NewValue0(sub.Pos, OpARM64CMPWconst, types.TypeFlags)
			v0.AuxInt = int32ToAuxInt(int32(c))
			v0.AddArg(y)
			b.resetWithControl(BlockARM64NE, v0)
			return true
		}
		// match: (NZW (ANDconst [c] x) yes no)
		// cond: oneBit(int64(uint32(c)))
		// result: (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no)
@@ -25312,6 +25374,34 @@ func rewriteBlockARM64(b *Block) bool {
			return true
		}
	case BlockARM64UGT:
		// match: (UGT (CMPconst [0] x))
		// result: (NE (CMPconst [0] x))
		for b.Controls[0].Op == OpARM64CMPconst {
			v_0 := b.Controls[0]
			if auxIntToInt64(v_0.AuxInt) != 0 {
				break
			}
			x := v_0.Args[0]
			v0 := b.NewValue0(v_0.Pos, OpARM64CMPconst, types.TypeFlags)
			v0.AuxInt = int64ToAuxInt(0)
			v0.AddArg(x)
			b.resetWithControl(BlockARM64NE, v0)
			return true
		}
		// match: (UGT (CMPWconst [0] x))
		// result: (NE (CMPWconst [0] x))
		for b.Controls[0].Op == OpARM64CMPWconst {
			v_0 := b.Controls[0]
			if auxIntToInt32(v_0.AuxInt) != 0 {
				break
			}
			x := v_0.Args[0]
			v0 := b.NewValue0(v_0.Pos, OpARM64CMPWconst, types.TypeFlags)
			v0.AuxInt = int32ToAuxInt(0)
			v0.AddArg(x)
			b.resetWithControl(BlockARM64NE, v0)
			return true
		}
		// match: (UGT (FlagConstant [fc]) yes no)
		// cond: fc.ugt()
		// result: (First yes no)
@@ -25346,6 +25436,34 @@ func rewriteBlockARM64(b *Block) bool {
			return true
		}
	case BlockARM64ULE:
		// match: (ULE (CMPconst [0] x))
		// result: (EQ (CMPconst [0] x))
		for b.Controls[0].Op == OpARM64CMPconst {
			v_0 := b.Controls[0]
			if auxIntToInt64(v_0.AuxInt) != 0 {
				break
			}
			x := v_0.Args[0]
			v0 := b.NewValue0(v_0.Pos, OpARM64CMPconst, types.TypeFlags)
			v0.AuxInt = int64ToAuxInt(0)
			v0.AddArg(x)
			b.resetWithControl(BlockARM64EQ, v0)
			return true
		}
		// match: (ULE (CMPWconst [0] x))
		// result: (EQ (CMPWconst [0] x))
		for b.Controls[0].Op == OpARM64CMPWconst {
			v_0 := b.Controls[0]
			if auxIntToInt32(v_0.AuxInt) != 0 {
				break
			}
			x := v_0.Args[0]
			v0 := b.NewValue0(v_0.Pos, OpARM64CMPWconst, types.TypeFlags)
			v0.AuxInt = int32ToAuxInt(0)
			v0.AddArg(x)
			b.resetWithControl(BlockARM64EQ, v0)
			return true
		}
		// match: (ULE (FlagConstant [fc]) yes no)
		// cond: fc.ule()
		// result: (First yes no)
@@ -25414,6 +25532,37 @@ func rewriteBlockARM64(b *Block) bool {
			return true
		}
	case BlockARM64Z:
		// match: (Z sub:(SUB x y))
		// cond: sub.Uses == 1
		// result: (EQ (CMP x y))
		for b.Controls[0].Op == OpARM64SUB {
			sub := b.Controls[0]
			y := sub.Args[1]
			x := sub.Args[0]
			if !(sub.Uses == 1) {
				break
			}
			v0 := b.NewValue0(sub.Pos, OpARM64CMP, types.TypeFlags)
			v0.AddArg2(x, y)
			b.resetWithControl(BlockARM64EQ, v0)
			return true
		}
		// match: (Z sub:(SUBconst [c] y))
		// cond: sub.Uses == 1
		// result: (EQ (CMPconst [c] y))
		for b.Controls[0].Op == OpARM64SUBconst {
			sub := b.Controls[0]
			c := auxIntToInt64(sub.AuxInt)
			y := sub.Args[0]
			if !(sub.Uses == 1) {
				break
			}
			v0 := b.NewValue0(sub.Pos, OpARM64CMPconst, types.TypeFlags)
			v0.AuxInt = int64ToAuxInt(c)
			v0.AddArg(y)
			b.resetWithControl(BlockARM64EQ, v0)
			return true
		}
		// match: (Z (ANDconst [c] x) yes no)
		// cond: oneBit(c)
		// result: (TBZ [int64(ntz64(c))] x yes no)
@@ -25452,6 +25601,37 @@ func rewriteBlockARM64(b *Block) bool {
			return true
		}
	case BlockARM64ZW:
		// match: (ZW sub:(SUB x y))
		// cond: sub.Uses == 1
		// result: (EQ (CMPW x y))
		for b.Controls[0].Op == OpARM64SUB {
			sub := b.Controls[0]
			y := sub.Args[1]
			x := sub.Args[0]
			if !(sub.Uses == 1) {
				break
			}
			v0 := b.NewValue0(sub.Pos, OpARM64CMPW, types.TypeFlags)
			v0.AddArg2(x, y)
			b.resetWithControl(BlockARM64EQ, v0)
			return true
		}
		// match: (ZW sub:(SUBconst [c] y))
		// cond: sub.Uses == 1
		// result: (EQ (CMPWconst [int32(c)] y))
		for b.Controls[0].Op == OpARM64SUBconst {
			sub := b.Controls[0]
			c := auxIntToInt64(sub.AuxInt)
			y := sub.Args[0]
			if !(sub.Uses == 1) {
				break
			}
			v0 := b.NewValue0(sub.Pos, OpARM64CMPWconst, types.TypeFlags)
			v0.AuxInt = int32ToAuxInt(int32(c))
			v0.AddArg(y)
			b.resetWithControl(BlockARM64EQ, v0)
			return true
		}
		// match: (ZW (ANDconst [c] x) yes no)
		// cond: oneBit(int64(uint32(c)))
		// result: (TBZ [int64(ntz64(int64(uint32(c))))] x yes no)

File: test/codegen/slices.go

@@ -429,3 +429,21 @@ func Slice0(p *struct{}, i int) []struct{} {
	// amd64:-"MULQ"
	return unsafe.Slice(p, i)
}

// --------------------------------------- //
// Code generation for slice bounds //
// checking comparison //
// --------------------------------------- //

func SlicePut(a []byte, c uint8) []byte {
	// arm64:`CBZ\tR1`
	a[0] = c
	// arm64:`CMP\t\$1, R1`
	a = a[1:]
	a[0] = c
	// arm64:`CMP\t\$2, R1`
	a = a[1:]
	a[0] = c
	a = a[1:]
	return a
}
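
For context on how the assertions above work (my summary of the codegen test
harness, not text from the CL): each "// arm64:" comment is an asmcheck directive
whose regexp must match (or, with a leading "-", must not match) the assembly the
compiler generates for the adjacent source line, so CBZ\tR1 asserts the first check
is a plain test of the length register and the CMP\t\$N, R1 patterns assert the
later checks compare the original length against small constants. A hypothetical
directive in the same style:

// Hypothetical asmcheck-style function (not part of the CL), shown only to
// illustrate the comment convention: the regexp must match the arm64 assembly
// generated for the store on the following line, which zeroes a byte via ZR.
func ZeroFirst(a []byte) {
	// arm64:`MOVB\tZR`
	a[0] = 0
}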