cmd/compile: optimize small constant-sized MemEq

Add optimization patterns for MemEq with small constant sizes
(3-32 bytes). These patterns help to avoid runtime calls for
small sizes.

For sizes 3-16, load and compare two (possibly overlapping) chunks and combine the results.
For sizes 17-32, combine a 16-byte comparison with the remaining bytes.
This change may slightly increase binary size because the comparisons are
expanded inline, but it improves performance for code with many small
memequal calls, e.g. the DecodehealingTracker benchmark on arm64:

shortname: minio
pkg: github.com/minio/minio/cmd
                               │  Orig.res   │              Uexp.res              │
                               │   sec/op    │   sec/op     vs base               │
DecodehealingTracker-4           842.5n ± 1%   794.0n ± 3%  -5.75% (p=0.000 n=10)
AppendMsgResyncTargetsInfo-4     8.472n ± 0%   8.472n ± 0%       ~ (p=0.582 n=10)
DataUpdateTracker-4              2.856µ ± 2%   2.804µ ± 3%       ~ (p=0.210 n=10)
MarshalMsgdataUsageCacheInfo-4   131.2n ± 1%   131.6n ± 2%       ~ (p=0.494 n=10)
geomean                          227.4n        223.2n       -1.86%

                             │   Orig.res   │              Uexp.res               │
                             │     B/s      │     B/s       vs base               │
DecodehealingTracker-4         352.0Mi ± 1%   373.5Mi ± 3%  +6.10% (p=0.000 n=10)
AppendMsgResyncTargetsInfo-4   1.099Gi ± 0%   1.099Gi ± 0%       ~ (p=0.183 n=10)
DataUpdateTracker-4            341.8Ki ± 3%   351.6Ki ± 3%       ~ (p=0.286 n=10)
geomean                        50.95Mi        52.46Mi       +2.96%

Change-Id: If3d7e7395656d5f36e3ab303a71044293d17bc3e
Reviewed-on: https://go-review.googlesource.com/c/go/+/688195
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
Alexander Musman 2025-07-13 11:09:58 +03:00 committed by Keith Randall
parent 26ffe78b8c
commit c7258178cd
4 changed files with 225 additions and 5 deletions

View file

@ -1560,6 +1560,43 @@
(MemEq p q _ _) && isSamePtr(p, q) => (ConstBool <typ.Bool> [true])
// 3-32 bytes memeq (enabled only with support of unaligned loads and 8-byte max word size)
// Sizes 3, 5, 9 and 17: compare the leading c-1 bytes (2, 4, 8 or 16) with a
// nested MemEq, then compare the single remaining byte at offset c-1.
(MemEq p q (Const64 [c]) mem)
&& (c == 3 || c == 5 || c == 9 || c == 17)
&& canLoadUnaligned(config)
&& config.RegSize == 8
=> (AndB (MemEq p q (Const64 <typ.Int64> [c-1]) mem)
(Eq8 (Load <typ.Int8> (OffPtr <p.Type> p [c-1]) mem) (Load <typ.Int8> (OffPtr <q.Type> q [c-1]) mem)))
// Sizes 6, 10 and 18: compare the leading c-2 bytes (4, 8 or 16) with a
// nested MemEq, then compare the remaining two bytes with a 16-bit load at
// offset c-2.
(MemEq p q (Const64 [c]) mem)
&& (c == 6 || c == 10 || c == 18)
&& canLoadUnaligned(config)
&& config.RegSize == 8
=> (AndB (MemEq p q (Const64 <typ.Int64> [c-2]) mem)
(Eq16 (Load <typ.Int16> (OffPtr <p.Type> p [c-2]) mem) (Load <typ.Int16> (OffPtr <q.Type> q [c-2]) mem)))
// Sizes 7, 11, 19 and 20: compare a leading prefix of min(c-3,16) bytes
// (4, 8, 16 and 16 respectively) with a nested MemEq, then compare the last
// four bytes with a 32-bit load at offset c-4. For 7, 11 and 19 that load
// overlaps the prefix by one byte, so the shared byte is compared twice;
// for 20 the prefix and the load are adjacent.
(MemEq p q (Const64 [c]) mem)
&& (c == 7 || c == 11 || c == 19 || c == 20)
&& canLoadUnaligned(config)
&& config.RegSize == 8
=> (AndB (MemEq p q (Const64 <typ.Int64> [min(c-3,16)]) mem)
(Eq32 (Load <typ.Int32> (OffPtr <p.Type> p [c-4]) mem) (Load <typ.Int32> (OffPtr <q.Type> q [c-4]) mem)))
// Sizes 12-16 and 21-24: compare a leading prefix of 8 bytes (c <= 16) or
// 16 bytes (c > 16) with a nested MemEq, then compare the last eight bytes
// with a 64-bit load at offset c-8. Unless c is exactly 16 or 24 the load
// overlaps the prefix, so the shared bytes are compared twice.
(MemEq p q (Const64 [c]) mem)
&& ((c >= 12 && c <= 16) || (c >= 21 && c <= 24))
&& canLoadUnaligned(config)
&& config.RegSize == 8
=> (AndB (MemEq p q (Const64 <typ.Int64> [8 + int64(bool2int(c>16))*8]) mem)
(Eq64 (Load <typ.Int64> (OffPtr <p.Type> p [c-8]) mem) (Load <typ.Int64> (OffPtr <q.Type> q [c-8]) mem)))
// Sizes 25-32: split into a MemEq of the first 16 bytes and a MemEq of the
// remaining c-16 bytes (9 to 16) starting at offset 16 of both operands.
// Both halves are again constant-sized MemEq values.
(MemEq p q (Const64 [c]) mem)
&& c >= 25 && c <= 32
&& canLoadUnaligned(config)
&& config.RegSize == 8
=> (AndB (MemEq p q (Const64 <typ.Int64> [16]) mem)
(MemEq (OffPtr <p.Type> p [16]) (OffPtr <q.Type> q [16]) (Const64 <typ.Int64> [c-16]) mem))
// Turn known-size calls to memclrNoHeapPointers into a Zero.
// Note that we are using types.Types[types.TUINT8] instead of sptr.Type.Elem() - see issue 55122 and CL 431496 for more details.
(SelectN [0] call:(StaticCall {sym} sptr (Const(64|32) [c]) mem))

View file

@ -2786,3 +2786,12 @@ func imakeOfStructMake(v *Value) *Value {
}
return v.Block.NewValue2(v.Pos, OpIMake, v.Type, v.Args[0], arg)
}
// bool2int maps a boolean onto an integer: 1 for true, 0 for false.
func bool2int(x bool) int {
	if x {
		return 1
	}
	return 0
}

View file

@ -14983,6 +14983,174 @@ func rewriteValuegeneric_OpMemEq(v *Value) bool {
}
break
}
// match: (MemEq p q (Const64 [c]) mem)
// cond: (c == 3 || c == 5 || c == 9 || c == 17) && canLoadUnaligned(config) && config.RegSize == 8
// result: (AndB (MemEq p q (Const64 <typ.Int64> [c-1]) mem) (Eq8 (Load <typ.Int8> (OffPtr <p.Type> p [c-1]) mem) (Load <typ.Int8> (OffPtr <q.Type> q [c-1]) mem)))
for {
p := v_0
q := v_1
if v_2.Op != OpConst64 {
break
}
c := auxIntToInt64(v_2.AuxInt)
mem := v_3
if !((c == 3 || c == 5 || c == 9 || c == 17) && canLoadUnaligned(config) && config.RegSize == 8) {
break
}
v.reset(OpAndB)
v0 := b.NewValue0(v.Pos, OpMemEq, typ.Bool)
v1 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
v1.AuxInt = int64ToAuxInt(c - 1)
v0.AddArg4(p, q, v1, mem)
v2 := b.NewValue0(v.Pos, OpEq8, typ.Bool)
v3 := b.NewValue0(v.Pos, OpLoad, typ.Int8)
v4 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
v4.AuxInt = int64ToAuxInt(c - 1)
v4.AddArg(p)
v3.AddArg2(v4, mem)
v5 := b.NewValue0(v.Pos, OpLoad, typ.Int8)
v6 := b.NewValue0(v.Pos, OpOffPtr, q.Type)
v6.AuxInt = int64ToAuxInt(c - 1)
v6.AddArg(q)
v5.AddArg2(v6, mem)
v2.AddArg2(v3, v5)
v.AddArg2(v0, v2)
return true
}
// match: (MemEq p q (Const64 [c]) mem)
// cond: (c == 6 || c == 10 || c == 18) && canLoadUnaligned(config) && config.RegSize == 8
// result: (AndB (MemEq p q (Const64 <typ.Int64> [c-2]) mem) (Eq16 (Load <typ.Int16> (OffPtr <p.Type> p [c-2]) mem) (Load <typ.Int16> (OffPtr <q.Type> q [c-2]) mem)))
for {
p := v_0
q := v_1
if v_2.Op != OpConst64 {
break
}
c := auxIntToInt64(v_2.AuxInt)
mem := v_3
if !((c == 6 || c == 10 || c == 18) && canLoadUnaligned(config) && config.RegSize == 8) {
break
}
v.reset(OpAndB)
v0 := b.NewValue0(v.Pos, OpMemEq, typ.Bool)
v1 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
v1.AuxInt = int64ToAuxInt(c - 2)
v0.AddArg4(p, q, v1, mem)
v2 := b.NewValue0(v.Pos, OpEq16, typ.Bool)
v3 := b.NewValue0(v.Pos, OpLoad, typ.Int16)
v4 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
v4.AuxInt = int64ToAuxInt(c - 2)
v4.AddArg(p)
v3.AddArg2(v4, mem)
v5 := b.NewValue0(v.Pos, OpLoad, typ.Int16)
v6 := b.NewValue0(v.Pos, OpOffPtr, q.Type)
v6.AuxInt = int64ToAuxInt(c - 2)
v6.AddArg(q)
v5.AddArg2(v6, mem)
v2.AddArg2(v3, v5)
v.AddArg2(v0, v2)
return true
}
// match: (MemEq p q (Const64 [c]) mem)
// cond: (c == 7 || c == 11 || c == 19 || c == 20) && canLoadUnaligned(config) && config.RegSize == 8
// result: (AndB (MemEq p q (Const64 <typ.Int64> [min(c-3,16)]) mem) (Eq32 (Load <typ.Int32> (OffPtr <p.Type> p [c-4]) mem) (Load <typ.Int32> (OffPtr <q.Type> q [c-4]) mem)))
for {
p := v_0
q := v_1
if v_2.Op != OpConst64 {
break
}
c := auxIntToInt64(v_2.AuxInt)
mem := v_3
if !((c == 7 || c == 11 || c == 19 || c == 20) && canLoadUnaligned(config) && config.RegSize == 8) {
break
}
v.reset(OpAndB)
v0 := b.NewValue0(v.Pos, OpMemEq, typ.Bool)
v1 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
v1.AuxInt = int64ToAuxInt(min(c-3, 16))
v0.AddArg4(p, q, v1, mem)
v2 := b.NewValue0(v.Pos, OpEq32, typ.Bool)
v3 := b.NewValue0(v.Pos, OpLoad, typ.Int32)
v4 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
v4.AuxInt = int64ToAuxInt(c - 4)
v4.AddArg(p)
v3.AddArg2(v4, mem)
v5 := b.NewValue0(v.Pos, OpLoad, typ.Int32)
v6 := b.NewValue0(v.Pos, OpOffPtr, q.Type)
v6.AuxInt = int64ToAuxInt(c - 4)
v6.AddArg(q)
v5.AddArg2(v6, mem)
v2.AddArg2(v3, v5)
v.AddArg2(v0, v2)
return true
}
// match: (MemEq p q (Const64 [c]) mem)
// cond: ((c >= 12 && c <= 16) || (c >= 21 && c <= 24)) && canLoadUnaligned(config) && config.RegSize == 8
// result: (AndB (MemEq p q (Const64 <typ.Int64> [8 + int64(bool2int(c>16))*8]) mem) (Eq64 (Load <typ.Int64> (OffPtr <p.Type> p [c-8]) mem) (Load <typ.Int64> (OffPtr <q.Type> q [c-8]) mem)))
for {
p := v_0
q := v_1
if v_2.Op != OpConst64 {
break
}
c := auxIntToInt64(v_2.AuxInt)
mem := v_3
if !(((c >= 12 && c <= 16) || (c >= 21 && c <= 24)) && canLoadUnaligned(config) && config.RegSize == 8) {
break
}
v.reset(OpAndB)
v0 := b.NewValue0(v.Pos, OpMemEq, typ.Bool)
v1 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
v1.AuxInt = int64ToAuxInt(8 + int64(bool2int(c > 16))*8)
v0.AddArg4(p, q, v1, mem)
v2 := b.NewValue0(v.Pos, OpEq64, typ.Bool)
v3 := b.NewValue0(v.Pos, OpLoad, typ.Int64)
v4 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
v4.AuxInt = int64ToAuxInt(c - 8)
v4.AddArg(p)
v3.AddArg2(v4, mem)
v5 := b.NewValue0(v.Pos, OpLoad, typ.Int64)
v6 := b.NewValue0(v.Pos, OpOffPtr, q.Type)
v6.AuxInt = int64ToAuxInt(c - 8)
v6.AddArg(q)
v5.AddArg2(v6, mem)
v2.AddArg2(v3, v5)
v.AddArg2(v0, v2)
return true
}
// match: (MemEq p q (Const64 [c]) mem)
// cond: c >= 25 && c <= 32 && canLoadUnaligned(config) && config.RegSize == 8
// result: (AndB (MemEq p q (Const64 <typ.Int64> [16]) mem) (MemEq (OffPtr <p.Type> p [16]) (OffPtr <q.Type> q [16]) (Const64 <typ.Int64> [c-16]) mem))
for {
p := v_0
q := v_1
if v_2.Op != OpConst64 {
break
}
c := auxIntToInt64(v_2.AuxInt)
mem := v_3
if !(c >= 25 && c <= 32 && canLoadUnaligned(config) && config.RegSize == 8) {
break
}
v.reset(OpAndB)
v0 := b.NewValue0(v.Pos, OpMemEq, typ.Bool)
v1 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
v1.AuxInt = int64ToAuxInt(16)
v0.AddArg4(p, q, v1, mem)
v2 := b.NewValue0(v.Pos, OpMemEq, typ.Bool)
v3 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
v3.AuxInt = int64ToAuxInt(16)
v3.AddArg(p)
v4 := b.NewValue0(v.Pos, OpOffPtr, q.Type)
v4.AuxInt = int64ToAuxInt(16)
v4.AddArg(q)
v5 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
v5.AuxInt = int64ToAuxInt(c - 16)
v2.AddArg4(v3, v4, v5, mem)
v.AddArg2(v0, v2)
return true
}
return false
}
func rewriteValuegeneric_OpMod16(v *Value) bool {

View file

@ -661,16 +661,22 @@ func equalVarString8(a string) bool {
}
func equalVarStringNoSpill(a, b string) bool {
s := string("ZZZZZZZZZ")
s := string("123456789012345678901234567890123")
// arm64:".*memequal"
memeq1 := a[:9] == s
memeq1 := a[:33] == s
// arm64:-".*"
memeq2 := s == a[:9]
// arm64:-"MOVB R0,.*SP",".*memequal"
memeq3 := s == b[:9]
memeq2 := s == a[:33]
// arm64:-"MOVB R0,.*SP" ".*memequal"
memeq3 := s == b[:33]
return memeq1 && memeq2 && memeq3
}
// equalVarString17 compares the first 17 bytes of a against a 17-byte
// constant string. The assembly check inside the body requires that no
// memequal call is emitted and that the comparison appears as inline
// constants. 55 is the ASCII code of the final byte '7'; the two MOVD
// immediates presumably hold the two 8-byte halves of the constant
// (TODO confirm).
func equalVarString17(a string) bool {
b := string("12345678901234567")
// arm64:-".*memequal" "CMPW [$]55," "MOVD [$]3906085646303834169," "MOVD [$]4050765991979987505,"
return a[:17] == b
}
func cmpToCmn(a, b, c, d int) int {
var c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 int
// arm64:`CMN`,-`CMP`