mirror of
https://github.com/golang/go.git
synced 2026-02-07 02:09:55 +00:00
cmd/compile: optimize small constant-sized MemEq
Add optimization patterns for MemEq with small constant sizes
(3-32 bytes). These patterns help to avoid runtime calls for
small sizes.
For sizes 3-16, load and compare two (possibly overlapping) chunks and combine the results.
For sizes 17-32, combine a 16-byte comparison with a comparison of the remaining bytes.
This change may increase binary size slightly due to inline expansion,
but improves performance for code with many small memequal calls,
e.g. the DecodehealingTracker benchmark on arm64:
shortname: minio
pkg: github.com/minio/minio/cmd
│ Orig.res │ Uexp.res │
│ sec/op │ sec/op vs base │
DecodehealingTracker-4 842.5n ± 1% 794.0n ± 3% -5.75% (p=0.000 n=10)
AppendMsgResyncTargetsInfo-4 8.472n ± 0% 8.472n ± 0% ~ (p=0.582 n=10)
DataUpdateTracker-4 2.856µ ± 2% 2.804µ ± 3% ~ (p=0.210 n=10)
MarshalMsgdataUsageCacheInfo-4 131.2n ± 1% 131.6n ± 2% ~ (p=0.494 n=10)
geomean 227.4n 223.2n -1.86%
│ Orig.res │ Uexp.res │
│ B/s │ B/s vs base │
DecodehealingTracker-4 352.0Mi ± 1% 373.5Mi ± 3% +6.10% (p=0.000 n=10)
AppendMsgResyncTargetsInfo-4 1.099Gi ± 0% 1.099Gi ± 0% ~ (p=0.183 n=10)
DataUpdateTracker-4 341.8Ki ± 3% 351.6Ki ± 3% ~ (p=0.286 n=10)
geomean 50.95Mi 52.46Mi +2.96%
Change-Id: If3d7e7395656d5f36e3ab303a71044293d17bc3e
Reviewed-on: https://go-review.googlesource.com/c/go/+/688195
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
parent
26ffe78b8c
commit
c7258178cd
4 changed files with 225 additions and 5 deletions
|
|
@ -1560,6 +1560,43 @@
|
|||
|
||||
(MemEq p q _ _) && isSamePtr(p, q) => (ConstBool <typ.Bool> [true])
|
||||
|
||||
// Split MemEq with a constant size of 3-32 bytes into smaller comparisons (enabled only when unaligned loads are supported and the register size is 8 bytes)
|
||||
|
||||
(MemEq p q (Const64 [c]) mem)
|
||||
&& (c == 3 || c == 5 || c == 9 || c == 17)
|
||||
&& canLoadUnaligned(config)
|
||||
&& config.RegSize == 8
|
||||
=> (AndB (MemEq p q (Const64 <typ.Int64> [c-1]) mem)
|
||||
(Eq8 (Load <typ.Int8> (OffPtr <p.Type> p [c-1]) mem) (Load <typ.Int8> (OffPtr <q.Type> q [c-1]) mem)))
|
||||
|
||||
(MemEq p q (Const64 [c]) mem)
|
||||
&& (c == 6 || c == 10 || c == 18)
|
||||
&& canLoadUnaligned(config)
|
||||
&& config.RegSize == 8
|
||||
=> (AndB (MemEq p q (Const64 <typ.Int64> [c-2]) mem)
|
||||
(Eq16 (Load <typ.Int16> (OffPtr <p.Type> p [c-2]) mem) (Load <typ.Int16> (OffPtr <q.Type> q [c-2]) mem)))
|
||||
|
||||
(MemEq p q (Const64 [c]) mem)
|
||||
&& (c == 7 || c == 11 || c == 19 || c == 20)
|
||||
&& canLoadUnaligned(config)
|
||||
&& config.RegSize == 8
|
||||
=> (AndB (MemEq p q (Const64 <typ.Int64> [min(c-3,16)]) mem)
|
||||
(Eq32 (Load <typ.Int32> (OffPtr <p.Type> p [c-4]) mem) (Load <typ.Int32> (OffPtr <q.Type> q [c-4]) mem)))
|
||||
|
||||
(MemEq p q (Const64 [c]) mem)
|
||||
&& ((c >= 12 && c <= 16) || (c >= 21 && c <= 24))
|
||||
&& canLoadUnaligned(config)
|
||||
&& config.RegSize == 8
|
||||
=> (AndB (MemEq p q (Const64 <typ.Int64> [8 + int64(bool2int(c>16))*8]) mem)
|
||||
(Eq64 (Load <typ.Int64> (OffPtr <p.Type> p [c-8]) mem) (Load <typ.Int64> (OffPtr <q.Type> q [c-8]) mem)))
|
||||
|
||||
(MemEq p q (Const64 [c]) mem)
|
||||
&& c >= 25 && c <= 32
|
||||
&& canLoadUnaligned(config)
|
||||
&& config.RegSize == 8
|
||||
=> (AndB (MemEq p q (Const64 <typ.Int64> [16]) mem)
|
||||
(MemEq (OffPtr <p.Type> p [16]) (OffPtr <q.Type> q [16]) (Const64 <typ.Int64> [c-16]) mem))
|
||||
|
||||
// Turn known-size calls to memclrNoHeapPointers into a Zero.
|
||||
// Note that we are using types.Types[types.TUINT8] instead of sptr.Type.Elem() - see issue 55122 and CL 431496 for more details.
|
||||
(SelectN [0] call:(StaticCall {sym} sptr (Const(64|32) [c]) mem))
|
||||
|
|
|
|||
|
|
@ -2786,3 +2786,12 @@ func imakeOfStructMake(v *Value) *Value {
|
|||
}
|
||||
return v.Block.NewValue2(v.Pos, OpIMake, v.Type, v.Args[0], arg)
|
||||
}
|
||||
|
||||
// bool2int maps a boolean onto an integer: it returns 1 when x is true
// and 0 when x is false.
func bool2int(x bool) int {
	if x {
		return 1
	}
	return 0
}
|
||||
|
|
|
|||
|
|
@ -14983,6 +14983,174 @@ func rewriteValuegeneric_OpMemEq(v *Value) bool {
|
|||
}
|
||||
break
|
||||
}
|
||||
// match: (MemEq p q (Const64 [c]) mem)
|
||||
// cond: (c == 3 || c == 5 || c == 9 || c == 17) && canLoadUnaligned(config) && config.RegSize == 8
|
||||
// result: (AndB (MemEq p q (Const64 <typ.Int64> [c-1]) mem) (Eq8 (Load <typ.Int8> (OffPtr <p.Type> p [c-1]) mem) (Load <typ.Int8> (OffPtr <q.Type> q [c-1]) mem)))
|
||||
for {
|
||||
p := v_0
|
||||
q := v_1
|
||||
if v_2.Op != OpConst64 {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_2.AuxInt)
|
||||
mem := v_3
|
||||
if !((c == 3 || c == 5 || c == 9 || c == 17) && canLoadUnaligned(config) && config.RegSize == 8) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAndB)
|
||||
v0 := b.NewValue0(v.Pos, OpMemEq, typ.Bool)
|
||||
v1 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
|
||||
v1.AuxInt = int64ToAuxInt(c - 1)
|
||||
v0.AddArg4(p, q, v1, mem)
|
||||
v2 := b.NewValue0(v.Pos, OpEq8, typ.Bool)
|
||||
v3 := b.NewValue0(v.Pos, OpLoad, typ.Int8)
|
||||
v4 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
|
||||
v4.AuxInt = int64ToAuxInt(c - 1)
|
||||
v4.AddArg(p)
|
||||
v3.AddArg2(v4, mem)
|
||||
v5 := b.NewValue0(v.Pos, OpLoad, typ.Int8)
|
||||
v6 := b.NewValue0(v.Pos, OpOffPtr, q.Type)
|
||||
v6.AuxInt = int64ToAuxInt(c - 1)
|
||||
v6.AddArg(q)
|
||||
v5.AddArg2(v6, mem)
|
||||
v2.AddArg2(v3, v5)
|
||||
v.AddArg2(v0, v2)
|
||||
return true
|
||||
}
|
||||
// match: (MemEq p q (Const64 [c]) mem)
|
||||
// cond: (c == 6 || c == 10 || c == 18) && canLoadUnaligned(config) && config.RegSize == 8
|
||||
// result: (AndB (MemEq p q (Const64 <typ.Int64> [c-2]) mem) (Eq16 (Load <typ.Int16> (OffPtr <p.Type> p [c-2]) mem) (Load <typ.Int16> (OffPtr <q.Type> q [c-2]) mem)))
|
||||
for {
|
||||
p := v_0
|
||||
q := v_1
|
||||
if v_2.Op != OpConst64 {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_2.AuxInt)
|
||||
mem := v_3
|
||||
if !((c == 6 || c == 10 || c == 18) && canLoadUnaligned(config) && config.RegSize == 8) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAndB)
|
||||
v0 := b.NewValue0(v.Pos, OpMemEq, typ.Bool)
|
||||
v1 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
|
||||
v1.AuxInt = int64ToAuxInt(c - 2)
|
||||
v0.AddArg4(p, q, v1, mem)
|
||||
v2 := b.NewValue0(v.Pos, OpEq16, typ.Bool)
|
||||
v3 := b.NewValue0(v.Pos, OpLoad, typ.Int16)
|
||||
v4 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
|
||||
v4.AuxInt = int64ToAuxInt(c - 2)
|
||||
v4.AddArg(p)
|
||||
v3.AddArg2(v4, mem)
|
||||
v5 := b.NewValue0(v.Pos, OpLoad, typ.Int16)
|
||||
v6 := b.NewValue0(v.Pos, OpOffPtr, q.Type)
|
||||
v6.AuxInt = int64ToAuxInt(c - 2)
|
||||
v6.AddArg(q)
|
||||
v5.AddArg2(v6, mem)
|
||||
v2.AddArg2(v3, v5)
|
||||
v.AddArg2(v0, v2)
|
||||
return true
|
||||
}
|
||||
// match: (MemEq p q (Const64 [c]) mem)
|
||||
// cond: (c == 7 || c == 11 || c == 19 || c == 20) && canLoadUnaligned(config) && config.RegSize == 8
|
||||
// result: (AndB (MemEq p q (Const64 <typ.Int64> [min(c-3,16)]) mem) (Eq32 (Load <typ.Int32> (OffPtr <p.Type> p [c-4]) mem) (Load <typ.Int32> (OffPtr <q.Type> q [c-4]) mem)))
|
||||
for {
|
||||
p := v_0
|
||||
q := v_1
|
||||
if v_2.Op != OpConst64 {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_2.AuxInt)
|
||||
mem := v_3
|
||||
if !((c == 7 || c == 11 || c == 19 || c == 20) && canLoadUnaligned(config) && config.RegSize == 8) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAndB)
|
||||
v0 := b.NewValue0(v.Pos, OpMemEq, typ.Bool)
|
||||
v1 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
|
||||
v1.AuxInt = int64ToAuxInt(min(c-3, 16))
|
||||
v0.AddArg4(p, q, v1, mem)
|
||||
v2 := b.NewValue0(v.Pos, OpEq32, typ.Bool)
|
||||
v3 := b.NewValue0(v.Pos, OpLoad, typ.Int32)
|
||||
v4 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
|
||||
v4.AuxInt = int64ToAuxInt(c - 4)
|
||||
v4.AddArg(p)
|
||||
v3.AddArg2(v4, mem)
|
||||
v5 := b.NewValue0(v.Pos, OpLoad, typ.Int32)
|
||||
v6 := b.NewValue0(v.Pos, OpOffPtr, q.Type)
|
||||
v6.AuxInt = int64ToAuxInt(c - 4)
|
||||
v6.AddArg(q)
|
||||
v5.AddArg2(v6, mem)
|
||||
v2.AddArg2(v3, v5)
|
||||
v.AddArg2(v0, v2)
|
||||
return true
|
||||
}
|
||||
// match: (MemEq p q (Const64 [c]) mem)
|
||||
// cond: ((c >= 12 && c <= 16) || (c >= 21 && c <= 24)) && canLoadUnaligned(config) && config.RegSize == 8
|
||||
// result: (AndB (MemEq p q (Const64 <typ.Int64> [8 + int64(bool2int(c>16))*8]) mem) (Eq64 (Load <typ.Int64> (OffPtr <p.Type> p [c-8]) mem) (Load <typ.Int64> (OffPtr <q.Type> q [c-8]) mem)))
|
||||
for {
|
||||
p := v_0
|
||||
q := v_1
|
||||
if v_2.Op != OpConst64 {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_2.AuxInt)
|
||||
mem := v_3
|
||||
if !(((c >= 12 && c <= 16) || (c >= 21 && c <= 24)) && canLoadUnaligned(config) && config.RegSize == 8) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAndB)
|
||||
v0 := b.NewValue0(v.Pos, OpMemEq, typ.Bool)
|
||||
v1 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
|
||||
v1.AuxInt = int64ToAuxInt(8 + int64(bool2int(c > 16))*8)
|
||||
v0.AddArg4(p, q, v1, mem)
|
||||
v2 := b.NewValue0(v.Pos, OpEq64, typ.Bool)
|
||||
v3 := b.NewValue0(v.Pos, OpLoad, typ.Int64)
|
||||
v4 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
|
||||
v4.AuxInt = int64ToAuxInt(c - 8)
|
||||
v4.AddArg(p)
|
||||
v3.AddArg2(v4, mem)
|
||||
v5 := b.NewValue0(v.Pos, OpLoad, typ.Int64)
|
||||
v6 := b.NewValue0(v.Pos, OpOffPtr, q.Type)
|
||||
v6.AuxInt = int64ToAuxInt(c - 8)
|
||||
v6.AddArg(q)
|
||||
v5.AddArg2(v6, mem)
|
||||
v2.AddArg2(v3, v5)
|
||||
v.AddArg2(v0, v2)
|
||||
return true
|
||||
}
|
||||
// match: (MemEq p q (Const64 [c]) mem)
|
||||
// cond: c >= 25 && c <= 32 && canLoadUnaligned(config) && config.RegSize == 8
|
||||
// result: (AndB (MemEq p q (Const64 <typ.Int64> [16]) mem) (MemEq (OffPtr <p.Type> p [16]) (OffPtr <q.Type> q [16]) (Const64 <typ.Int64> [c-16]) mem))
|
||||
for {
|
||||
p := v_0
|
||||
q := v_1
|
||||
if v_2.Op != OpConst64 {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_2.AuxInt)
|
||||
mem := v_3
|
||||
if !(c >= 25 && c <= 32 && canLoadUnaligned(config) && config.RegSize == 8) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAndB)
|
||||
v0 := b.NewValue0(v.Pos, OpMemEq, typ.Bool)
|
||||
v1 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
|
||||
v1.AuxInt = int64ToAuxInt(16)
|
||||
v0.AddArg4(p, q, v1, mem)
|
||||
v2 := b.NewValue0(v.Pos, OpMemEq, typ.Bool)
|
||||
v3 := b.NewValue0(v.Pos, OpOffPtr, p.Type)
|
||||
v3.AuxInt = int64ToAuxInt(16)
|
||||
v3.AddArg(p)
|
||||
v4 := b.NewValue0(v.Pos, OpOffPtr, q.Type)
|
||||
v4.AuxInt = int64ToAuxInt(16)
|
||||
v4.AddArg(q)
|
||||
v5 := b.NewValue0(v.Pos, OpConst64, typ.Int64)
|
||||
v5.AuxInt = int64ToAuxInt(c - 16)
|
||||
v2.AddArg4(v3, v4, v5, mem)
|
||||
v.AddArg2(v0, v2)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValuegeneric_OpMod16(v *Value) bool {
|
||||
|
|
|
|||
|
|
@ -661,16 +661,22 @@ func equalVarString8(a string) bool {
|
|||
}
|
||||
|
||||
// equalVarStringNoSpill compares the constant string s with prefixes of a
// and b in three separate comparisons and returns true only if all three
// are equal. Each "// arm64:" comment is an assembly expectation for the
// statement that follows it; do not move or reword them.
// NOTE(review): this diff view shows both the removed 9-byte lines and the
// added 33-byte lines for s, memeq1, memeq2 and memeq3 — confirm against
// the hunk header before treating the body as a single function.
// NOTE(review): the -"MOVB R0,.*SP" check presumably guards against
// spilling R0 to the stack (hence "NoSpill") — confirm.
func equalVarStringNoSpill(a, b string) bool {
|
||||
s := string("ZZZZZZZZZ")
|
||||
s := string("123456789012345678901234567890123")
|
||||
// arm64:".*memequal"
|
||||
memeq1 := a[:9] == s
|
||||
memeq1 := a[:33] == s
|
||||
// arm64:-".*"
|
||||
memeq2 := s == a[:9]
|
||||
// arm64:-"MOVB R0,.*SP",".*memequal"
|
||||
memeq3 := s == b[:9]
|
||||
memeq2 := s == a[:33]
|
||||
// arm64:-"MOVB R0,.*SP" ".*memequal"
|
||||
memeq3 := s == b[:33]
|
||||
return memeq1 && memeq2 && memeq3
|
||||
}
|
||||
|
||||
// equalVarString17 reports whether the first 17 bytes of a equal the
// 17-byte constant "12345678901234567". The "// arm64:" comment is an
// assembly expectation for the return statement: no memequal call, a CMPW
// against 55 (the byte value of the final '7'), and two 64-bit MOVD
// constants.
// NOTE(review): the two MOVD constants presumably encode the first 16 bytes
// of the literal as little-endian 8-byte words — confirm.
func equalVarString17(a string) bool {
|
||||
b := string("12345678901234567")
|
||||
// arm64:-".*memequal" "CMPW [$]55," "MOVD [$]3906085646303834169," "MOVD [$]4050765991979987505,"
|
||||
return a[:17] == b
|
||||
}
|
||||
|
||||
func cmpToCmn(a, b, c, d int) int {
|
||||
var c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 int
|
||||
// arm64:`CMN`,-`CMP`
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue