mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: improve stp merging for non-sequent cases
Original algorithm merges stores with the first mergeable store in the chain, but it misses some cases. Additional reordering stores in increasing order of memory access in the chain allows merging in these cases. Fixes #71987 There are the results of sweet benchmarks and the difference between sizes of sections .text │ old.results │ new.results │ │ sec/op │ sec/op vs base │ BleveIndexBatch100-4 7.614 ± 2% 7.548 ± 1% ~ (p=0.190 n=10) ESBuildThreeJS-4 821.3m ± 0% 819.0m ± 1% ~ (p=0.165 n=10) ESBuildRomeTS-4 206.2m ± 1% 204.4m ± 1% -0.90% (p=0.023 n=10) EtcdPut-4 64.89m ± 1% 64.94m ± 2% ~ (p=0.684 n=10) EtcdSTM-4 318.4m ± 0% 319.2m ± 1% ~ (p=0.631 n=10) GoBuildKubelet-4 157.4 ± 0% 157.6 ± 0% ~ (p=0.105 n=10) GoBuildKubeletLink-4 12.42 ± 2% 12.41 ± 1% ~ (p=0.529 n=10) GoBuildIstioctl-4 124.4 ± 0% 124.4 ± 0% ~ (p=0.579 n=10) GoBuildIstioctlLink-4 8.700 ± 1% 8.693 ± 1% ~ (p=0.912 n=10) GoBuildFrontend-4 46.52 ± 0% 46.50 ± 0% ~ (p=0.971 n=10) GoBuildFrontendLink-4 2.282 ± 1% 2.272 ± 1% ~ (p=0.529 n=10) GoBuildTsgo-4 75.02 ± 1% 75.31 ± 1% ~ (p=0.436 n=10) GoBuildTsgoLink-4 1.229 ± 1% 1.219 ± 1% -0.82% (p=0.035 n=10) GopherLuaKNucleotide-4 34.77 ± 5% 34.31 ± 1% -1.33% (p=0.015 n=10) MarkdownRenderXHTML-4 286.6m ± 0% 285.7m ± 1% ~ (p=0.315 n=10) Tile38QueryLoad-4 657.2µ ± 1% 660.3µ ± 0% ~ (p=0.436 n=10) geomean 2.570 2.563 -0.24% Executable Old .text New .text Change ------------------------------------------------------- benchmark 6504820 6504020 -0.01% bleve-index-bench 3903860 3903636 -0.01% esbuild 4801012 4801172 +0.00% esbuild-bench 1256404 1256340 -0.01% etcd 9188148 9187076 -0.01% etcd-bench 6462228 6461524 -0.01% go 5924468 5923892 -0.01% go-build-bench 1282004 1281940 -0.00% gopher-lua-bench 1639540 1639348 -0.01% markdown-bench 1478452 1478356 -0.01% tile38-bench 2753524 2753300 -0.01% tile38-server 10241380 10240068 -0.01% Change-Id: Ieb4fdfd656aca458f65fc45938de70550632bd13 Reviewed-on: https://go-review.googlesource.com/c/go/+/698097 Reviewed-by: Keith Randall <khr@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Auto-Submit: Keith Randall <khr@golang.org> Reviewed-by: Mark Freeman <markfreeman@google.com> Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
parent
bdd51e7855
commit
4c63d798cb
2 changed files with 133 additions and 6 deletions
|
|
@ -212,6 +212,12 @@ func pairStores(f *Func) {
|
|||
last := f.Cache.allocBoolSlice(f.NumValues())
|
||||
defer f.Cache.freeBoolSlice(last)
|
||||
|
||||
type stChainElem struct {
|
||||
v *Value
|
||||
i int // Index in chain (0 == last store)
|
||||
}
|
||||
var order []stChainElem
|
||||
|
||||
// prevStore returns the previous store in the
|
||||
// same block, or nil if there are none.
|
||||
prevStore := func(v *Value) *Value {
|
||||
|
|
@ -225,6 +231,27 @@ func pairStores(f *Func) {
|
|||
return m
|
||||
}
|
||||
|
||||
// storeWidth returns the width of store,
|
||||
// or 0 if it is not a store
|
||||
storeWidth := func(op Op) int64 {
|
||||
var width int64
|
||||
switch op {
|
||||
case OpARM64MOVDstore, OpARM64FMOVDstore:
|
||||
width = 8
|
||||
case OpARM64MOVWstore, OpARM64FMOVSstore:
|
||||
width = 4
|
||||
case OpARM64MOVHstore:
|
||||
width = 2
|
||||
case OpARM64MOVBstore:
|
||||
width = 1
|
||||
default:
|
||||
width = 0
|
||||
}
|
||||
return width
|
||||
}
|
||||
|
||||
const limit = 10
|
||||
|
||||
for _, b := range f.Blocks {
|
||||
// Find last store in block, so we can
|
||||
// walk the stores last to first.
|
||||
|
|
@ -250,9 +277,84 @@ func pairStores(f *Func) {
|
|||
}
|
||||
}
|
||||
|
||||
order = order[:0]
|
||||
for i, v := 0, lastMem; v != nil; v = prevStore(v) {
|
||||
order = append(order, stChainElem{v, i})
|
||||
i++
|
||||
}
|
||||
reordering:
|
||||
for i, v_elem := range order {
|
||||
v := v_elem.v
|
||||
if v.Uses != 1 {
|
||||
// We can't reorder stores if the earlier
|
||||
// store has any use besides the next one
|
||||
// in the store chain.
|
||||
// (Unless we could check the aliasing of
|
||||
// all those other uses.)
|
||||
continue
|
||||
}
|
||||
widthV := storeWidth(v.Op)
|
||||
if widthV == 0 {
|
||||
// Can't reorder with any other memory operations.
|
||||
// (atomics, calls, ...)
|
||||
continue
|
||||
}
|
||||
chain := order[i+1:]
|
||||
count := limit
|
||||
// Var 'count' keeps us in O(n) territory
|
||||
for j, w_elem := range chain {
|
||||
if count--; count == 0 {
|
||||
// Only look back so far.
|
||||
// This keeps us in O(n) territory, and it
|
||||
// also prevents us from keeping values
|
||||
// in registers for too long (and thus
|
||||
// needing to spill them).
|
||||
continue reordering
|
||||
}
|
||||
|
||||
w := w_elem.v
|
||||
if w.Uses != 1 {
|
||||
// We can't reorder stores if the earlier
|
||||
// store has any use besides the next one
|
||||
// in the store chain.
|
||||
// (Unless we could check the aliasing of
|
||||
// all those other uses.)
|
||||
continue reordering
|
||||
}
|
||||
|
||||
widthW := storeWidth(w.Op)
|
||||
if widthW == 0 {
|
||||
// Can't reorder with any other memory operations.
|
||||
// (atomics, calls, ...)
|
||||
continue reordering
|
||||
}
|
||||
|
||||
// We only allow reordering with respect to other
|
||||
// writes to the same pointer and aux, so we can
|
||||
// compute the exact the aliasing relationship.
|
||||
if w.Args[0] != v.Args[0] ||
|
||||
w.Aux != v.Aux {
|
||||
// Can't reorder with operation with incomparable destination memory pointer.
|
||||
continue reordering
|
||||
}
|
||||
if overlap(w.AuxInt, widthW, v.AuxInt, widthV) {
|
||||
// Aliases with the same slot with v's location.
|
||||
continue reordering
|
||||
}
|
||||
|
||||
// Reordering stores in increasing order of memory access
|
||||
if v.AuxInt < w.AuxInt {
|
||||
order[i], order[i+j+1] = order[i+j+1], order[i]
|
||||
v = w
|
||||
widthV = widthW
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check all stores, from last to first.
|
||||
memCheck:
|
||||
for v := lastMem; v != nil; v = prevStore(v) {
|
||||
for i, v_elem := range order {
|
||||
v := v_elem.v
|
||||
info := pairableStores[v.Op]
|
||||
if info.width == 0 {
|
||||
continue // Not pairable.
|
||||
|
|
@ -269,8 +371,10 @@ func pairStores(f *Func) {
|
|||
// Look for earlier store we can combine with.
|
||||
lowerOk := true
|
||||
higherOk := true
|
||||
count := 10 // max lookback distance
|
||||
for w := prevStore(v); w != nil; w = prevStore(w) {
|
||||
count := limit // max lookback distance
|
||||
chain := order[i+1:]
|
||||
for _, w_elem := range chain {
|
||||
w := w_elem.v
|
||||
if w.Uses != 1 {
|
||||
// We can't combine stores if the earlier
|
||||
// store has any use besides the next one
|
||||
|
|
@ -293,11 +397,17 @@ func pairStores(f *Func) {
|
|||
args[1], args[2] = args[2], args[1]
|
||||
off -= info.width
|
||||
}
|
||||
|
||||
v.reset(info.pair)
|
||||
v.AddArgs(args...)
|
||||
v.Aux = aux
|
||||
v.AuxInt = off
|
||||
v.Pos = w.Pos // take position of earlier of the two stores (TODO: not really working?)
|
||||
// Take position of earlier of the two stores
|
||||
if v_elem.i < w_elem.i {
|
||||
v.Pos = w.Pos
|
||||
} else {
|
||||
w.Pos = v.Pos
|
||||
}
|
||||
|
||||
// Make w just a memory copy.
|
||||
wmem := w.MemoryArg()
|
||||
|
|
|
|||
|
|
@ -1053,17 +1053,34 @@ func dwstoreF32(p *struct{ a, b float32 }, x, y float32) {
|
|||
}
|
||||
|
||||
func dwstoreBig(p *struct{ a, b, c, d, e, f int64 }, a, b, c, d, e, f int64) {
|
||||
// This is not perfect. We merge b+a, then d+e, then c and f have no pair.
|
||||
// arm64:`STP\s\(R[0-9]+, R[0-9]+\), 16\(R[0-9]+\)`
|
||||
p.c = c
|
||||
// arm64:`STP\s\(R[0-9]+, R[0-9]+\), 32\(R[0-9]+\)`
|
||||
p.f = f
|
||||
// arm64:`STP\s\(R[0-9]+, R[0-9]+\), \(R[0-9]+\)`
|
||||
p.a = a
|
||||
// arm64:`STP\s\(R[0-9]+, R[0-9]+\), 24\(R[0-9]+\)`
|
||||
p.e = e
|
||||
p.d = d
|
||||
p.b = b
|
||||
}
|
||||
|
||||
func dwstoreUnorderedArray(p *struct{ a, b, c, d int }, a, b, c, d int) {
|
||||
// arm64:`STP\s\(R[0-9]+, R[0-9]+\), 16\(R[0-9]+\)`
|
||||
p.c = c
|
||||
p.d = d
|
||||
// arm64:`STP\s\(R[0-9]+, R[0-9]+\), \(R[0-9]+\)`
|
||||
p.a = a
|
||||
p.b = b
|
||||
}
|
||||
|
||||
func dwstoreBigNil(p *struct{ i, j struct{ a, b, c int } }) {
|
||||
// arm64:`STP\s\(ZR, ZR\), 32\(R[0-9]+\)`
|
||||
// arm64:`STP\s\(ZR, ZR\), 16\(R[0-9]+\)`
|
||||
p.j = struct{ a, b, c int }{}
|
||||
// arm64:`STP\s\(ZR, ZR\), \(R[0-9]+\)`
|
||||
p.i = struct{ a, b, c int }{}
|
||||
}
|
||||
|
||||
func dwstoreRet() [2]int {
|
||||
// arm64:"STP\t"
|
||||
return [2]int{5, 6}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue