mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: make a copy of Phi input if it is still live
Register of Phi input is allocated to the Phi. So if the Phi input is still live after Phi, we may need to use a spill. In this case, copy the Phi input to a spare register to avoid a spill. Originally targeted the code in issue #16187, and this CL indeed removes the spill, but doesn't seem to help on benchmark result. It may help in general, though. On AMD64: name old time/op new time/op delta BinaryTree17-12 2.79s ± 1% 2.76s ± 0% -1.33% (p=0.000 n=10+10) Fannkuch11-12 3.02s ± 0% 3.14s ± 0% +3.99% (p=0.000 n=10+10) FmtFprintfEmpty-12 51.2ns ± 0% 51.4ns ± 3% ~ (p=0.368 n=8+10) FmtFprintfString-12 145ns ± 0% 144ns ± 0% -0.69% (p=0.000 n=6+9) FmtFprintfInt-12 127ns ± 1% 124ns ± 1% -2.79% (p=0.000 n=10+9) FmtFprintfIntInt-12 186ns ± 0% 184ns ± 0% -1.34% (p=0.000 n=10+9) FmtFprintfPrefixedInt-12 196ns ± 0% 194ns ± 0% -0.97% (p=0.000 n=9+9) FmtFprintfFloat-12 293ns ± 2% 287ns ± 0% -2.00% (p=0.000 n=10+9) FmtManyArgs-12 847ns ± 1% 829ns ± 0% -2.17% (p=0.000 n=10+7) GobDecode-12 7.17ms ± 0% 7.18ms ± 0% ~ (p=0.123 n=10+10) GobEncode-12 6.08ms ± 1% 6.08ms ± 0% ~ (p=0.497 n=10+9) Gzip-12 277ms ± 1% 275ms ± 1% -0.47% (p=0.028 n=10+9) Gunzip-12 39.1ms ± 2% 38.2ms ± 1% -2.20% (p=0.000 n=10+9) HTTPClientServer-12 90.9µs ± 4% 87.7µs ± 2% -3.51% (p=0.001 n=9+10) JSONEncode-12 17.3ms ± 1% 16.5ms ± 0% -5.02% (p=0.000 n=9+9) JSONDecode-12 54.6ms ± 1% 54.1ms ± 0% -0.99% (p=0.000 n=9+9) Mandelbrot200-12 4.45ms ± 0% 4.45ms ± 0% -0.02% (p=0.006 n=8+9) GoParse-12 3.44ms ± 0% 3.48ms ± 1% +0.95% (p=0.000 n=10+10) RegexpMatchEasy0_32-12 84.9ns ± 0% 85.0ns ± 0% ~ (p=0.241 n=8+8) RegexpMatchEasy0_1K-12 867ns ± 3% 915ns ±11% +5.55% (p=0.037 n=10+10) RegexpMatchEasy1_32-12 82.7ns ± 5% 83.9ns ± 4% ~ (p=0.161 n=9+10) RegexpMatchEasy1_1K-12 361ns ± 1% 363ns ± 0% ~ (p=0.098 n=10+8) RegexpMatchMedium_32-12 126ns ± 0% 126ns ± 1% ~ (p=0.549 n=8+10) RegexpMatchMedium_1K-12 38.8µs ± 0% 39.1µs ± 0% +0.67% (p=0.000 n=9+8) RegexpMatchHard_32-12 1.95µs ± 0% 1.96µs ± 0% +0.43% (p=0.000 n=9+9) RegexpMatchHard_1K-12 59.0µs ± 0% 59.1µs ± 0% +0.27% (p=0.000 n=10+9) Revcomp-12 436ms ± 1% 431ms ± 1% -1.19% (p=0.005 n=10+10) Template-12 56.7ms ± 1% 57.1ms ± 1% +0.71% (p=0.001 n=10+9) TimeParse-12 312ns ± 0% 310ns ± 0% -0.80% (p=0.000 n=10+9) TimeFormat-12 336ns ± 0% 332ns ± 0% -1.19% (p=0.000 n=8+7) [Geo mean] 59.2µs 58.9µs -0.42% On PPC64: name old time/op new time/op delta BinaryTree17-2 4.67s ± 2% 4.71s ± 1% ~ (p=0.421 n=5+5) Fannkuch11-2 3.92s ± 1% 3.94s ± 0% +0.46% (p=0.032 n=5+5) FmtFprintfEmpty-2 122ns ± 0% 120ns ± 2% -1.80% (p=0.016 n=4+5) FmtFprintfString-2 305ns ± 1% 299ns ± 1% -1.84% (p=0.008 n=5+5) FmtFprintfInt-2 243ns ± 0% 241ns ± 1% -0.66% (p=0.016 n=4+5) FmtFprintfIntInt-2 361ns ± 1% 356ns ± 1% -1.49% (p=0.016 n=5+5) FmtFprintfPrefixedInt-2 355ns ± 1% 357ns ± 1% ~ (p=0.333 n=5+5) FmtFprintfFloat-2 502ns ± 2% 498ns ± 1% ~ (p=0.151 n=5+5) FmtManyArgs-2 1.55µs ± 2% 1.59µs ± 1% +2.52% (p=0.008 n=5+5) GobDecode-2 13.0ms ± 1% 13.0ms ± 1% ~ (p=0.841 n=5+5) GobEncode-2 11.8ms ± 1% 11.8ms ± 1% ~ (p=0.690 n=5+5) Gzip-2 499ms ± 1% 503ms ± 0% ~ (p=0.421 n=5+5) Gunzip-2 86.5ms ± 0% 86.4ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-2 68.2µs ± 2% 69.6µs ± 3% ~ (p=0.151 n=5+5) JSONEncode-2 39.0ms ± 1% 37.2ms ± 1% -4.65% (p=0.008 n=5+5) JSONDecode-2 122ms ± 1% 126ms ± 1% +2.63% (p=0.008 n=5+5) Mandelbrot200-2 6.08ms ± 1% 5.89ms ± 1% -3.06% (p=0.008 n=5+5) GoParse-2 5.95ms ± 2% 5.98ms ± 1% ~ (p=0.421 n=5+5) RegexpMatchEasy0_32-2 331ns ± 1% 328ns ± 1% ~ (p=0.056 n=5+5) RegexpMatchEasy0_1K-2 1.45µs ± 0% 1.47µs ± 0% +1.13% (p=0.008 n=5+5) RegexpMatchEasy1_32-2 359ns ± 0% 353ns ± 0% -1.84% (p=0.008 n=5+5) RegexpMatchEasy1_1K-2 1.79µs ± 0% 1.81µs ± 1% +1.16% (p=0.008 n=5+5) RegexpMatchMedium_32-2 420ns ± 2% 413ns ± 0% -1.72% (p=0.008 n=5+5) RegexpMatchMedium_1K-2 70.2µs ± 1% 69.5µs ± 1% -1.09% (p=0.032 n=5+5) RegexpMatchHard_32-2 3.87µs ± 1% 3.65µs ± 0% -5.86% (p=0.008 n=5+5) RegexpMatchHard_1K-2 111µs ± 0% 105µs ± 0% -5.49% (p=0.016 n=5+4) Revcomp-2 1.00s ± 1% 1.01s ± 2% ~ (p=0.151 n=5+5) Template-2 113ms ± 1% 113ms ± 2% ~ (p=0.841 n=5+5) TimeParse-2 555ns ± 0% 550ns ± 1% -0.87% (p=0.032 n=5+5) TimeFormat-2 736ns ± 1% 704ns ± 1% -4.35% (p=0.008 n=5+5) [Geo mean] 120µs 119µs -0.77% Reduce "spilled value remains" by 0.6% in cmd/go on AMD64. Change-Id: If655df343b0f30d1a49ab1ab644f10c698b96f3e Reviewed-on: https://go-review.googlesource.com/32442 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
d24b57a6a1
commit
348275cda6
1 changed files with 26 additions and 2 deletions
|
|
@ -869,7 +869,6 @@ func (s *regAllocState) regalloc(f *Func) {
|
|||
m := s.values[a.ID].regs &^ phiUsed & s.allocatable
|
||||
if m != 0 {
|
||||
r := pickReg(m)
|
||||
s.freeReg(r)
|
||||
phiUsed |= regMask(1) << r
|
||||
phiRegs = append(phiRegs, r)
|
||||
} else {
|
||||
|
|
@ -878,7 +877,7 @@ func (s *regAllocState) regalloc(f *Func) {
|
|||
}
|
||||
|
||||
// Second pass - deallocate any phi inputs which are now dead.
|
||||
for _, v := range phis {
|
||||
for i, v := range phis {
|
||||
if !s.values[v.ID].needReg {
|
||||
continue
|
||||
}
|
||||
|
|
@ -887,6 +886,31 @@ func (s *regAllocState) regalloc(f *Func) {
|
|||
// Input is dead beyond the phi, deallocate
|
||||
// anywhere else it might live.
|
||||
s.freeRegs(s.values[a.ID].regs)
|
||||
} else {
|
||||
// Input is still live.
|
||||
// Try to move it around before kicking out, if there is a free register.
|
||||
// We generate a Copy in the predecessor block and record it. It will be
|
||||
// deleted if never used.
|
||||
r := phiRegs[i]
|
||||
if r == noRegister {
|
||||
continue
|
||||
}
|
||||
// Pick a free register. At this point some registers used in the predecessor
|
||||
// block may have been deallocated. Those are the ones used for Phis. Exclude
|
||||
// them (and they are not going to be helpful anyway).
|
||||
m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
|
||||
if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
|
||||
r2 := pickReg(m)
|
||||
c := p.NewValue1(a.Line, OpCopy, a.Type, s.regs[r].c)
|
||||
s.copies[c] = false
|
||||
if s.f.pass.debug > regDebug {
|
||||
fmt.Printf("copy %s to %s : %s\n", a, c, s.registers[r2].Name())
|
||||
}
|
||||
s.setOrig(c, a)
|
||||
s.assignReg(r2, a, c)
|
||||
s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
|
||||
}
|
||||
s.freeReg(r)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue