mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: implement CMOV on amd64
This builds upon the branchelim pass, activating it for amd64 and lowering CondSelect. Special care is made to FPU instructions for NaN handling. Benchmark results on Xeon E5630 (Westmere EP): name old time/op new time/op delta BinaryTree17-16 4.99s ± 9% 4.66s ± 2% ~ (p=0.095 n=5+5) Fannkuch11-16 4.93s ± 3% 5.04s ± 2% ~ (p=0.548 n=5+5) FmtFprintfEmpty-16 58.8ns ± 7% 61.4ns ±14% ~ (p=0.579 n=5+5) FmtFprintfString-16 114ns ± 2% 114ns ± 4% ~ (p=0.603 n=5+5) FmtFprintfInt-16 181ns ± 4% 125ns ± 3% -30.90% (p=0.008 n=5+5) FmtFprintfIntInt-16 263ns ± 2% 217ns ± 2% -17.34% (p=0.008 n=5+5) FmtFprintfPrefixedInt-16 230ns ± 1% 212ns ± 1% -7.99% (p=0.008 n=5+5) FmtFprintfFloat-16 411ns ± 3% 344ns ± 5% -16.43% (p=0.008 n=5+5) FmtManyArgs-16 828ns ± 4% 790ns ± 2% -4.59% (p=0.032 n=5+5) GobDecode-16 10.9ms ± 4% 10.8ms ± 5% ~ (p=0.548 n=5+5) GobEncode-16 9.52ms ± 5% 9.46ms ± 2% ~ (p=1.000 n=5+5) Gzip-16 334ms ± 2% 337ms ± 2% ~ (p=0.548 n=5+5) Gunzip-16 64.4ms ± 1% 65.0ms ± 1% +1.00% (p=0.008 n=5+5) HTTPClientServer-16 156µs ± 3% 155µs ± 3% ~ (p=0.690 n=5+5) JSONEncode-16 21.0ms ± 1% 21.8ms ± 0% +3.76% (p=0.016 n=5+4) JSONDecode-16 95.1ms ± 0% 95.7ms ± 1% ~ (p=0.151 n=5+5) Mandelbrot200-16 6.38ms ± 1% 6.42ms ± 1% ~ (p=0.095 n=5+5) GoParse-16 5.47ms ± 2% 5.36ms ± 1% -1.95% (p=0.016 n=5+5) RegexpMatchEasy0_32-16 111ns ± 1% 111ns ± 1% ~ (p=0.635 n=5+4) RegexpMatchEasy0_1K-16 408ns ± 1% 411ns ± 2% ~ (p=0.087 n=5+5) RegexpMatchEasy1_32-16 103ns ± 1% 104ns ± 1% ~ (p=0.484 n=5+5) RegexpMatchEasy1_1K-16 659ns ± 2% 652ns ± 1% ~ (p=0.571 n=5+5) RegexpMatchMedium_32-16 176ns ± 2% 174ns ± 1% ~ (p=0.476 n=5+5) RegexpMatchMedium_1K-16 58.6µs ± 4% 57.7µs ± 4% ~ (p=0.548 n=5+5) RegexpMatchHard_32-16 3.07µs ± 3% 3.04µs ± 4% ~ (p=0.421 n=5+5) RegexpMatchHard_1K-16 89.2µs ± 1% 87.9µs ± 2% -1.52% (p=0.032 n=5+5) Revcomp-16 575ms ± 0% 587ms ± 2% +2.12% (p=0.032 n=4+5) Template-16 110ms ± 1% 107ms ± 3% -3.00% (p=0.032 n=5+5) TimeParse-16 463ns ± 0% 462ns ± 0% ~ (p=0.810 n=5+4) TimeFormat-16 538ns ± 0% 535ns ± 0% -0.63% (p=0.024 n=5+5) name old speed new speed delta GobDecode-16 70.7MB/s ± 4% 71.4MB/s ± 5% ~ (p=0.452 n=5+5) GobEncode-16 80.7MB/s ± 5% 81.2MB/s ± 2% ~ (p=1.000 n=5+5) Gzip-16 58.2MB/s ± 2% 57.7MB/s ± 2% ~ (p=0.452 n=5+5) Gunzip-16 302MB/s ± 1% 299MB/s ± 1% -0.99% (p=0.008 n=5+5) JSONEncode-16 92.4MB/s ± 1% 89.1MB/s ± 0% -3.63% (p=0.016 n=5+4) JSONDecode-16 20.4MB/s ± 0% 20.3MB/s ± 1% ~ (p=0.135 n=5+5) GoParse-16 10.6MB/s ± 2% 10.8MB/s ± 1% +2.00% (p=0.016 n=5+5) RegexpMatchEasy0_32-16 286MB/s ± 1% 285MB/s ± 3% ~ (p=1.000 n=5+5) RegexpMatchEasy0_1K-16 2.51GB/s ± 1% 2.49GB/s ± 2% ~ (p=0.095 n=5+5) RegexpMatchEasy1_32-16 309MB/s ± 1% 307MB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchEasy1_1K-16 1.55GB/s ± 2% 1.57GB/s ± 1% ~ (p=0.690 n=5+5) RegexpMatchMedium_32-16 5.68MB/s ± 2% 5.73MB/s ± 1% ~ (p=0.579 n=5+5) RegexpMatchMedium_1K-16 17.5MB/s ± 4% 17.8MB/s ± 4% ~ (p=0.500 n=5+5) RegexpMatchHard_32-16 10.4MB/s ± 3% 10.5MB/s ± 4% ~ (p=0.460 n=5+5) RegexpMatchHard_1K-16 11.5MB/s ± 1% 11.7MB/s ± 2% +1.57% (p=0.032 n=5+5) Revcomp-16 442MB/s ± 0% 433MB/s ± 2% -2.05% (p=0.032 n=4+5) Template-16 17.7MB/s ± 1% 18.2MB/s ± 3% +3.12% (p=0.032 n=5+5) Change-Id: I6972e8f35f2b31f9a42ac473a6bf419a18022558 Reviewed-on: https://go-review.googlesource.com/100935 Run-TryBot: Giovanni Bajo <rasky@develer.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
423111081b
commit
a35ec9a59e
9 changed files with 5238 additions and 117 deletions
|
|
@ -132,6 +132,7 @@ func init() {
|
|||
gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
|
||||
gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
|
||||
gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
|
||||
gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
|
||||
|
||||
gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
|
||||
gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
|
||||
|
|
@ -340,10 +341,57 @@ func init() {
|
|||
{name: "BSRQ", argLength: 1, reg: gp11flags, asm: "BSRQ", typ: "(UInt64,Flags)"}, // # of high-order zeroes in 64-bit arg
|
||||
{name: "BSRL", argLength: 1, reg: gp11flags, asm: "BSRL", typ: "(UInt32,Flags)"}, // # of high-order zeroes in 32-bit arg
|
||||
|
||||
// Note ASM for ops moves whole register
|
||||
//
|
||||
{name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
|
||||
{name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
|
||||
// CMOV instructions: 64, 32 and 16-bit sizes.
|
||||
// if arg2 encodes a true result, return arg1, else arg0
|
||||
{name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true},
|
||||
{name: "CMOVQNE", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
|
||||
{name: "CMOVQLT", argLength: 3, reg: gp21, asm: "CMOVQLT", resultInArg0: true},
|
||||
{name: "CMOVQGT", argLength: 3, reg: gp21, asm: "CMOVQGT", resultInArg0: true},
|
||||
{name: "CMOVQLE", argLength: 3, reg: gp21, asm: "CMOVQLE", resultInArg0: true},
|
||||
{name: "CMOVQGE", argLength: 3, reg: gp21, asm: "CMOVQGE", resultInArg0: true},
|
||||
{name: "CMOVQLS", argLength: 3, reg: gp21, asm: "CMOVQLS", resultInArg0: true},
|
||||
{name: "CMOVQHI", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
|
||||
{name: "CMOVQCC", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
|
||||
{name: "CMOVQCS", argLength: 3, reg: gp21, asm: "CMOVQCS", resultInArg0: true},
|
||||
|
||||
{name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true},
|
||||
{name: "CMOVLNE", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
|
||||
{name: "CMOVLLT", argLength: 3, reg: gp21, asm: "CMOVLLT", resultInArg0: true},
|
||||
{name: "CMOVLGT", argLength: 3, reg: gp21, asm: "CMOVLGT", resultInArg0: true},
|
||||
{name: "CMOVLLE", argLength: 3, reg: gp21, asm: "CMOVLLE", resultInArg0: true},
|
||||
{name: "CMOVLGE", argLength: 3, reg: gp21, asm: "CMOVLGE", resultInArg0: true},
|
||||
{name: "CMOVLLS", argLength: 3, reg: gp21, asm: "CMOVLLS", resultInArg0: true},
|
||||
{name: "CMOVLHI", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
|
||||
{name: "CMOVLCC", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
|
||||
{name: "CMOVLCS", argLength: 3, reg: gp21, asm: "CMOVLCS", resultInArg0: true},
|
||||
|
||||
{name: "CMOVWEQ", argLength: 3, reg: gp21, asm: "CMOVWEQ", resultInArg0: true},
|
||||
{name: "CMOVWNE", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
|
||||
{name: "CMOVWLT", argLength: 3, reg: gp21, asm: "CMOVWLT", resultInArg0: true},
|
||||
{name: "CMOVWGT", argLength: 3, reg: gp21, asm: "CMOVWGT", resultInArg0: true},
|
||||
{name: "CMOVWLE", argLength: 3, reg: gp21, asm: "CMOVWLE", resultInArg0: true},
|
||||
{name: "CMOVWGE", argLength: 3, reg: gp21, asm: "CMOVWGE", resultInArg0: true},
|
||||
{name: "CMOVWLS", argLength: 3, reg: gp21, asm: "CMOVWLS", resultInArg0: true},
|
||||
{name: "CMOVWHI", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
|
||||
{name: "CMOVWCC", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
|
||||
{name: "CMOVWCS", argLength: 3, reg: gp21, asm: "CMOVWCS", resultInArg0: true},
|
||||
|
||||
// CMOV with floating point instructions. We need separate pseudo-op to handle
|
||||
// InvertFlags correctly, and to generate special code that handles NaN (unordered flag).
|
||||
// NOTE: the fact that CMOV*EQF here is marked to generate CMOV*NE is not a bug. See
|
||||
// code generation in amd64/ssa.go.
|
||||
{name: "CMOVQEQF", argLength: 3, reg: gp21pax, asm: "CMOVQNE", resultInArg0: true},
|
||||
{name: "CMOVQNEF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
|
||||
{name: "CMOVQGTF", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
|
||||
{name: "CMOVQGEF", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
|
||||
{name: "CMOVLEQF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
|
||||
{name: "CMOVLNEF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
|
||||
{name: "CMOVLGTF", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
|
||||
{name: "CMOVLGEF", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
|
||||
{name: "CMOVWEQF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
|
||||
{name: "CMOVWNEF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
|
||||
{name: "CMOVWGTF", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
|
||||
{name: "CMOVWGEF", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
|
||||
|
||||
{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
|
||||
{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
|
||||
|
|
@ -578,7 +626,6 @@ func init() {
|
|||
{name: "LoweredGetCallerSP", reg: gp01, rematerializeable: true},
|
||||
//arg0=ptr,arg1=mem, returns void. Faults if ptr is nil.
|
||||
{name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
|
||||
|
||||
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
|
||||
// It saves all GP registers if necessary, but may clobber others.
|
||||
{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), ax}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue