cmd/compile/internal/amd64: add MOVLloadidx8 and MOVLstoreidx8

Currently we only use 1 and 4 as a scale for indexed 4-byte load.
In code generated in #20711 we can use indexed load with scale=8,
to improve performance:

name  old time/op  new time/op  delta
GM-6   108µs ± 0%    95µs ± 0%  -12.06%  (p=0.000 n=10+10)

So add new ops and combine loadidx1(shift 3..).. into loadidx8,
same for stores.

Change-Id: I5ed1c250ac40960e20606580cf9de221e75b72f1
Reviewed-on: https://go-review.googlesource.com/46134
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Ilya Tocar 2017-06-20 15:36:34 -05:00
parent 605331f43e
commit a4e1a72f0a
5 changed files with 306 additions and 9 deletions

View file

@ -426,6 +426,7 @@ func init() {
{name: "MOVWloadidx2", argLength: 3, reg: gploadidx, asm: "MOVWLZX", aux: "SymOff", typ: "UInt16", symEffect: "Read"}, // load 2 bytes from arg0+2*arg1+auxint+aux. arg2=mem
{name: "MOVLloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVL", aux: "SymOff", typ: "UInt32", symEffect: "Read"}, // load 4 bytes from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVLloadidx4", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", typ: "UInt32", symEffect: "Read"}, // load 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem
{name: "MOVLloadidx8", argLength: 3, reg: gploadidx, asm: "MOVL", aux: "SymOff", typ: "UInt32", symEffect: "Read"}, // load 4 bytes from arg0+8*arg1+auxint+aux. arg2=mem
{name: "MOVQloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVQ", aux: "SymOff", typ: "UInt64", symEffect: "Read"}, // load 8 bytes from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVQloadidx8", argLength: 3, reg: gploadidx, asm: "MOVQ", aux: "SymOff", typ: "UInt64", symEffect: "Read"}, // load 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
// TODO: sign-extending indexed loads
@ -435,6 +436,7 @@ func init() {
{name: "MOVWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVW", aux: "SymOff", symEffect: "Write"}, // store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
{name: "MOVLstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVLstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
{name: "MOVLstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVL", aux: "SymOff", symEffect: "Write"}, // store 4 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
{name: "MOVQstoreidx1", argLength: 4, reg: gpstoreidx, asm: "MOVQ", aux: "SymOff", symEffect: "Write"}, // store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVQ", aux: "SymOff", symEffect: "Write"}, // store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
// TODO: add size-mismatched indexed loads, like MOVBstoreidx4.