mirror of https://github.com/golang/go.git

[dev.ssa] cmd/compile: more 386 port changes

Fix up zero/move code, including duff calls and rep movs.
Handle the new ops generated by dec64.rules.
Fix constant shifts.

Change-Id: I7d89194b29b04311bfafa0fd93b9f5644af04df9
Reviewed-on: https://go-review.googlesource.com/25033
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>

parent 1b0404c4ca
commit 4a33af6bb6

9 changed files with 550 additions and 274 deletions
@@ -83,8 +83,7 @@
 (Not x) -> (XORLconst [1] x)

 // Lowering pointer arithmetic
-(OffPtr [off] ptr) && is32Bit(off) -> (ADDLconst [off] ptr)
-(OffPtr [off] ptr) -> (ADDL (MOVLconst [off]) ptr)
+(OffPtr [off] ptr) -> (ADDLconst [off] ptr)

 (Bswap32 x) -> (BSWAPL x)

@@ -99,6 +98,9 @@
 (ZeroExt8to32 x) -> (MOVBLZX x)
 (ZeroExt16to32 x) -> (MOVWLZX x)

+(Signmask x) -> (SARLconst x [31])
+(Zeromask x) -> (SBBLcarrymask (CMPL (MOVLconst [0]) x))
+
 // Lowering truncation
 // Because we ignore high parts of registers, truncates are just copies.
 (Trunc16to8 x) -> x
@@ -161,6 +163,26 @@
 (Rsh8x16 <t> x y) -> (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [8])))))
 (Rsh8x8 <t> x y) -> (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8])))))

+// constant shifts
+// generic opt rewrites all constant shifts to shift by Const64
+(Lsh32x64 x (Const64 [c])) && uint64(c) < 32 -> (SHLLconst x [c])
+(Rsh32x64 x (Const64 [c])) && uint64(c) < 32 -> (SARLconst x [c])
+(Rsh32Ux64 x (Const64 [c])) && uint64(c) < 32 -> (SHRLconst x [c])
+(Lsh16x64 x (Const64 [c])) && uint64(c) < 16 -> (SHLLconst x [c])
+(Rsh16x64 x (Const64 [c])) && uint64(c) < 16 -> (SARWconst x [c])
+(Rsh16Ux64 x (Const64 [c])) && uint64(c) < 16 -> (SHRWconst x [c])
+(Lsh8x64 x (Const64 [c])) && uint64(c) < 8 -> (SHLLconst x [c])
+(Rsh8x64 x (Const64 [c])) && uint64(c) < 8 -> (SARBconst x [c])
+(Rsh8Ux64 x (Const64 [c])) && uint64(c) < 8 -> (SHRBconst x [c])
+
+// large constant shifts
+(Lsh32x64 _ (Const64 [c])) && uint64(c) >= 32 -> (Const32 [0])
+(Rsh32Ux64 _ (Const64 [c])) && uint64(c) >= 32 -> (Const32 [0])
+(Lsh16x64 _ (Const64 [c])) && uint64(c) >= 16 -> (Const16 [0])
+(Rsh16Ux64 _ (Const64 [c])) && uint64(c) >= 16 -> (Const16 [0])
+(Lsh8x64 _ (Const64 [c])) && uint64(c) >= 8 -> (Const8 [0])
+(Rsh8Ux64 _ (Const64 [c])) && uint64(c) >= 8 -> (Const8 [0])
+
 // Lowering comparisons
 (Less32 x y) -> (SETL (CMPL x y))
 (Less16 x y) -> (SETL (CMPW x y))
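The constant-shift rules above encode Go's shift semantics for counts known at compile time: an in-range count lowers to a single shift instruction, an out-of-range count on a left or unsigned right shift folds to zero, and signed right shifts deliberately get no fold-to-zero rule because they saturate at the sign bit. A plain-Go illustration of the semantics being encoded (ordinary user code, not compiler internals):

package main

import "fmt"

func main() {
	var x uint16 = 0x8001
	fmt.Println(x << 3)  // count < 16: lowers to one SHLLconst-style shift
	fmt.Println(x >> 20) // count >= 16: statically folded to 0 (Const16 [0])

	var y int16 = -1
	fmt.Println(y >> 20) // signed shift saturates at the sign bit: still -1, so no zero fold
}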
@@ -241,7 +263,6 @@
 (Move [s] dst src mem) && SizeAndAlign(s).Size() == 1 -> (MOVBstore dst (MOVBload src mem) mem)
 (Move [s] dst src mem) && SizeAndAlign(s).Size() == 2 -> (MOVWstore dst (MOVWload src mem) mem)
 (Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 -> (MOVLstore dst (MOVLload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 16 -> (MOVOstore dst (MOVOload src mem) mem)
 (Move [s] dst src mem) && SizeAndAlign(s).Size() == 3 ->
 	(MOVBstore [2] dst (MOVBload [2] src mem)
 		(MOVWstore dst (MOVWload src mem) mem))
@@ -254,21 +275,32 @@
 (Move [s] dst src mem) && SizeAndAlign(s).Size() == 7 ->
 	(MOVLstore [3] dst (MOVLload [3] src mem)
 		(MOVLstore dst (MOVLload src mem) mem))
+(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 ->
+	(MOVLstore [4] dst (MOVLload [4] src mem)
+		(MOVLstore dst (MOVLload src mem) mem))
+
+// Adjust moves to be a multiple of 4 bytes.
+(Move [s] dst src mem)
+	&& SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size()%4 != 0 ->
+	(Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%4]
+		(ADDLconst <dst.Type> dst [SizeAndAlign(s).Size()%4])
+		(ADDLconst <src.Type> src [SizeAndAlign(s).Size()%4])
+		(MOVLstore dst (MOVLload src mem) mem))
+
 // Medium copying uses a duff device.
 (Move [s] dst src mem)
-	&& SizeAndAlign(s).Size() >= 32 && SizeAndAlign(s).Size() <= 16*64 && SizeAndAlign(s).Size()%16 == 0
+	&& SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size() <= 4*128 && SizeAndAlign(s).Size()%4 == 0
 	&& !config.noDuffDevice ->
-	(DUFFCOPY [14*(64-SizeAndAlign(s).Size()/16)] dst src mem)
+	(DUFFCOPY [10*(128-SizeAndAlign(s).Size()/4)] dst src mem)
-// 14 and 64 are magic constants. 14 is the number of bytes to encode:
-//	MOVUPS (SI), X0
-//	ADDL $16, SI
-//	MOVUPS X0, (DI)
-//	ADDL $16, DI
-// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
+// 10 and 128 are magic constants. 10 is the number of bytes to encode:
+//	MOVL (SI), CX
+//	ADDL $4, SI
+//	MOVL CX, (DI)
+//	ADDL $4, DI
+// and 128 is the number of such blocks. See src/runtime/duff_386.s:duffcopy.

 // Large copying uses REP MOVSL.
-(Move [s] dst src mem) && (SizeAndAlign(s).Size() > 16*64 || config.noDuffDevice) && SizeAndAlign(s).Size()%8 == 0 ->
+(Move [s] dst src mem) && (SizeAndAlign(s).Size() > 4*128 || config.noDuffDevice) && SizeAndAlign(s).Size()%4 == 0 ->
 	(REPMOVSL dst src (MOVLconst [SizeAndAlign(s).Size()/4]) mem)

 // Lowering Zero instructions
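The DUFFCOPY auxint picks an entry point inside the runtime's duffcopy routine so that exactly Size()/4 of its 128 copy blocks execute; per the comment above, each block is 10 bytes of code. A sketch of that arithmetic as a standalone helper (the function name is illustrative; the rule inlines the expression directly):

// duffCopyOffset386 returns the byte offset into duffcopy at which to enter
// so that size/4 of the 128 ten-byte copy blocks remain to be executed.
// Illustrative only; the rewrite rule writes this as 10*(128-size/4).
func duffCopyOffset386(size int64) int64 {
	if size%4 != 0 || size <= 8 || size > 4*128 {
		panic("bad duffcopy size")
	}
	blocksToRun := size / 4 // each block copies 4 bytes
	blocksToSkip := 128 - blocksToRun
	return 10 * blocksToSkip // each block encodes to 10 bytes
}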
@@ -309,11 +341,22 @@
 	(MOVLstoreconst [makeValAndOff(0,4)] destptr
 		(MOVLstoreconst [0] destptr mem))))

+// Medium zeroing uses a duff device.
+(Zero [s] destptr mem)
+	&& SizeAndAlign(s).Size() > 16
+	&& SizeAndAlign(s).Size() <= 4*128
+	&& SizeAndAlign(s).Size()%4 == 0
+	&& !config.noDuffDevice ->
+	(DUFFZERO [1*(128-SizeAndAlign(s).Size()/4)] destptr (MOVLconst [0]) mem)
+// 1 and 128 are magic constants. 1 is the number of bytes to encode STOSL.
+// 128 is the number of STOSL instructions in duffzero.
+// See src/runtime/duff_386.s:duffzero.
+
 // Large zeroing uses REP STOSQ.
 (Zero [s] destptr mem)
-	&& (SizeAndAlign(s).Size() > 1024 || (config.noDuffDevice && SizeAndAlign(s).Size() > 32))
-	&& SizeAndAlign(s).Size()%8 == 0 ->
-	(REPSTOSL destptr (MOVLconst [SizeAndAlign(s).Size()/8]) (MOVLconst [0]) mem)
+	&& (SizeAndAlign(s).Size() > 4*128 || (config.noDuffDevice && SizeAndAlign(s).Size() > 16))
+	&& SizeAndAlign(s).Size()%4 == 0 ->
+	(REPSTOSL destptr (MOVLconst [SizeAndAlign(s).Size()/4]) (MOVLconst [0]) mem)

 // Lowering constants
 (Const8 [val]) -> (MOVLconst [val])
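For sizes past the duff threshold the rule falls back to REP STOSL, with CX = Size()/4 longwords and EAX = 0. A rough Go model of what the emitted instruction pair does (for intuition only, not the code generator):

// repStosl models REPSTOSL: store the 32-bit value in eax n times,
// advancing the destination by 4 bytes each iteration.
func repStosl(dst []uint32, n int, eax uint32) {
	for i := 0; i < n; i++ { // REP prefix: repeat CX (= n) times
		dst[i] = eax // STOSL: *DI = EAX; DI += 4
	}
}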
@@ -596,14 +639,12 @@
 (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVBload [off1+off2] {sym} ptr mem)
 (MOVSSload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVSSload [off1+off2] {sym} ptr mem)
 (MOVSDload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVSDload [off1+off2] {sym} ptr mem)
-(MOVOload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVOload [off1+off2] {sym} ptr mem)

 (MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVLstore [off1+off2] {sym} ptr val mem)
 (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} ptr val mem)
 (MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} ptr val mem)
 (MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVSSstore [off1+off2] {sym} ptr val mem)
 (MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVSDstore [off1+off2] {sym} ptr val mem)
-(MOVOstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVOstore [off1+off2] {sym} ptr val mem)

 // Fold constants into stores.
 (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
@@ -633,8 +674,6 @@
 	(MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
 (MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
 	(MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVOload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem)

 (MOVBLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
 	(MOVBLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)

@@ -651,8 +690,6 @@
 	(MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 (MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
 	(MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVOstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-	(MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)

 (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
 	(MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
@@ -330,8 +330,6 @@ func init() {
 		{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
 		{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
 		{name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
-		{name: "MOVOload", argLength: 2, reg: fpload, asm: "MOVUPS", aux: "SymOff", typ: "Int128"}, // load 16 bytes from arg0+auxint+aux. arg1=mem
-		{name: "MOVOstore", argLength: 3, reg: fpstore, asm: "MOVUPS", aux: "SymOff", typ: "Mem"}, // store 16 bytes in arg1 to arg0+auxint+aux. arg2=mem

 		// indexed loads/stores
 		{name: "MOVBloadidx1", argLength: 3, reg: gploadidx, asm: "MOVBLZX", aux: "SymOff"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem

@@ -360,7 +358,7 @@ func init() {
 		{name: "MOVLstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", aux: "SymValAndOff", typ: "Mem"}, // store low 4 bytes of ... arg1 ...
 		{name: "MOVLstoreconstidx4", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", aux: "SymValAndOff", typ: "Mem"}, // store low 4 bytes of ... 4*arg1 ...

-		// arg0 = (duff-adjusted) pointer to start of memory to zero
+		// arg0 = pointer to start of memory to zero
 		// arg1 = value to store (will always be zero)
 		// arg2 = mem
 		// auxint = offset into duffzero code to start executing

@@ -370,11 +368,10 @@ func init() {
 			aux:       "Int64",
 			argLength: 3,
 			reg: regInfo{
-				inputs:   []regMask{buildReg("DI"), buildReg("X0")},
+				inputs:   []regMask{buildReg("DI"), buildReg("AX")},
 				clobbers: buildReg("DI FLAGS"),
 			},
 		},
-		{name: "MOVOconst", reg: regInfo{nil, 0, []regMask{fp}}, typ: "Int128", aux: "Int128", rematerializeable: true},

 		// arg0 = address of memory to zero
 		// arg1 = # of 4-byte words to zero

@@ -407,7 +404,7 @@ func init() {
 			argLength: 3,
 			reg: regInfo{
 				inputs:   []regMask{buildReg("DI"), buildReg("SI")},
-				clobbers: buildReg("DI SI X0 FLAGS"), // uses X0 as a temporary
+				clobbers: buildReg("DI SI CX FLAGS"), // uses CX as a temporary
 			},
 		},
@@ -400,8 +400,8 @@
 	(Zero [SizeAndAlign(s).Size()-8] (ADDQconst [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem))
 (Zero [s] destptr mem)
 	&& SizeAndAlign(s).Size() <= 1024 && SizeAndAlign(s).Size()%16 == 0 && !config.noDuffDevice ->
-	(DUFFZERO [duffStart(SizeAndAlign(s).Size())]
-		(ADDQconst [duffAdj(SizeAndAlign(s).Size())] destptr) (MOVOconst [0])
+	(DUFFZERO [duffStartAMD64(SizeAndAlign(s).Size())]
+		(ADDQconst [duffAdjAMD64(SizeAndAlign(s).Size())] destptr) (MOVOconst [0])
 		mem)

 // Large zeroing uses REP STOSQ.
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// This file contains rules to decompose [u]int32 types on 32-bit
+// This file contains rules to decompose [u]int64 types on 32-bit
 // architectures. These rules work together with the decomposeBuiltIn
 // pass which handles phis of these types.
@@ -285,8 +285,6 @@ const (
 	Op386MOVBstore
 	Op386MOVWstore
 	Op386MOVLstore
-	Op386MOVOload
-	Op386MOVOstore
 	Op386MOVBloadidx1
 	Op386MOVWloadidx1
 	Op386MOVWloadidx2

@@ -306,7 +304,6 @@ const (
 	Op386MOVLstoreconstidx1
 	Op386MOVLstoreconstidx4
 	Op386DUFFZERO
-	Op386MOVOconst
 	Op386REPSTOSL
 	Op386CALLstatic
 	Op386CALLclosure

@@ -3152,32 +3149,6 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
-	{
-		name:    "MOVOload",
-		auxType: auxSymOff,
-		argLen:  2,
-		asm:     x86.AMOVUPS,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{0, 65791}, // AX CX DX BX SP BP SI DI SB
-			},
-			outputs: []outputInfo{
-				{0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
-			},
-		},
-	},
-	{
-		name:    "MOVOstore",
-		auxType: auxSymOff,
-		argLen:  3,
-		asm:     x86.AMOVUPS,
-		reg: regInfo{
-			inputs: []inputInfo{
-				{1, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
-				{0, 65791}, // AX CX DX BX SP BP SI DI SB
-			},
-		},
-	},
 	{
 		name:    "MOVBloadidx1",
 		auxType: auxSymOff,

@@ -3418,22 +3389,11 @@ var opcodeTable = [...]opInfo{
 		reg: regInfo{
 			inputs: []inputInfo{
 				{0, 128}, // DI
-				{1, 256}, // X0
+				{1, 1}, // AX
 			},
 			clobbers: 131200, // DI FLAGS
 		},
 	},
-	{
-		name:              "MOVOconst",
-		auxType:           auxInt128,
-		argLen:            0,
-		rematerializeable: true,
-		reg: regInfo{
-			outputs: []outputInfo{
-				{0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
-			},
-		},
-	},
 	{
 		name:   "REPSTOSL",
 		argLen: 4,

@@ -3502,7 +3462,7 @@ var opcodeTable = [...]opInfo{
 				{0, 128}, // DI
 				{1, 64}, // SI
 			},
-			clobbers: 131520, // SI DI X0 FLAGS
+			clobbers: 131266, // CX SI DI FLAGS
 		},
 	},
 	{
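The numeric clobber values in opGen.go are bitmasks over the 386 register file; the bit order can be read off from the mask comments in this table (AX at bit 0 through DI at bit 7, X0-X7 at bits 8-15, then SB and FLAGS). A small decoder, assuming that inferred ordering:

package main

import "fmt"

// Bit order inferred from the regMask comments in opGen.go.
var regNames386 = []string{
	"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI",
	"X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",
	"SB", "FLAGS",
}

func decodeMask(mask uint32) []string {
	var regs []string
	for i, name := range regNames386 {
		if mask&(1<<uint(i)) != 0 {
			regs = append(regs, name)
		}
	}
	return regs
}

func main() {
	fmt.Println(decodeMask(131266)) // [CX SI DI FLAGS]: REPSTOSL's new clobber set
	fmt.Println(decodeMask(131520)) // [SI DI X0 FLAGS]: the old clobber set
}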
@@ -254,9 +254,21 @@ func isSamePtr(p1, p2 *Value) bool {
 	return false
 }

-// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD,
-// See runtime/mkduff.go.
-const (
+func duffStartAMD64(size int64) int64 {
+	x, _ := duffAMD64(size)
+	return x
+}
+func duffAdjAMD64(size int64) int64 {
+	_, x := duffAMD64(size)
+	return x
+}
+
+// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
+// required to use the duffzero mechanism for a block of the given size.
+func duffAMD64(size int64) (int64, int64) {
+	// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD,
+	// See runtime/mkduff.go.
+	const (
 		dzBlocks    = 16 // number of MOV/ADD blocks
 		dzBlockLen  = 4  // number of clears per block
 		dzBlockSize = 19 // size of instructions in a single block

@@ -269,24 +281,11 @@
 		dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
 		dzSize     = dzBlocks * dzBlockSize
 	)

-func duffStart(size int64) int64 {
-	x, _ := duff(size)
-	return x
-}
-func duffAdj(size int64) int64 {
-	_, x := duff(size)
-	return x
-}
-
-// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
-// required to use the duffzero mechanism for a block of the given size.
-func duff(size int64) (int64, int64) {
 	if size < 32 || size > 1024 || size%dzClearStep != 0 {
 		panic("bad duffzero size")
 	}
-	// TODO: arch-dependent
 	steps := size / dzClearStep
 	blocks := steps / dzBlockLen
 	steps %= dzBlockLen
@@ -240,18 +240,24 @@ func rewriteValue386(v *Value, config *Config) bool {
 		return rewriteValue386_OpLsh16x16(v, config)
 	case OpLsh16x32:
 		return rewriteValue386_OpLsh16x32(v, config)
+	case OpLsh16x64:
+		return rewriteValue386_OpLsh16x64(v, config)
 	case OpLsh16x8:
 		return rewriteValue386_OpLsh16x8(v, config)
 	case OpLsh32x16:
 		return rewriteValue386_OpLsh32x16(v, config)
 	case OpLsh32x32:
 		return rewriteValue386_OpLsh32x32(v, config)
+	case OpLsh32x64:
+		return rewriteValue386_OpLsh32x64(v, config)
 	case OpLsh32x8:
 		return rewriteValue386_OpLsh32x8(v, config)
 	case OpLsh8x16:
 		return rewriteValue386_OpLsh8x16(v, config)
 	case OpLsh8x32:
 		return rewriteValue386_OpLsh8x32(v, config)
+	case OpLsh8x64:
+		return rewriteValue386_OpLsh8x64(v, config)
 	case OpLsh8x8:
 		return rewriteValue386_OpLsh8x8(v, config)
 	case Op386MOVBLSX:

@@ -290,10 +296,6 @@ func rewriteValue386(v *Value, config *Config) bool {
 		return rewriteValue386_Op386MOVLstoreidx1(v, config)
 	case Op386MOVLstoreidx4:
 		return rewriteValue386_Op386MOVLstoreidx4(v, config)
-	case Op386MOVOload:
-		return rewriteValue386_Op386MOVOload(v, config)
-	case Op386MOVOstore:
-		return rewriteValue386_Op386MOVOstore(v, config)
 	case Op386MOVSDload:
 		return rewriteValue386_Op386MOVSDload(v, config)
 	case Op386MOVSDloadidx1:
@@ -428,36 +430,48 @@ func rewriteValue386(v *Value, config *Config) bool {
 		return rewriteValue386_OpRsh16Ux16(v, config)
 	case OpRsh16Ux32:
 		return rewriteValue386_OpRsh16Ux32(v, config)
+	case OpRsh16Ux64:
+		return rewriteValue386_OpRsh16Ux64(v, config)
 	case OpRsh16Ux8:
 		return rewriteValue386_OpRsh16Ux8(v, config)
 	case OpRsh16x16:
 		return rewriteValue386_OpRsh16x16(v, config)
 	case OpRsh16x32:
 		return rewriteValue386_OpRsh16x32(v, config)
+	case OpRsh16x64:
+		return rewriteValue386_OpRsh16x64(v, config)
 	case OpRsh16x8:
 		return rewriteValue386_OpRsh16x8(v, config)
 	case OpRsh32Ux16:
 		return rewriteValue386_OpRsh32Ux16(v, config)
 	case OpRsh32Ux32:
 		return rewriteValue386_OpRsh32Ux32(v, config)
+	case OpRsh32Ux64:
+		return rewriteValue386_OpRsh32Ux64(v, config)
 	case OpRsh32Ux8:
 		return rewriteValue386_OpRsh32Ux8(v, config)
 	case OpRsh32x16:
 		return rewriteValue386_OpRsh32x16(v, config)
 	case OpRsh32x32:
 		return rewriteValue386_OpRsh32x32(v, config)
+	case OpRsh32x64:
+		return rewriteValue386_OpRsh32x64(v, config)
 	case OpRsh32x8:
 		return rewriteValue386_OpRsh32x8(v, config)
 	case OpRsh8Ux16:
 		return rewriteValue386_OpRsh8Ux16(v, config)
 	case OpRsh8Ux32:
 		return rewriteValue386_OpRsh8Ux32(v, config)
+	case OpRsh8Ux64:
+		return rewriteValue386_OpRsh8Ux64(v, config)
 	case OpRsh8Ux8:
 		return rewriteValue386_OpRsh8Ux8(v, config)
 	case OpRsh8x16:
 		return rewriteValue386_OpRsh8x16(v, config)
 	case OpRsh8x32:
 		return rewriteValue386_OpRsh8x32(v, config)
+	case OpRsh8x64:
+		return rewriteValue386_OpRsh8x64(v, config)
 	case OpRsh8x8:
 		return rewriteValue386_OpRsh8x8(v, config)
 	case Op386SARB:

@@ -516,6 +530,8 @@ func rewriteValue386(v *Value, config *Config) bool {
 		return rewriteValue386_OpSignExt8to16(v, config)
 	case OpSignExt8to32:
 		return rewriteValue386_OpSignExt8to32(v, config)
+	case OpSignmask:
+		return rewriteValue386_OpSignmask(v, config)
 	case OpSqrt:
 		return rewriteValue386_OpSqrt(v, config)
 	case OpStaticCall:

@@ -562,6 +578,8 @@ func rewriteValue386(v *Value, config *Config) bool {
 		return rewriteValue386_OpZeroExt8to16(v, config)
 	case OpZeroExt8to32:
 		return rewriteValue386_OpZeroExt8to32(v, config)
+	case OpZeromask:
+		return rewriteValue386_OpZeromask(v, config)
 	}
 	return false
 }
@@ -4062,6 +4080,45 @@ func rewriteValue386_OpLsh16x32(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValue386_OpLsh16x64(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Lsh16x64 x (Const64 [c]))
+	// cond: uint64(c) < 16
+	// result: (SHLLconst x [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) < 16) {
+			break
+		}
+		v.reset(Op386SHLLconst)
+		v.AddArg(x)
+		v.AuxInt = c
+		return true
+	}
+	// match: (Lsh16x64 _ (Const64 [c]))
+	// cond: uint64(c) >= 16
+	// result: (Const16 [0])
+	for {
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) >= 16) {
+			break
+		}
+		v.reset(OpConst16)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
 func rewriteValue386_OpLsh16x8(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b

@@ -4134,6 +4191,45 @@ func rewriteValue386_OpLsh32x32(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValue386_OpLsh32x64(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Lsh32x64 x (Const64 [c]))
+	// cond: uint64(c) < 32
+	// result: (SHLLconst x [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) < 32) {
+			break
+		}
+		v.reset(Op386SHLLconst)
+		v.AddArg(x)
+		v.AuxInt = c
+		return true
+	}
+	// match: (Lsh32x64 _ (Const64 [c]))
+	// cond: uint64(c) >= 32
+	// result: (Const32 [0])
+	for {
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) >= 32) {
+			break
+		}
+		v.reset(OpConst32)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
 func rewriteValue386_OpLsh32x8(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b

@@ -4206,6 +4302,45 @@ func rewriteValue386_OpLsh8x32(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValue386_OpLsh8x64(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Lsh8x64 x (Const64 [c]))
+	// cond: uint64(c) < 8
+	// result: (SHLLconst x [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) < 8) {
+			break
+		}
+		v.reset(Op386SHLLconst)
+		v.AddArg(x)
+		v.AuxInt = c
+		return true
+	}
+	// match: (Lsh8x64 _ (Const64 [c]))
+	// cond: uint64(c) >= 8
+	// result: (Const8 [0])
+	for {
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) >= 8) {
+			break
+		}
+		v.reset(OpConst8)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
 func rewriteValue386_OpLsh8x8(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
@@ -5997,114 +6132,6 @@ func rewriteValue386_Op386MOVLstoreidx4(v *Value, config *Config) bool {
 	}
 	return false
 }
-func rewriteValue386_Op386MOVOload(v *Value, config *Config) bool {
-	b := v.Block
-	_ = b
-	// match: (MOVOload [off1] {sym} (ADDLconst [off2] ptr) mem)
-	// cond: is32Bit(off1+off2)
-	// result: (MOVOload [off1+off2] {sym} ptr mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		v_0 := v.Args[0]
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		ptr := v_0.Args[0]
-		mem := v.Args[1]
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386MOVOload)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg(ptr)
-		v.AddArg(mem)
-		return true
-	}
-	// match: (MOVOload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		v_0 := v.Args[0]
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		mem := v.Args[1]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVOload)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg(base)
-		v.AddArg(mem)
-		return true
-	}
-	return false
-}
-func rewriteValue386_Op386MOVOstore(v *Value, config *Config) bool {
-	b := v.Block
-	_ = b
-	// match: (MOVOstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
-	// cond: is32Bit(off1+off2)
-	// result: (MOVOstore [off1+off2] {sym} ptr val mem)
-	for {
-		off1 := v.AuxInt
-		sym := v.Aux
-		v_0 := v.Args[0]
-		if v_0.Op != Op386ADDLconst {
-			break
-		}
-		off2 := v_0.AuxInt
-		ptr := v_0.Args[0]
-		val := v.Args[1]
-		mem := v.Args[2]
-		if !(is32Bit(off1 + off2)) {
-			break
-		}
-		v.reset(Op386MOVOstore)
-		v.AuxInt = off1 + off2
-		v.Aux = sym
-		v.AddArg(ptr)
-		v.AddArg(val)
-		v.AddArg(mem)
-		return true
-	}
-	// match: (MOVOstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-	// cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-	// result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
-		v_0 := v.Args[0]
-		if v_0.Op != Op386LEAL {
-			break
-		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		base := v_0.Args[0]
-		val := v.Args[1]
-		mem := v.Args[2]
-		if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-			break
-		}
-		v.reset(Op386MOVOstore)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg(base)
-		v.AddArg(val)
-		v.AddArg(mem)
-		return true
-	}
-	return false
-}
 func rewriteValue386_Op386MOVSDload(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b

@@ -9073,26 +9100,6 @@ func rewriteValue386_OpMove(v *Value, config *Config) bool {
 		return true
 	}
 	// match: (Move [s] dst src mem)
-	// cond: SizeAndAlign(s).Size() == 16
-	// result: (MOVOstore dst (MOVOload src mem) mem)
-	for {
-		s := v.AuxInt
-		dst := v.Args[0]
-		src := v.Args[1]
-		mem := v.Args[2]
-		if !(SizeAndAlign(s).Size() == 16) {
-			break
-		}
-		v.reset(Op386MOVOstore)
-		v.AddArg(dst)
-		v0 := b.NewValue0(v.Line, Op386MOVOload, TypeInt128)
-		v0.AddArg(src)
-		v0.AddArg(mem)
-		v.AddArg(v0)
-		v.AddArg(mem)
-		return true
-	}
-	// match: (Move [s] dst src mem)
 	// cond: SizeAndAlign(s).Size() == 3
 	// result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem))
 	for {
@@ -9209,32 +9216,92 @@ func rewriteValue386_OpMove(v *Value, config *Config) bool {
 		return true
 	}
 	// match: (Move [s] dst src mem)
-	// cond: SizeAndAlign(s).Size() >= 32 && SizeAndAlign(s).Size() <= 16*64 && SizeAndAlign(s).Size()%16 == 0 && !config.noDuffDevice
-	// result: (DUFFCOPY [14*(64-SizeAndAlign(s).Size()/16)] dst src mem)
+	// cond: SizeAndAlign(s).Size() == 8
+	// result: (MOVLstore [4] dst (MOVLload [4] src mem) (MOVLstore dst (MOVLload src mem) mem))
 	for {
 		s := v.AuxInt
 		dst := v.Args[0]
 		src := v.Args[1]
 		mem := v.Args[2]
-		if !(SizeAndAlign(s).Size() >= 32 && SizeAndAlign(s).Size() <= 16*64 && SizeAndAlign(s).Size()%16 == 0 && !config.noDuffDevice) {
+		if !(SizeAndAlign(s).Size() == 8) {
+			break
+		}
+		v.reset(Op386MOVLstore)
+		v.AuxInt = 4
+		v.AddArg(dst)
+		v0 := b.NewValue0(v.Line, Op386MOVLload, config.fe.TypeUInt32())
+		v0.AuxInt = 4
+		v0.AddArg(src)
+		v0.AddArg(mem)
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Line, Op386MOVLstore, TypeMem)
+		v1.AddArg(dst)
+		v2 := b.NewValue0(v.Line, Op386MOVLload, config.fe.TypeUInt32())
+		v2.AddArg(src)
+		v2.AddArg(mem)
+		v1.AddArg(v2)
+		v1.AddArg(mem)
+		v.AddArg(v1)
+		return true
+	}
+	// match: (Move [s] dst src mem)
+	// cond: SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size()%4 != 0
+	// result: (Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%4] (ADDLconst <dst.Type> dst [SizeAndAlign(s).Size()%4]) (ADDLconst <src.Type> src [SizeAndAlign(s).Size()%4]) (MOVLstore dst (MOVLload src mem) mem))
+	for {
+		s := v.AuxInt
+		dst := v.Args[0]
+		src := v.Args[1]
+		mem := v.Args[2]
+		if !(SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size()%4 != 0) {
+			break
+		}
+		v.reset(OpMove)
+		v.AuxInt = SizeAndAlign(s).Size() - SizeAndAlign(s).Size()%4
+		v0 := b.NewValue0(v.Line, Op386ADDLconst, dst.Type)
+		v0.AddArg(dst)
+		v0.AuxInt = SizeAndAlign(s).Size() % 4
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Line, Op386ADDLconst, src.Type)
+		v1.AddArg(src)
+		v1.AuxInt = SizeAndAlign(s).Size() % 4
+		v.AddArg(v1)
+		v2 := b.NewValue0(v.Line, Op386MOVLstore, TypeMem)
+		v2.AddArg(dst)
+		v3 := b.NewValue0(v.Line, Op386MOVLload, config.fe.TypeUInt32())
+		v3.AddArg(src)
+		v3.AddArg(mem)
+		v2.AddArg(v3)
+		v2.AddArg(mem)
+		v.AddArg(v2)
+		return true
+	}
+	// match: (Move [s] dst src mem)
+	// cond: SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size() <= 4*128 && SizeAndAlign(s).Size()%4 == 0 && !config.noDuffDevice
+	// result: (DUFFCOPY [10*(128-SizeAndAlign(s).Size()/4)] dst src mem)
+	for {
+		s := v.AuxInt
+		dst := v.Args[0]
+		src := v.Args[1]
+		mem := v.Args[2]
+		if !(SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size() <= 4*128 && SizeAndAlign(s).Size()%4 == 0 && !config.noDuffDevice) {
 			break
 		}
 		v.reset(Op386DUFFCOPY)
-		v.AuxInt = 14 * (64 - SizeAndAlign(s).Size()/16)
+		v.AuxInt = 10 * (128 - SizeAndAlign(s).Size()/4)
 		v.AddArg(dst)
 		v.AddArg(src)
 		v.AddArg(mem)
 		return true
 	}
 	// match: (Move [s] dst src mem)
-	// cond: (SizeAndAlign(s).Size() > 16*64 || config.noDuffDevice) && SizeAndAlign(s).Size()%8 == 0
+	// cond: (SizeAndAlign(s).Size() > 4*128 || config.noDuffDevice) && SizeAndAlign(s).Size()%4 == 0
 	// result: (REPMOVSL dst src (MOVLconst [SizeAndAlign(s).Size()/4]) mem)
 	for {
 		s := v.AuxInt
 		dst := v.Args[0]
 		src := v.Args[1]
 		mem := v.Args[2]
-		if !((SizeAndAlign(s).Size() > 16*64 || config.noDuffDevice) && SizeAndAlign(s).Size()%8 == 0) {
+		if !((SizeAndAlign(s).Size() > 4*128 || config.noDuffDevice) && SizeAndAlign(s).Size()%4 == 0) {
 			break
 		}
 		v.reset(Op386REPMOVSL)
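The size%4 != 0 case works by storing one 4-byte chunk at the front and then re-issuing a Move of the remaining multiple-of-4 bytes from pointers advanced by size%4, so the two copies overlap by 4-(size%4) bytes. The overlap is harmless because the source and destination of a Move are non-overlapping buffers. A plain-Go sketch of the trick:

// moveAdjusted copies len(dst) bytes (len > 8, not a multiple of 4) from src
// to dst the way the rewrite rule does: a 4-byte store up front, then an
// aligned copy of the rest from adjusted pointers. dst and src must not
// overlap, so rewriting dst[rem:4] with identical bytes is benign.
func moveAdjusted(dst, src []byte) {
	size := len(dst)
	rem := size % 4
	copy(dst[:4], src[:4])             // MOVLstore dst (MOVLload src mem) mem
	copy(dst[rem:size], src[rem:size]) // Move [size-rem] (dst+rem) (src+rem)
}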
@@ -10006,32 +10073,16 @@ func rewriteValue386_OpOffPtr(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
 	// match: (OffPtr [off] ptr)
-	// cond: is32Bit(off)
+	// cond:
 	// result: (ADDLconst [off] ptr)
 	for {
 		off := v.AuxInt
 		ptr := v.Args[0]
-		if !(is32Bit(off)) {
-			break
-		}
 		v.reset(Op386ADDLconst)
 		v.AuxInt = off
 		v.AddArg(ptr)
 		return true
 	}
-	// match: (OffPtr [off] ptr)
-	// cond:
-	// result: (ADDL (MOVLconst [off]) ptr)
-	for {
-		off := v.AuxInt
-		ptr := v.Args[0]
-		v.reset(Op386ADDL)
-		v0 := b.NewValue0(v.Line, Op386MOVLconst, config.fe.TypeUInt32())
-		v0.AuxInt = off
-		v.AddArg(v0)
-		v.AddArg(ptr)
-		return true
-	}
 }
 func rewriteValue386_OpOr16(v *Value, config *Config) bool {
 	b := v.Block
@@ -10243,6 +10294,45 @@ func rewriteValue386_OpRsh16Ux32(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValue386_OpRsh16Ux64(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Rsh16Ux64 x (Const64 [c]))
+	// cond: uint64(c) < 16
+	// result: (SHRWconst x [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) < 16) {
+			break
+		}
+		v.reset(Op386SHRWconst)
+		v.AddArg(x)
+		v.AuxInt = c
+		return true
+	}
+	// match: (Rsh16Ux64 _ (Const64 [c]))
+	// cond: uint64(c) >= 16
+	// result: (Const16 [0])
+	for {
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) >= 16) {
+			break
+		}
+		v.reset(OpConst16)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
 func rewriteValue386_OpRsh16Ux8(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b

@@ -10321,6 +10411,29 @@ func rewriteValue386_OpRsh16x32(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValue386_OpRsh16x64(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Rsh16x64 x (Const64 [c]))
+	// cond: uint64(c) < 16
+	// result: (SARWconst x [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) < 16) {
+			break
+		}
+		v.reset(Op386SARWconst)
+		v.AddArg(x)
+		v.AuxInt = c
+		return true
+	}
+	return false
+}
 func rewriteValue386_OpRsh16x8(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b

@@ -10396,6 +10509,45 @@ func rewriteValue386_OpRsh32Ux32(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValue386_OpRsh32Ux64(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Rsh32Ux64 x (Const64 [c]))
+	// cond: uint64(c) < 32
+	// result: (SHRLconst x [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) < 32) {
+			break
+		}
+		v.reset(Op386SHRLconst)
+		v.AddArg(x)
+		v.AuxInt = c
+		return true
+	}
+	// match: (Rsh32Ux64 _ (Const64 [c]))
+	// cond: uint64(c) >= 32
+	// result: (Const32 [0])
+	for {
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) >= 32) {
+			break
+		}
+		v.reset(OpConst32)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
 func rewriteValue386_OpRsh32Ux8(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b

@@ -10474,6 +10626,29 @@ func rewriteValue386_OpRsh32x32(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValue386_OpRsh32x64(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Rsh32x64 x (Const64 [c]))
+	// cond: uint64(c) < 32
+	// result: (SARLconst x [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) < 32) {
+			break
+		}
+		v.reset(Op386SARLconst)
+		v.AddArg(x)
+		v.AuxInt = c
+		return true
+	}
+	return false
+}
 func rewriteValue386_OpRsh32x8(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b

@@ -10549,6 +10724,45 @@ func rewriteValue386_OpRsh8Ux32(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValue386_OpRsh8Ux64(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Rsh8Ux64 x (Const64 [c]))
+	// cond: uint64(c) < 8
+	// result: (SHRBconst x [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) < 8) {
+			break
+		}
+		v.reset(Op386SHRBconst)
+		v.AddArg(x)
+		v.AuxInt = c
+		return true
+	}
+	// match: (Rsh8Ux64 _ (Const64 [c]))
+	// cond: uint64(c) >= 8
+	// result: (Const8 [0])
+	for {
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) >= 8) {
+			break
+		}
+		v.reset(OpConst8)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
 func rewriteValue386_OpRsh8Ux8(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b

@@ -10627,6 +10841,29 @@ func rewriteValue386_OpRsh8x32(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValue386_OpRsh8x64(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Rsh8x64 x (Const64 [c]))
+	// cond: uint64(c) < 8
+	// result: (SARBconst x [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpConst64 {
+			break
+		}
+		c := v_1.AuxInt
+		if !(uint64(c) < 8) {
+			break
+		}
+		v.reset(Op386SARBconst)
+		v.AddArg(x)
+		v.AuxInt = c
+		return true
+	}
+	return false
+}
 func rewriteValue386_OpRsh8x8(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
@@ -12014,6 +12251,20 @@ func rewriteValue386_OpSignExt8to32(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValue386_OpSignmask(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Signmask x)
+	// cond:
+	// result: (SARLconst x [31])
+	for {
+		x := v.Args[0]
+		v.reset(Op386SARLconst)
+		v.AddArg(x)
+		v.AuxInt = 31
+		return true
+	}
+}
 func rewriteValue386_OpSqrt(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
@@ -12681,19 +12932,38 @@ func rewriteValue386_OpZero(v *Value, config *Config) bool {
 		return true
 	}
 	// match: (Zero [s] destptr mem)
-	// cond: (SizeAndAlign(s).Size() > 1024 || (config.noDuffDevice && SizeAndAlign(s).Size() > 32)) && SizeAndAlign(s).Size()%8 == 0
-	// result: (REPSTOSL destptr (MOVLconst [SizeAndAlign(s).Size()/8]) (MOVLconst [0]) mem)
+	// cond: SizeAndAlign(s).Size() > 16 && SizeAndAlign(s).Size() <= 4*128 && SizeAndAlign(s).Size()%4 == 0 && !config.noDuffDevice
+	// result: (DUFFZERO [1*(128-SizeAndAlign(s).Size()/4)] destptr (MOVLconst [0]) mem)
 	for {
 		s := v.AuxInt
 		destptr := v.Args[0]
 		mem := v.Args[1]
-		if !((SizeAndAlign(s).Size() > 1024 || (config.noDuffDevice && SizeAndAlign(s).Size() > 32)) && SizeAndAlign(s).Size()%8 == 0) {
+		if !(SizeAndAlign(s).Size() > 16 && SizeAndAlign(s).Size() <= 4*128 && SizeAndAlign(s).Size()%4 == 0 && !config.noDuffDevice) {
+			break
+		}
+		v.reset(Op386DUFFZERO)
+		v.AuxInt = 1 * (128 - SizeAndAlign(s).Size()/4)
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Line, Op386MOVLconst, config.fe.TypeUInt32())
+		v0.AuxInt = 0
+		v.AddArg(v0)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (Zero [s] destptr mem)
+	// cond: (SizeAndAlign(s).Size() > 4*128 || (config.noDuffDevice && SizeAndAlign(s).Size() > 16)) && SizeAndAlign(s).Size()%4 == 0
+	// result: (REPSTOSL destptr (MOVLconst [SizeAndAlign(s).Size()/4]) (MOVLconst [0]) mem)
+	for {
+		s := v.AuxInt
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		if !((SizeAndAlign(s).Size() > 4*128 || (config.noDuffDevice && SizeAndAlign(s).Size() > 16)) && SizeAndAlign(s).Size()%4 == 0) {
 			break
 		}
 		v.reset(Op386REPSTOSL)
 		v.AddArg(destptr)
 		v0 := b.NewValue0(v.Line, Op386MOVLconst, config.fe.TypeUInt32())
-		v0.AuxInt = SizeAndAlign(s).Size() / 8
+		v0.AuxInt = SizeAndAlign(s).Size() / 4
 		v.AddArg(v0)
 		v1 := b.NewValue0(v.Line, Op386MOVLconst, config.fe.TypeUInt32())
 		v1.AuxInt = 0
@@ -12742,6 +13012,24 @@ func rewriteValue386_OpZeroExt8to32(v *Value, config *Config) bool {
 		return true
 	}
 }
+func rewriteValue386_OpZeromask(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (Zeromask x)
+	// cond:
+	// result: (SBBLcarrymask (CMPL (MOVLconst [0]) x))
+	for {
+		x := v.Args[0]
+		v.reset(Op386SBBLcarrymask)
+		v0 := b.NewValue0(v.Line, Op386CMPL, TypeFlags)
+		v1 := b.NewValue0(v.Line, Op386MOVLconst, config.fe.TypeUInt32())
+		v1.AuxInt = 0
+		v0.AddArg(v1)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteBlock386(b *Block) bool {
 	switch b.Kind {
 	case Block386EQ:
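For intuition, the semantics these two lowerings implement: Signmask spreads the sign bit across the word with an arithmetic shift, and Zeromask turns the zero test into an all-ones/all-zeros mask via the borrow out of 0 - x. Plain-Go equivalents (illustrative, not compiler code):

// signmask returns 0 for x >= 0 and -1 for x < 0, as SARLconst x [31] does.
func signmask(x int32) int32 {
	return x >> 31
}

// zeromask returns 0 for x == 0 and 0xFFFFFFFF otherwise. CMPL
// (MOVLconst [0]) x computes 0 - x, which sets the carry flag iff
// x != 0; SBBLcarrymask then materializes 0 or -1 from that flag.
func zeromask(x uint32) uint32 {
	if x != 0 {
		return 0xFFFFFFFF
	}
	return 0
}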
@@ -17415,7 +17415,7 @@ func rewriteValueAMD64_OpZero(v *Value, config *Config) bool {
 	}
 	// match: (Zero [s] destptr mem)
 	// cond: SizeAndAlign(s).Size() <= 1024 && SizeAndAlign(s).Size()%16 == 0 && !config.noDuffDevice
-	// result: (DUFFZERO [duffStart(SizeAndAlign(s).Size())] (ADDQconst [duffAdj(SizeAndAlign(s).Size())] destptr) (MOVOconst [0]) mem)
+	// result: (DUFFZERO [duffStartAMD64(SizeAndAlign(s).Size())] (ADDQconst [duffAdjAMD64(SizeAndAlign(s).Size())] destptr) (MOVOconst [0]) mem)
 	for {
 		s := v.AuxInt
 		destptr := v.Args[0]

@@ -17424,9 +17424,9 @@ func rewriteValueAMD64_OpZero(v *Value, config *Config) bool {
 			break
 		}
 		v.reset(OpAMD64DUFFZERO)
-		v.AuxInt = duffStart(SizeAndAlign(s).Size())
+		v.AuxInt = duffStartAMD64(SizeAndAlign(s).Size())
 		v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, config.fe.TypeUInt64())
-		v0.AuxInt = duffAdj(SizeAndAlign(s).Size())
+		v0.AuxInt = duffAdjAMD64(SizeAndAlign(s).Size())
 		v0.AddArg(destptr)
 		v.AddArg(v0)
 		v1 := b.NewValue0(v.Line, OpAMD64MOVOconst, TypeInt128)
@@ -101,11 +101,14 @@ func storeByType(t ssa.Type) obj.As {
 // moveByType returns the reg->reg move instruction of the given type.
 func moveByType(t ssa.Type) obj.As {
 	if t.IsFloat() {
-		// Moving the whole sse2 register is faster
-		// than moving just the correct low portion of it.
-		// There is no xmm->xmm move with 1 byte opcode,
-		// so use movups, which has 2 byte opcode.
-		return x86.AMOVUPS
+		switch t.Size() {
+		case 4:
+			return x86.AMOVSS
+		case 8:
+			return x86.AMOVSD
+		default:
+			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
+		}
 	} else {
 		switch t.Size() {
 		case 1:

@@ -115,8 +118,6 @@ func moveByType(t ssa.Type) obj.As {
 			return x86.AMOVL
 		case 4:
 			return x86.AMOVL
-		case 16:
-			return x86.AMOVUPS // int128s are in SSE registers
 		default:
 			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
 		}

@@ -448,7 +449,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = x
-	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload, ssa.Op386MOVOload:
+	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
 		p := gc.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_MEM
 		p.From.Reg = gc.SSARegNum(v.Args[0])

@@ -496,7 +497,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		gc.AddAux(&p.From, v)
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = gc.SSARegNum(v)
-	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore, ssa.Op386MOVOstore:
+	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore:
 		p := gc.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG
 		p.From.Reg = gc.SSARegNum(v.Args[1])

@@ -584,12 +585,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Type = obj.TYPE_ADDR
 		p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
 		p.To.Offset = v.AuxInt
-	case ssa.Op386MOVOconst:
-		if v.AuxInt != 0 {
-			v.Unimplementedf("MOVOconst can only do constant=0")
-		}
-		r := gc.SSARegNum(v)
-		opregreg(x86.AXORPS, r, r)
 	case ssa.Op386DUFFCOPY:
 		p := gc.Prog(obj.ADUFFCOPY)
 		p.To.Type = obj.TYPE_ADDR

@@ -828,8 +823,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		case ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload,
 			ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore,
 			ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload,
-			ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVOload,
-			ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVOstore:
+			ssa.Op386MOVSSload, ssa.Op386MOVSDload,
+			ssa.Op386MOVSSstore, ssa.Op386MOVSDstore:
 			if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
 				if gc.Debug_checknil != 0 && int(v.Line) > 1 {
 					gc.Warnl(v.Line, "removed nil check")