2016-09-12 14:50:10 -04:00
|
|
|
// Copyright 2016 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
// Lowering arithmetic
|
2018-02-21 19:00:21 +01:00
|
|
|
(Add(64|Ptr) x y) -> (ADD x y)
|
|
|
|
|
(Add(32|16|8) x y) -> (ADDW x y)
|
2016-09-12 14:50:10 -04:00
|
|
|
(Add32F x y) -> (FADDS x y)
|
|
|
|
|
(Add64F x y) -> (FADD x y)
|
|
|
|
|
|
2018-02-21 19:00:21 +01:00
|
|
|
(Sub(64|Ptr) x y) -> (SUB x y)
|
|
|
|
|
(Sub(32|16|8) x y) -> (SUBW x y)
|
2016-09-12 14:50:10 -04:00
|
|
|
(Sub32F x y) -> (FSUBS x y)
|
|
|
|
|
(Sub64F x y) -> (FSUB x y)
|
|
|
|
|
|
|
|
|
|
(Mul64 x y) -> (MULLD x y)
|
2018-02-21 19:00:21 +01:00
|
|
|
(Mul(32|16|8) x y) -> (MULLW x y)
|
2016-09-12 14:50:10 -04:00
|
|
|
(Mul32F x y) -> (FMULS x y)
|
|
|
|
|
(Mul64F x y) -> (FMUL x y)
|
2019-09-08 18:50:24 -04:00
|
|
|
(Mul64uhilo x y) -> (MLGR x y)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
(Div32F x y) -> (FDIVS x y)
|
|
|
|
|
(Div64F x y) -> (FDIV x y)
|
|
|
|
|
|
|
|
|
|
(Div64 x y) -> (DIVD x y)
|
|
|
|
|
(Div64u x y) -> (DIVDU x y)
|
|
|
|
|
// DIVW and DIVWU take a 64-bit dividend and a 32-bit divisor,
|
|
|
|
|
// so a sign/zero extension of the dividend is required.
|
|
|
|
|
(Div32 x y) -> (DIVW (MOVWreg x) y)
|
|
|
|
|
(Div32u x y) -> (DIVWU (MOVWZreg x) y)
|
|
|
|
|
// For 16- and 8-bit division both operands are widened before the divide:
// signed ops use MOVHreg/MOVBreg (the ops the SignExt rules lower to) and
// unsigned ops use MOVHZreg/MOVBZreg (the ops the ZeroExt rules lower to).
(Div16 x y) -> (DIVW (MOVHreg x) (MOVHreg y))
|
|
|
|
|
(Div16u x y) -> (DIVWU (MOVHZreg x) (MOVHZreg y))
|
|
|
|
|
(Div8 x y) -> (DIVW (MOVBreg x) (MOVBreg y))
|
|
|
|
|
(Div8u x y) -> (DIVWU (MOVBZreg x) (MOVBZreg y))
|
|
|
|
|
|
2018-02-21 19:00:21 +01:00
|
|
|
(Hmul(64|64u) x y) -> (MULH(D|DU) x y)
|
2016-09-12 14:50:10 -04:00
|
|
|
// High 32 bits of a 32x32-bit product: widen both operands to 64 bits
// (MOVWreg for the signed op, MOVWZreg for the unsigned one), multiply with
// MULLD and shift the 64-bit product right by 32.
(Hmul32 x y) -> (SRDconst [32] (MULLD (MOVWreg x) (MOVWreg y)))
|
|
|
|
|
(Hmul32u x y) -> (SRDconst [32] (MULLD (MOVWZreg x) (MOVWZreg y)))
|
|
|
|
|
|
2018-02-21 19:00:21 +01:00
|
|
|
(Mod(64|64u) x y) -> (MOD(D|DU) x y)
|
2016-09-12 14:50:10 -04:00
|
|
|
// MODW and MODWU take a 64-bit dividend and a 32-bit divisor,
|
|
|
|
|
// so a sign/zero extension of the dividend is required.
|
|
|
|
|
(Mod32 x y) -> (MODW (MOVWreg x) y)
|
|
|
|
|
(Mod32u x y) -> (MODWU (MOVWZreg x) y)
|
|
|
|
|
(Mod16 x y) -> (MODW (MOVHreg x) (MOVHreg y))
|
|
|
|
|
(Mod16u x y) -> (MODWU (MOVHZreg x) (MOVHZreg y))
|
|
|
|
|
(Mod8 x y) -> (MODW (MOVBreg x) (MOVBreg y))
|
|
|
|
|
(Mod8u x y) -> (MODWU (MOVBZreg x) (MOVBZreg y))
|
|
|
|
|
|
2017-02-13 16:00:09 -08:00
|
|
|
// (x + y) / 2 with x >= y is computed as (x - y) / 2 + y, which cannot overflow.
|
|
|
|
|
(Avg64u <t> x y) -> (ADD (SRDconst <t> (SUB <t> x y) [1]) y)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
(And64 x y) -> (AND x y)
|
2018-02-21 19:00:21 +01:00
|
|
|
(And(32|16|8) x y) -> (ANDW x y)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
(Or64 x y) -> (OR x y)
|
2018-02-21 19:00:21 +01:00
|
|
|
(Or(32|16|8) x y) -> (ORW x y)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
(Xor64 x y) -> (XOR x y)
|
2018-02-21 19:00:21 +01:00
|
|
|
(Xor(32|16|8) x y) -> (XORW x y)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2018-03-27 11:52:56 +01:00
|
|
|
(Neg64 x) -> (NEG x)
|
|
|
|
|
(Neg(32|16|8) x) -> (NEGW x)
|
2016-09-12 14:50:10 -04:00
|
|
|
(Neg32F x) -> (FNEGS x)
|
|
|
|
|
(Neg64F x) -> (FNEG x)
|
|
|
|
|
|
|
|
|
|
(Com64 x) -> (NOT x)
|
2018-02-21 19:00:21 +01:00
|
|
|
(Com(32|16|8) x) -> (NOTW x)
|
2016-10-21 08:39:39 -04:00
|
|
|
// NOT (produced by the Com64 lowering) is rewritten to an XOR with an
// all-ones constant.
// NOTE(review): the `&& true` condition is always satisfied; presumably it is
// there for the rule generator's benefit rather than for matching — confirm.
(NOT x) && true -> (XOR (MOVDconst [-1]) x)
|
2016-09-12 14:50:10 -04:00
|
|
|
// 32-bit counterpart: NOTW (produced by the Com32/16/8 lowering) becomes an
// XOR with the constant -1.
(NOTW x) && true -> (XORWconst [-1] x)
|
|
|
|
|
|
|
|
|
|
// Lowering boolean ops
|
|
|
|
|
(AndB x y) -> (ANDW x y)
|
|
|
|
|
(OrB x y) -> (ORW x y)
|
|
|
|
|
// Boolean negation flips bit 0. This is only a negation if booleans are held
// as 0 or 1, which is what the comparison rules in this file materialize.
(Not x) -> (XORWconst [1] x)
|
|
|
|
|
|
|
|
|
|
// Lowering pointer arithmetic
|
|
|
|
|
// Offsets from the stack pointer are turned into an address op (MOVDaddr).
// NOTE(review): this rule and the is32Bit one that follows presumably have to
// stay ahead of the unconditional OffPtr fallback, since rule order decides
// which pattern is tried first — confirm against the rule generator.
(OffPtr [off] ptr:(SP)) -> (MOVDaddr [off] ptr)
|
|
|
|
|
(OffPtr [off] ptr) && is32Bit(off) -> (ADDconst [off] ptr)
|
|
|
|
|
(OffPtr [off] ptr) -> (ADD (MOVDconst [off]) ptr)
|
|
|
|
|
|
2018-04-25 11:52:06 -07:00
|
|
|
// TODO: optimize these cases?
|
|
|
|
|
(Ctz64NonZero x) -> (Ctz64 x)
|
|
|
|
|
(Ctz32NonZero x) -> (Ctz32 x)
|
|
|
|
|
|
2016-09-16 21:42:18 -04:00
|
|
|
// Ctz(x) = 64 - findLeftmostOne((x-1)&^x)
|
|
|
|
|
(Ctz64 <t> x) -> (SUB (MOVDconst [64]) (FLOGR (AND <t> (SUBconst <t> [1] x) (NOT <t> x))))
|
|
|
|
|
// 32-bit variant of the Ctz formula: the (x-1)&^x mask is built with 32-bit
// ops and then zero-extended with MOVWZreg before being passed to FLOGR.
// NOTE(review): presumably FLOGR inspects all 64 bits, which is why the upper
// half has to be cleared — confirm.
(Ctz32 <t> x) -> (SUB (MOVDconst [64]) (FLOGR (MOVWZreg (ANDW <t> (SUBWconst <t> [1] x) (NOTW <t> x)))))
|
|
|
|
|
|
2017-03-16 14:08:31 -07:00
|
|
|
// BitLen64(x) = 64 - FLOGR(x).
// NOTE(review): assumes FLOGR yields the number of leading zero bits (64 when
// x == 0), consistent with its use in the Ctz rules — confirm.
(BitLen64 x) -> (SUB (MOVDconst [64]) (FLOGR x))
|
|
|
|
|
|
cmd/compile: implement OnesCount{8,16,32,64} intrinsics on s390x
This CL implements the math/bits.OnesCount{8,16,32,64} functions
as intrinsics on s390x using the 'population count' (popcnt)
instruction. This instruction was released as the 'population-count'
facility which uses the same facility bit (45) as the
'distinct-operands' facility which is a pre-requisite for Go on
s390x. We can therefore use it without a feature check.
The s390x popcnt instruction treats a 64 bit register as a vector
of 8 bytes, summing the number of ones in each byte individually.
It then writes the results to the corresponding bytes in the
output register. Therefore to implement OnesCount{16,32,64} we
need to sum the individual byte counts using some extra
instructions. To do this efficiently I've added some additional
pseudo operations to the s390x SSA backend.
Unlike other architectures the new instruction sequence is faster
for OnesCount8, so that is implemented using the intrinsic.
name old time/op new time/op delta
OnesCount 3.21ns ± 1% 1.35ns ± 0% -58.00% (p=0.000 n=20+20)
OnesCount8 0.91ns ± 1% 0.81ns ± 0% -11.43% (p=0.000 n=20+20)
OnesCount16 1.51ns ± 3% 1.21ns ± 0% -19.71% (p=0.000 n=20+17)
OnesCount32 1.91ns ± 0% 1.12ns ± 1% -41.60% (p=0.000 n=19+20)
OnesCount64 3.18ns ± 4% 1.35ns ± 0% -57.52% (p=0.000 n=20+20)
Change-Id: Id54f0bd28b6db9a887ad12c0d72fcc168ef9c4e0
Reviewed-on: https://go-review.googlesource.com/114675
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2018-05-25 17:54:58 +01:00
|
|
|
// POPCNT treats the input register as a vector of 8 bytes, producing
|
|
|
|
|
// a population count for each individual byte. For inputs larger than
|
|
|
|
|
// a single byte we therefore need to sum the individual bytes produced
|
|
|
|
|
// by the POPCNT instruction. For example, the following instruction
|
|
|
|
|
// sequence could be used to calculate the population count of a 4-byte
|
|
|
|
|
// value:
|
|
|
|
|
//
|
|
|
|
|
// MOVD $0x12345678, R1 // R1=0x12345678 <-- input
|
|
|
|
|
// POPCNT R1, R2 // R2=0x02030404
|
|
|
|
|
// SRW $16, R2, R3 // R3=0x00000203
|
|
|
|
|
// ADDW R2, R3, R4 // R4=0x02030607
|
|
|
|
|
// SRW $8, R4, R5 // R5=0x00020306
|
|
|
|
|
// ADDW R4, R5, R6 // R6=0x0205090d
|
|
|
|
|
// MOVBZ R6, R7 // R7=0x0000000d <-- result is 13
|
|
|
|
|
//
|
|
|
|
|
// A single byte needs no summing: zero-extending the input first leaves every
// other byte zero, so the per-byte counts POPCNT produces for them are zero.
(PopCount8 x) -> (POPCNT (MOVBZreg x))
|
|
|
|
|
(PopCount16 x) -> (MOVBZreg (SumBytes2 (POPCNT <typ.UInt16> x)))
|
|
|
|
|
(PopCount32 x) -> (MOVBZreg (SumBytes4 (POPCNT <typ.UInt32> x)))
|
|
|
|
|
(PopCount64 x) -> (MOVBZreg (SumBytes8 (POPCNT <typ.UInt64> x)))
|
|
|
|
|
|
|
|
|
|
// SumBytes{2,4,8} pseudo operations sum the values of the rightmost
|
|
|
|
|
// 2, 4 or 8 bytes respectively. The result is a single byte however
|
|
|
|
|
// other bytes might contain junk so a zero extension is required if
|
|
|
|
|
// the desired output type is larger than 1 byte.
|
|
|
|
|
(SumBytes2 x) -> (ADDW (SRWconst <typ.UInt8> x [8]) x)
|
|
|
|
|
(SumBytes4 x) -> (SumBytes2 (ADDW <typ.UInt16> (SRWconst <typ.UInt16> x [16]) x))
|
|
|
|
|
(SumBytes8 x) -> (SumBytes4 (ADDW <typ.UInt32> (SRDconst <typ.UInt32> x [32]) x))
|
|
|
|
|
|
2016-09-16 21:42:18 -04:00
|
|
|
(Bswap64 x) -> (MOVDBR x)
|
|
|
|
|
(Bswap32 x) -> (MOVWBR x)
|
|
|
|
|
|
2019-04-30 17:46:23 +01:00
|
|
|
// add with carry
|
|
|
|
|
// Sum output of a 64-bit add with carry-in. (ADDCconst c [-1]) is used only
// for its flags result (Select1): adding -1 (all ones) to c carries out
// exactly when c is non-zero, so this presumably moves the carry-in value
// into the carry flag consumed by ADDE.
(Select0 (Add64carry x y c))
|
|
|
|
|
-> (Select0 <typ.UInt64> (ADDE x y (Select1 <types.TypeFlags> (ADDCconst c [-1]))))
|
|
|
|
|
// Carry output of a 64-bit add with carry-in: the same ADDE as in the Select0
// rule is built again, this time for its flags, and a second ADDE computes
// 0 + 0 + carry so that the carry-out becomes an ordinary integer value.
(Select1 (Add64carry x y c))
|
|
|
|
|
-> (Select0 <typ.UInt64> (ADDE (MOVDconst [0]) (MOVDconst [0]) (Select1 <types.TypeFlags> (ADDE x y (Select1 <types.TypeFlags> (ADDCconst c [-1]))))))
|
|
|
|
|
|
|
|
|
|
// subtract with borrow
|
|
|
|
|
// Difference output of a 64-bit subtract with borrow-in. (SUBC 0 c) is used
// only for its flags result (Select1): 0 - c borrows exactly when c is
// non-zero, so this presumably hands the borrow-in to SUBE through the flags.
(Select0 (Sub64borrow x y c))
|
|
|
|
|
-> (Select0 <typ.UInt64> (SUBE x y (Select1 <types.TypeFlags> (SUBC (MOVDconst [0]) c))))
|
|
|
|
|
// Borrow output of a 64-bit subtract with borrow-in: the same SUBE as in the
// Select0 rule is built again for its flags, a second SUBE computes
// 0 - 0 - borrow, and the result is negated.
// NOTE(review): presumably the inner value is 0 or -1, so that NEG yields the
// 0/1 borrow-out — confirm.
(Select1 (Sub64borrow x y c))
|
|
|
|
|
-> (NEG (Select0 <typ.UInt64> (SUBE (MOVDconst [0]) (MOVDconst [0]) (Select1 <types.TypeFlags> (SUBE x y (Select1 <types.TypeFlags> (SUBC (MOVDconst [0]) c)))))))
|
|
|
|
|
|
2017-09-14 20:00:02 +01:00
|
|
|
// math package intrinsics
|
2017-10-30 09:02:44 -04:00
|
|
|
(Sqrt x) -> (FSQRT x)
|
|
|
|
|
// The rounding functions all lower to FIDBR; only the aux value differs
// (7 for Floor, 6 for Ceil, 5 for Trunc, 4 for RoundToEven, 1 for Round).
// NOTE(review): presumably these are the instruction's rounding-mode codes —
// confirm against the ISA documentation.
(Floor x) -> (FIDBR [7] x)
|
|
|
|
|
(Ceil x) -> (FIDBR [6] x)
|
|
|
|
|
(Trunc x) -> (FIDBR [5] x)
|
|
|
|
|
(RoundToEven x) -> (FIDBR [4] x)
|
|
|
|
|
(Round x) -> (FIDBR [1] x)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2019-06-04 19:17:41 +01:00
|
|
|
// Atomic loads and stores.
|
|
|
|
|
// The SYNC instruction (fast-BCR-serialization) prevents store-load
|
|
|
|
|
// reordering. Other sequences of memory operations (load-load,
|
|
|
|
|
// store-store and load-store) are already guaranteed not to be reordered.
|
|
|
|
|
(AtomicLoad(8|32|Acq32|64|Ptr) ptr mem) -> (MOV(BZ|WZ|WZ|D|D)atomicload ptr mem)
|
|
|
|
|
(AtomicStore(32|64|PtrNoWB) ptr val mem) -> (SYNC (MOV(W|D|D)atomicstore ptr val mem))
|
|
|
|
|
|
|
|
|
|
// Store-release doesn't require store-load ordering.
|
|
|
|
|
(AtomicStoreRel32 ptr val mem) -> (MOVWatomicstore ptr val mem)
|
2016-10-19 16:41:01 -04:00
|
|
|
|
|
|
|
|
// Atomic adds.
|
2017-05-15 09:00:55 -07:00
|
|
|
// The tuple produced by LAA/LAAG is wrapped in AddTupleFirst32/64 so that val
// is added to its first element once Select0 is applied (presumably because
// LAA/LAAG yield the value held in memory before the add — confirm). Select1
// of the wrapper simply forwards to Select1 of the underlying tuple.
(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (LAA ptr val mem))
|
|
|
|
|
(AtomicAdd64 ptr val mem) -> (AddTupleFirst64 val (LAAG ptr val mem))
|
|
|
|
|
(Select0 <t> (AddTupleFirst32 val tuple)) -> (ADDW val (Select0 <t> tuple))
|
|
|
|
|
(Select1 (AddTupleFirst32 _ tuple)) -> (Select1 tuple)
|
|
|
|
|
(Select0 <t> (AddTupleFirst64 val tuple)) -> (ADD val (Select0 <t> tuple))
|
|
|
|
|
(Select1 (AddTupleFirst64 _ tuple)) -> (Select1 tuple)
|
2016-10-19 16:41:01 -04:00
|
|
|
|
|
|
|
|
// Atomic exchanges.
|
|
|
|
|
(AtomicExchange32 ptr val mem) -> (LoweredAtomicExchange32 ptr val mem)
|
|
|
|
|
(AtomicExchange64 ptr val mem) -> (LoweredAtomicExchange64 ptr val mem)
|
|
|
|
|
|
|
|
|
|
// Atomic compare and swap.
|
|
|
|
|
(AtomicCompareAndSwap32 ptr old new_ mem) -> (LoweredAtomicCas32 ptr old new_ mem)
|
|
|
|
|
(AtomicCompareAndSwap64 ptr old new_ mem) -> (LoweredAtomicCas64 ptr old new_ mem)
|
|
|
|
|
|
2016-09-12 14:50:10 -04:00
|
|
|
// Lowering extension
|
|
|
|
|
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
|
2018-02-21 19:00:21 +01:00
|
|
|
(SignExt8to(16|32|64) x) -> (MOVBreg x)
|
|
|
|
|
(SignExt16to(32|64) x) -> (MOVHreg x)
|
2016-09-12 14:50:10 -04:00
|
|
|
(SignExt32to64 x) -> (MOVWreg x)
|
|
|
|
|
|
2018-02-21 19:00:21 +01:00
|
|
|
(ZeroExt8to(16|32|64) x) -> (MOVBZreg x)
|
|
|
|
|
(ZeroExt16to(32|64) x) -> (MOVHZreg x)
|
2016-09-12 14:50:10 -04:00
|
|
|
(ZeroExt32to64 x) -> (MOVWZreg x)
|
|
|
|
|
|
2017-01-24 12:48:10 -08:00
|
|
|
// Slicemask: negate x, then shift right by 63 with SRADconst (the constant
// form of the op used for signed 64-bit right shifts) to replicate the sign
// bit of -x. The result is 0 for x == 0 and all ones for x > 0.
(Slicemask <t> x) -> (SRADconst (NEG <t> x) [63])
|
2016-10-25 15:49:52 -07:00
|
|
|
|
2016-09-12 14:50:10 -04:00
|
|
|
// Lowering truncation
|
|
|
|
|
// Because we ignore high parts of registers, truncates are just copies.
|
2018-02-21 19:00:21 +01:00
|
|
|
(Trunc(16|32|64)to8 x) -> x
|
|
|
|
|
(Trunc(32|64)to16 x) -> x
|
2016-09-12 14:50:10 -04:00
|
|
|
(Trunc64to32 x) -> x
|
|
|
|
|
|
|
|
|
|
// Lowering float <-> int
|
|
|
|
|
(Cvt32to32F x) -> (CEFBRA x)
|
|
|
|
|
(Cvt32to64F x) -> (CDFBRA x)
|
|
|
|
|
(Cvt64to32F x) -> (CEGBRA x)
|
|
|
|
|
(Cvt64to64F x) -> (CDGBRA x)
|
|
|
|
|
|
|
|
|
|
(Cvt32Fto32 x) -> (CFEBRA x)
|
|
|
|
|
(Cvt32Fto64 x) -> (CGEBRA x)
|
|
|
|
|
(Cvt64Fto32 x) -> (CFDBRA x)
|
|
|
|
|
(Cvt64Fto64 x) -> (CGDBRA x)
|
|
|
|
|
|
|
|
|
|
(Cvt32Fto64F x) -> (LDEBR x)
|
|
|
|
|
(Cvt64Fto32F x) -> (LEDBR x)
|
|
|
|
|
|
2018-02-21 19:00:21 +01:00
|
|
|
(Round(32|64)F x) -> (LoweredRound(32|64)F x)
|
2017-02-12 22:12:12 -05:00
|
|
|
|
2016-09-12 14:50:10 -04:00
|
|
|
// Lowering shifts
|
2018-04-30 13:27:50 +01:00
|
|
|
|
|
|
|
|
// Lower bounded shifts first. No need to check shift value.
|
|
|
|
|
(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SLD x y)
|
|
|
|
|
(Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SLW x y)
|
|
|
|
|
(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SLW x y)
|
|
|
|
|
(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SLW x y)
|
|
|
|
|
(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SRD x y)
|
|
|
|
|
(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SRW x y)
|
|
|
|
|
// Right shifts of 16- and 8-bit values extend the operand first (MOVHZreg /
// MOVBZreg for unsigned shifts, MOVHreg / MOVBreg for signed ones), because
// the high parts of registers are otherwise ignored and would be shifted in.
(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SRW (MOVHZreg x) y)
|
|
|
|
|
(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SRW (MOVBZreg x) y)
|
|
|
|
|
(Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SRAD x y)
|
|
|
|
|
(Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SRAW x y)
|
|
|
|
|
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SRAW (MOVHreg x) y)
|
|
|
|
|
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SRAW (MOVBreg x) y)
|
|
|
|
|
|
2016-09-12 14:50:10 -04:00
|
|
|
// Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
|
2018-04-30 13:27:50 +01:00
|
|
|
// result = shift >= 64 ? 0 : arg << shift
|
2019-09-13 13:28:49 +01:00
|
|
|
// LOCGR {cond} a b flags selects b when cond holds for flags and a otherwise,
// so the shift result is replaced by the constant 0 when y >= 64 (unsigned
// compare). The rules for narrower shift-amount types differ only in how y is
// compared: a 32-bit compare, or a compare of the zero-extended 16/8-bit y.
(Lsh(64|32|16|8)x64 <t> x y) -> (LOCGR {s390x.GreaterOrEqual} <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPUconst y [64]))
|
|
|
|
|
(Lsh(64|32|16|8)x32 <t> x y) -> (LOCGR {s390x.GreaterOrEqual} <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPWUconst y [64]))
|
|
|
|
|
(Lsh(64|32|16|8)x16 <t> x y) -> (LOCGR {s390x.GreaterOrEqual} <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVHZreg y) [64]))
|
|
|
|
|
(Lsh(64|32|16|8)x8 <t> x y) -> (LOCGR {s390x.GreaterOrEqual} <t> (SL(D|W|W|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVBZreg y) [64]))
|
2018-04-30 13:27:50 +01:00
|
|
|
|
2019-09-13 13:28:49 +01:00
|
|
|
(Rsh(64|32)Ux64 <t> x y) -> (LOCGR {s390x.GreaterOrEqual} <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPUconst y [64]))
|
|
|
|
|
(Rsh(64|32)Ux32 <t> x y) -> (LOCGR {s390x.GreaterOrEqual} <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPWUconst y [64]))
|
|
|
|
|
(Rsh(64|32)Ux16 <t> x y) -> (LOCGR {s390x.GreaterOrEqual} <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVHZreg y) [64]))
|
|
|
|
|
(Rsh(64|32)Ux8 <t> x y) -> (LOCGR {s390x.GreaterOrEqual} <t> (SR(D|W) <t> x y) (MOVDconst [0]) (CMPWUconst (MOVBZreg y) [64]))
|
2018-04-30 13:27:50 +01:00
|
|
|
|
2019-09-13 13:28:49 +01:00
|
|
|
(Rsh(16|8)Ux64 <t> x y) -> (LOCGR {s390x.GreaterOrEqual} <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPUconst y [64]))
|
|
|
|
|
(Rsh(16|8)Ux32 <t> x y) -> (LOCGR {s390x.GreaterOrEqual} <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPWUconst y [64]))
|
|
|
|
|
(Rsh(16|8)Ux16 <t> x y) -> (LOCGR {s390x.GreaterOrEqual} <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPWUconst (MOVHZreg y) [64]))
|
|
|
|
|
(Rsh(16|8)Ux8 <t> x y) -> (LOCGR {s390x.GreaterOrEqual} <t> (SRW <t> (MOV(H|B)Zreg x) y) (MOVDconst [0]) (CMPWUconst (MOVBZreg y) [64]))
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value.
|
2018-04-30 13:27:50 +01:00
|
|
|
// We implement this by setting the shift value to 63 (all ones) if the shift value is more than 63.
|
|
|
|
|
// result = arg >> (shift >= 64 ? 63 : shift)
|
2019-09-13 13:28:49 +01:00
|
|
|
(Rsh(64|32)x64 x y) -> (SRA(D|W) x (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPUconst y [64])))
|
|
|
|
|
(Rsh(64|32)x32 x y) -> (SRA(D|W) x (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst y [64])))
|
|
|
|
|
(Rsh(64|32)x16 x y) -> (SRA(D|W) x (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVHZreg y) [64])))
|
|
|
|
|
(Rsh(64|32)x8 x y) -> (SRA(D|W) x (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVBZreg y) [64])))
|
2018-04-30 13:27:50 +01:00
|
|
|
|
2019-09-13 13:28:49 +01:00
|
|
|
(Rsh(16|8)x64 x y) -> (SRAW (MOV(H|B)reg x) (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPUconst y [64])))
|
|
|
|
|
(Rsh(16|8)x32 x y) -> (SRAW (MOV(H|B)reg x) (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst y [64])))
|
|
|
|
|
(Rsh(16|8)x16 x y) -> (SRAW (MOV(H|B)reg x) (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVHZreg y) [64])))
|
|
|
|
|
(Rsh(16|8)x8 x y) -> (SRAW (MOV(H|B)reg x) (LOCGR {s390x.GreaterOrEqual} <y.Type> y (MOVDconst <y.Type> [63]) (CMPWUconst (MOVBZreg y) [64])))
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2018-09-03 10:47:58 -04:00
|
|
|
// Lowering rotates
|
2019-03-09 21:58:16 -07:00
|
|
|
// 8- and 16-bit rotates are matched here only for constant amounts. They are
// expanded into a left shift by c and an unsigned right shift by -c (both
// reduced modulo the width) OR'd together, using generic ops that are in turn
// lowered by the shift and Or rules in this file.
(RotateLeft8 <t> x (MOVDconst [c])) -> (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7])))
|
|
|
|
|
(RotateLeft16 <t> x (MOVDconst [c])) -> (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15])))
|
2018-09-03 10:47:58 -04:00
|
|
|
(RotateLeft32 x y) -> (RLL x y)
|
|
|
|
|
(RotateLeft64 x y) -> (RLLG x y)
|
|
|
|
|
|
2016-09-12 14:50:10 -04:00
|
|
|
// Lowering comparisons
|
2019-09-13 13:28:49 +01:00
|
|
|
// Comparisons produce a 0/1 value: LOCGR starts from the constant 0 and
// selects the constant 1 when the named condition holds for the flags
// produced by the compare op. Signed integer compares use CMP/CMPW, unsigned
// ones CMPU/CMPWU and floating point FCMP/FCMPS; 16- and 8-bit operands are
// extended before a 32-bit compare.
(Less64 x y) -> (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
|
|
|
|
|
(Less32 x y) -> (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
|
|
|
|
|
(Less(16|8) x y) -> (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
|
|
|
|
|
(Less64U x y) -> (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
|
|
|
|
|
(Less32U x y) -> (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
|
|
|
|
|
(Less(16|8)U x y) -> (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
|
|
|
|
|
(Less64F x y) -> (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
|
|
|
|
|
(Less32F x y) -> (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
|
|
|
|
|
|
|
|
|
|
(Leq64 x y) -> (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
|
|
|
|
|
(Leq32 x y) -> (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
|
|
|
|
|
(Leq(16|8) x y) -> (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
|
|
|
|
|
(Leq64U x y) -> (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
|
|
|
|
|
(Leq32U x y) -> (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
|
|
|
|
|
(Leq(16|8)U x y) -> (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
|
|
|
|
|
(Leq64F x y) -> (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
|
|
|
|
|
(Leq32F x y) -> (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
|
|
|
|
|
|
|
|
|
|
(Greater64 x y) -> (LOCGR {s390x.Greater} (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
|
|
|
|
|
(Greater32 x y) -> (LOCGR {s390x.Greater} (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
|
|
|
|
|
(Greater(16|8) x y) -> (LOCGR {s390x.Greater} (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
|
|
|
|
|
(Greater64U x y) -> (LOCGR {s390x.Greater} (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
|
|
|
|
|
(Greater32U x y) -> (LOCGR {s390x.Greater} (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
|
|
|
|
|
(Greater(16|8)U x y) -> (LOCGR {s390x.Greater} (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
|
|
|
|
|
(Greater64F x y) -> (LOCGR {s390x.Greater} (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
|
|
|
|
|
(Greater32F x y) -> (LOCGR {s390x.Greater} (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
|
|
|
|
|
|
|
|
|
|
(Geq64 x y) -> (LOCGR {s390x.GreaterOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
|
|
|
|
|
(Geq32 x y) -> (LOCGR {s390x.GreaterOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
|
|
|
|
|
(Geq(16|8) x y) -> (LOCGR {s390x.GreaterOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
|
|
|
|
|
(Geq64U x y) -> (LOCGR {s390x.GreaterOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
|
|
|
|
|
(Geq32U x y) -> (LOCGR {s390x.GreaterOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
|
|
|
|
|
(Geq(16|8)U x y) -> (LOCGR {s390x.GreaterOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
|
|
|
|
|
(Geq64F x y) -> (LOCGR {s390x.GreaterOrEqual} (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
|
|
|
|
|
(Geq32F x y) -> (LOCGR {s390x.GreaterOrEqual} (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
|
|
|
|
|
|
|
|
|
|
(Eq(64|Ptr) x y) -> (LOCGR {s390x.Equal} (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
|
|
|
|
|
(Eq32 x y) -> (LOCGR {s390x.Equal} (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
|
|
|
|
|
(Eq(16|8|B) x y) -> (LOCGR {s390x.Equal} (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B|B)reg x) (MOV(H|B|B)reg y)))
|
|
|
|
|
(Eq64F x y) -> (LOCGR {s390x.Equal} (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
|
|
|
|
|
(Eq32F x y) -> (LOCGR {s390x.Equal} (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
|
|
|
|
|
|
|
|
|
|
(Neq(64|Ptr) x y) -> (LOCGR {s390x.NotEqual} (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
|
|
|
|
|
(Neq32 x y) -> (LOCGR {s390x.NotEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
|
|
|
|
|
(Neq(16|8|B) x y) -> (LOCGR {s390x.NotEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B|B)reg x) (MOV(H|B|B)reg y)))
|
|
|
|
|
(Neq64F x y) -> (LOCGR {s390x.NotEqual} (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
|
|
|
|
|
(Neq32F x y) -> (LOCGR {s390x.NotEqual} (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Lowering loads
|
|
|
|
|
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem)
|
2017-02-03 04:55:34 -05:00
|
|
|
// Integer loads narrower than 64 bits pick the op from the signedness of the
// loaded type: signed types use MOVWload/MOVHload/MOVBload, unsigned types
// use MOVWZload/MOVHZload/MOVBZload. Booleans share the MOVBZload rule.
(Load <t> ptr mem) && is32BitInt(t) && isSigned(t) -> (MOVWload ptr mem)
|
|
|
|
|
(Load <t> ptr mem) && is32BitInt(t) && !isSigned(t) -> (MOVWZload ptr mem)
|
|
|
|
|
(Load <t> ptr mem) && is16BitInt(t) && isSigned(t) -> (MOVHload ptr mem)
|
|
|
|
|
(Load <t> ptr mem) && is16BitInt(t) && !isSigned(t) -> (MOVHZload ptr mem)
|
|
|
|
|
(Load <t> ptr mem) && is8BitInt(t) && isSigned(t) -> (MOVBload ptr mem)
|
|
|
|
|
(Load <t> ptr mem) && (t.IsBoolean() || (is8BitInt(t) && !isSigned(t))) -> (MOVBZload ptr mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
(Load <t> ptr mem) && is32BitFloat(t) -> (FMOVSload ptr mem)
|
|
|
|
|
(Load <t> ptr mem) && is64BitFloat(t) -> (FMOVDload ptr mem)
|
|
|
|
|
|
|
|
|
|
// Lowering stores
|
|
|
|
|
// These more specific floating-point versions of the Store pattern should come first, ahead of the generic size-based Store rules.
|
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
|
|
|
(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (FMOVDstore ptr val mem)
|
|
|
|
|
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (FMOVSstore ptr val mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
|
|
|
(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 -> (MOVDstore ptr val mem)
|
|
|
|
|
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 -> (MOVWstore ptr val mem)
|
|
|
|
|
(Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVHstore ptr val mem)
|
|
|
|
|
(Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Lowering moves
|
|
|
|
|
|
|
|
|
|
// Load and store for small copies.
|
2017-03-13 21:51:08 -04:00
|
|
|
(Move [0] _ _ mem) -> mem
|
|
|
|
|
(Move [1] dst src mem) -> (MOVBstore dst (MOVBZload src mem) mem)
|
|
|
|
|
(Move [2] dst src mem) -> (MOVHstore dst (MOVHZload src mem) mem)
|
|
|
|
|
(Move [4] dst src mem) -> (MOVWstore dst (MOVWZload src mem) mem)
|
|
|
|
|
(Move [8] dst src mem) -> (MOVDstore dst (MOVDload src mem) mem)
|
|
|
|
|
(Move [16] dst src mem) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVDstore [8] dst (MOVDload [8] src mem)
|
|
|
|
|
(MOVDstore dst (MOVDload src mem) mem))
|
2017-03-13 21:51:08 -04:00
|
|
|
(Move [24] dst src mem) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVDstore [16] dst (MOVDload [16] src mem)
|
|
|
|
|
(MOVDstore [8] dst (MOVDload [8] src mem)
|
|
|
|
|
(MOVDstore dst (MOVDload src mem) mem)))
|
2017-03-13 21:51:08 -04:00
|
|
|
(Move [3] dst src mem) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVBstore [2] dst (MOVBZload [2] src mem)
|
|
|
|
|
(MOVHstore dst (MOVHZload src mem) mem))
|
2017-03-13 21:51:08 -04:00
|
|
|
(Move [5] dst src mem) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVBstore [4] dst (MOVBZload [4] src mem)
|
|
|
|
|
(MOVWstore dst (MOVWZload src mem) mem))
|
2017-03-13 21:51:08 -04:00
|
|
|
(Move [6] dst src mem) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVHstore [4] dst (MOVHZload [4] src mem)
|
|
|
|
|
(MOVWstore dst (MOVWZload src mem) mem))
|
2017-03-13 21:51:08 -04:00
|
|
|
(Move [7] dst src mem) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVBstore [6] dst (MOVBZload [6] src mem)
|
|
|
|
|
(MOVHstore [4] dst (MOVHZload [4] src mem)
|
|
|
|
|
(MOVWstore dst (MOVWZload src mem) mem)))
|
|
|
|
|
|
|
|
|
|
// MVC for other moves. Use up to 4 instructions (sizes up to 1024 bytes).
|
2017-03-13 21:51:08 -04:00
|
|
|
(Move [s] dst src mem) && s > 0 && s <= 256 ->
|
|
|
|
|
(MVC [makeValAndOff(s, 0)] dst src mem)
|
|
|
|
|
(Move [s] dst src mem) && s > 256 && s <= 512 ->
|
|
|
|
|
(MVC [makeValAndOff(s-256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem))
|
|
|
|
|
(Move [s] dst src mem) && s > 512 && s <= 768 ->
|
|
|
|
|
(MVC [makeValAndOff(s-512, 512)] dst src (MVC [makeValAndOff(256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem)))
|
|
|
|
|
(Move [s] dst src mem) && s > 768 && s <= 1024 ->
|
|
|
|
|
(MVC [makeValAndOff(s-768, 768)] dst src (MVC [makeValAndOff(256, 512)] dst src (MVC [makeValAndOff(256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem))))
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Move more than 1024 bytes using a loop.
|
2017-03-13 21:51:08 -04:00
|
|
|
(Move [s] dst src mem) && s > 1024 ->
|
2018-11-28 12:41:23 -08:00
|
|
|
(LoweredMove [s%256] dst src (ADD <src.Type> src (MOVDconst [(s/256)*256])) mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Lowering Zero instructions
|
2017-03-13 21:51:08 -04:00
|
|
|
(Zero [0] _ mem) -> mem
|
|
|
|
|
(Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
|
|
|
|
|
(Zero [2] destptr mem) -> (MOVHstoreconst [0] destptr mem)
|
|
|
|
|
(Zero [4] destptr mem) -> (MOVWstoreconst [0] destptr mem)
|
|
|
|
|
(Zero [8] destptr mem) -> (MOVDstoreconst [0] destptr mem)
|
|
|
|
|
(Zero [3] destptr mem) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVBstoreconst [makeValAndOff(0,2)] destptr
|
|
|
|
|
(MOVHstoreconst [0] destptr mem))
|
2017-03-13 21:51:08 -04:00
|
|
|
(Zero [5] destptr mem) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVBstoreconst [makeValAndOff(0,4)] destptr
|
|
|
|
|
(MOVWstoreconst [0] destptr mem))
|
2017-03-13 21:51:08 -04:00
|
|
|
(Zero [6] destptr mem) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVHstoreconst [makeValAndOff(0,4)] destptr
|
|
|
|
|
(MOVWstoreconst [0] destptr mem))
|
2017-03-13 21:51:08 -04:00
|
|
|
// Zero 7 bytes with two overlapping 4-byte stores of zero: one at offset 0
// and one at offset 3 (byte 3 is written by both stores).
(Zero [7] destptr mem) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVWstoreconst [makeValAndOff(0,3)] destptr
|
|
|
|
|
(MOVWstoreconst [0] destptr mem))
|
|
|
|
|
|
2017-03-13 21:51:08 -04:00
|
|
|
(Zero [s] destptr mem) && s > 0 && s <= 1024 ->
|
|
|
|
|
(CLEAR [makeValAndOff(s, 0)] destptr mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Zero more than 1024 bytes using a loop.
|
2017-03-13 21:51:08 -04:00
|
|
|
(Zero [s] destptr mem) && s > 1024 ->
|
|
|
|
|
(LoweredZero [s%256] destptr (ADDconst <destptr.Type> destptr [(s/256)*256]) mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Lowering constants
|
2018-02-21 19:00:21 +01:00
|
|
|
(Const(64|32|16|8) [val]) -> (MOVDconst [val])
|
|
|
|
|
(Const(32|64)F [val]) -> (FMOV(S|D)const [val])
|
2016-09-12 14:50:10 -04:00
|
|
|
(ConstNil) -> (MOVDconst [0])
|
|
|
|
|
(ConstBool [b]) -> (MOVDconst [b])
|
|
|
|
|
|
|
|
|
|
// Lowering calls
|
|
|
|
|
(StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
|
|
|
|
|
(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
|
|
|
|
|
(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
|
|
|
|
|
|
|
|
|
|
// Miscellaneous
|
2019-09-13 13:28:49 +01:00
|
|
|
(IsNonNil p) -> (LOCGR {s390x.NotEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPconst p [0]))
|
|
|
|
|
(IsInBounds idx len) -> (LOCGR {s390x.Less} (MOVDconst [0]) (MOVDconst [1]) (CMPU idx len))
|
|
|
|
|
(IsSliceInBounds idx len) -> (LOCGR {s390x.LessOrEqual} (MOVDconst [0]) (MOVDconst [1]) (CMPU idx len))
|
2016-09-12 14:50:10 -04:00
|
|
|
(NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
|
|
|
|
|
(GetG mem) -> (LoweredGetG mem)
|
|
|
|
|
(GetClosurePtr) -> (LoweredGetClosurePtr)
|
2017-10-09 15:33:29 -04:00
|
|
|
(GetCallerSP) -> (LoweredGetCallerSP)
|
2018-05-02 14:25:00 +08:00
|
|
|
(GetCallerPC) -> (LoweredGetCallerPC)
|
2016-09-12 14:50:10 -04:00
|
|
|
(Addr {sym} base) -> (MOVDaddr {sym} base)
|
2018-07-03 11:34:38 -04:00
|
|
|
(LocalAddr {sym} base _) -> (MOVDaddr {sym} base)
|
2016-09-12 14:50:10 -04:00
|
|
|
(ITab (Load ptr mem)) -> (MOVDload ptr mem)
|
|
|
|
|
|
|
|
|
|
// block rewrites
|
2019-09-13 13:28:49 +01:00
|
|
|
(If cond yes no) -> (BRC {s390x.NotEqual} (CMPWconst [0] (MOVBZreg <typ.Bool> cond)) yes no)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2017-11-15 14:54:24 -08:00
|
|
|
// Write barrier.
|
|
|
|
|
(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
|
|
|
|
|
|
2019-02-06 14:12:36 -08:00
|
|
|
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 -> (LoweredPanicBoundsA [kind] x y mem)
|
|
|
|
|
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 -> (LoweredPanicBoundsB [kind] x y mem)
|
|
|
|
|
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 -> (LoweredPanicBoundsC [kind] x y mem)
|
|
|
|
|
|
2016-09-12 14:50:10 -04:00
|
|
|
// ***************************
|
|
|
|
|
// Above: lowering rules
|
|
|
|
|
// Below: optimizations
|
|
|
|
|
// ***************************
|
|
|
|
|
// TODO: Should the optimizations be a separate pass?
|
|
|
|
|
|
2019-04-25 09:41:46 -04:00
|
|
|
// Note on removing unnecessary sign/zero extensions:
|
|
|
|
|
//
|
|
|
|
|
// After a value is spilled it is restored using a sign- or zero-extension
|
|
|
|
|
// to register-width as appropriate for its type. For example, a uint8 will
|
|
|
|
|
// be restored using a MOVBZ (llgc) instruction which will zero extend the
|
|
|
|
|
// 8-bit value to 64-bits.
|
|
|
|
|
//
|
|
|
|
|
// This is a hazard when folding sign- and zero-extensions since we need to
|
|
|
|
|
// ensure not only that the value in the argument register is correctly
|
|
|
|
|
// extended but also that it will still be correctly extended if it is
|
|
|
|
|
// spilled and restored.
|
|
|
|
|
//
|
|
|
|
|
// In general this means we need type checks when the RHS of a rule is an
|
|
|
|
|
// OpCopy (i.e. "(... x:(...) ...) -> x").
|
|
|
|
|
|
|
|
|
|
// Merge double extensions.
|
|
|
|
|
(MOV(H|HZ)reg e:(MOV(B|BZ)reg x)) && clobberIfDead(e) -> (MOV(B|BZ)reg x)
|
|
|
|
|
(MOV(W|WZ)reg e:(MOV(B|BZ)reg x)) && clobberIfDead(e) -> (MOV(B|BZ)reg x)
|
|
|
|
|
(MOV(W|WZ)reg e:(MOV(H|HZ)reg x)) && clobberIfDead(e) -> (MOV(H|HZ)reg x)
|
|
|
|
|
|
|
|
|
|
// Bypass redundant sign extensions.
|
|
|
|
|
(MOV(B|BZ)reg e:(MOVBreg x)) && clobberIfDead(e) -> (MOV(B|BZ)reg x)
|
|
|
|
|
(MOV(B|BZ)reg e:(MOVHreg x)) && clobberIfDead(e) -> (MOV(B|BZ)reg x)
|
|
|
|
|
(MOV(B|BZ)reg e:(MOVWreg x)) && clobberIfDead(e) -> (MOV(B|BZ)reg x)
|
|
|
|
|
(MOV(H|HZ)reg e:(MOVHreg x)) && clobberIfDead(e) -> (MOV(H|HZ)reg x)
|
|
|
|
|
(MOV(H|HZ)reg e:(MOVWreg x)) && clobberIfDead(e) -> (MOV(H|HZ)reg x)
|
|
|
|
|
(MOV(W|WZ)reg e:(MOVWreg x)) && clobberIfDead(e) -> (MOV(W|WZ)reg x)
|
|
|
|
|
|
|
|
|
|
// Bypass redundant zero extensions.
|
|
|
|
|
(MOV(B|BZ)reg e:(MOVBZreg x)) && clobberIfDead(e) -> (MOV(B|BZ)reg x)
|
|
|
|
|
(MOV(B|BZ)reg e:(MOVHZreg x)) && clobberIfDead(e) -> (MOV(B|BZ)reg x)
|
|
|
|
|
(MOV(B|BZ)reg e:(MOVWZreg x)) && clobberIfDead(e) -> (MOV(B|BZ)reg x)
|
|
|
|
|
(MOV(H|HZ)reg e:(MOVHZreg x)) && clobberIfDead(e) -> (MOV(H|HZ)reg x)
|
|
|
|
|
(MOV(H|HZ)reg e:(MOVWZreg x)) && clobberIfDead(e) -> (MOV(H|HZ)reg x)
|
|
|
|
|
(MOV(W|WZ)reg e:(MOVWZreg x)) && clobberIfDead(e) -> (MOV(W|WZ)reg x)
|
|
|
|
|
|
|
|
|
|
// Remove zero extensions after zero extending load.
|
|
|
|
|
// Note: take care that if x is spilled it is restored correctly.
|
|
|
|
|
(MOV(B|H|W)Zreg x:(MOVBZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) -> x
|
|
|
|
|
(MOV(B|H|W)Zreg x:(MOVBZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) -> x
|
|
|
|
|
(MOV(H|W)Zreg x:(MOVHZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) -> x
|
|
|
|
|
(MOV(H|W)Zreg x:(MOVHZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) -> x
|
|
|
|
|
(MOVWZreg x:(MOVWZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 4) -> x
|
|
|
|
|
(MOVWZreg x:(MOVWZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 4) -> x
|
|
|
|
|
|
|
|
|
|
// Remove sign extensions after sign extending load.
|
|
|
|
|
// Note: take care that if x is spilled it is restored correctly.
|
|
|
|
|
(MOV(B|H|W)reg x:(MOVBload _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) -> x
|
|
|
|
|
(MOV(B|H|W)reg x:(MOVBloadidx _ _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) -> x
|
|
|
|
|
(MOV(H|W)reg x:(MOVHload _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) -> x
|
|
|
|
|
(MOV(H|W)reg x:(MOVHloadidx _ _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) -> x
|
|
|
|
|
(MOVWreg x:(MOVWload _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) -> x
|
|
|
|
|
(MOVWreg x:(MOVWloadidx _ _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) -> x
|
|
|
|
|
|
|
|
|
|
// Remove sign extensions after zero extending load.
|
|
|
|
|
// These type checks are probably unnecessary but do them anyway just in case.
|
|
|
|
|
(MOV(H|W)reg x:(MOVBZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) -> x
|
|
|
|
|
(MOV(H|W)reg x:(MOVBZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) -> x
|
|
|
|
|
(MOVWreg x:(MOVHZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) -> x
|
|
|
|
|
(MOVWreg x:(MOVHZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) -> x
|
|
|
|
|
|
|
|
|
|
// Fold sign and zero extensions into loads.
|
|
|
|
|
//
|
|
|
|
|
// Note: The combined instruction must end up in the same block
|
|
|
|
|
// as the original load. If not, we end up making a value with
|
|
|
|
|
// memory type live in two different blocks, which can lead to
|
|
|
|
|
// multiple memory values alive simultaneously.
|
|
|
|
|
//
|
|
|
|
|
// Make sure we don't combine these ops if the load has another use.
|
|
|
|
|
// This prevents a single load from being split into multiple loads
|
|
|
|
|
// which then might return different values. See test/atomicload.go.
|
|
|
|
|
(MOV(B|H|W)Zreg <t> x:(MOV(B|H|W)load [o] {s} p mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> @x.Block (MOV(B|H|W)Zload <t> [o] {s} p mem)
|
|
|
|
|
(MOV(B|H|W)reg <t> x:(MOV(B|H|W)Zload [o] {s} p mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> @x.Block (MOV(B|H|W)load <t> [o] {s} p mem)
|
|
|
|
|
(MOV(B|H|W)Zreg <t> x:(MOV(B|H|W)loadidx [o] {s} p i mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> @x.Block (MOV(B|H|W)Zloadidx <t> [o] {s} p i mem)
|
|
|
|
|
(MOV(B|H|W)reg <t> x:(MOV(B|H|W)Zloadidx [o] {s} p i mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> @x.Block (MOV(B|H|W)loadidx <t> [o] {s} p i mem)
|
|
|
|
|
|
|
|
|
|
// Remove zero extensions after argument load.
|
|
|
|
|
(MOVBZreg x:(Arg <t>)) && !t.IsSigned() && t.Size() == 1 -> x
|
|
|
|
|
(MOVHZreg x:(Arg <t>)) && !t.IsSigned() && t.Size() <= 2 -> x
|
|
|
|
|
(MOVWZreg x:(Arg <t>)) && !t.IsSigned() && t.Size() <= 4 -> x
|
|
|
|
|
|
|
|
|
|
// Remove sign extensions after argument load.
|
|
|
|
|
(MOVBreg x:(Arg <t>)) && t.IsSigned() && t.Size() == 1 -> x
|
|
|
|
|
(MOVHreg x:(Arg <t>)) && t.IsSigned() && t.Size() <= 2 -> x
|
|
|
|
|
(MOVWreg x:(Arg <t>)) && t.IsSigned() && t.Size() <= 4 -> x
|
|
|
|
|
|
|
|
|
|
// Fold zero extensions into constants.
|
|
|
|
|
(MOVBZreg (MOVDconst [c])) -> (MOVDconst [int64( uint8(c))])
|
|
|
|
|
(MOVHZreg (MOVDconst [c])) -> (MOVDconst [int64(uint16(c))])
|
|
|
|
|
(MOVWZreg (MOVDconst [c])) -> (MOVDconst [int64(uint32(c))])
|
|
|
|
|
|
|
|
|
|
// Fold sign extensions into constants.
|
|
|
|
|
(MOVBreg (MOVDconst [c])) -> (MOVDconst [int64( int8(c))])
|
|
|
|
|
(MOVHreg (MOVDconst [c])) -> (MOVDconst [int64(int16(c))])
|
|
|
|
|
(MOVWreg (MOVDconst [c])) -> (MOVDconst [int64(int32(c))])
|
|
|
|
|
|
|
|
|
|
// Remove zero extension of conditional move.
|
|
|
|
|
// Note: only for MOVBZreg for now since it is added as part of 'if' statement lowering.
|
2019-09-13 13:28:49 +01:00
|
|
|
(MOVBZreg x:(LOCGR (MOVDconst [c]) (MOVDconst [d]) _))
|
2019-04-25 09:41:46 -04:00
|
|
|
&& int64(uint8(c)) == c
|
|
|
|
|
&& int64(uint8(d)) == d
|
|
|
|
|
&& (!x.Type.IsSigned() || x.Type.Size() > 1)
|
|
|
|
|
-> x
|
2016-10-07 14:29:55 -04:00
|
|
|
|
|
|
|
|
// Fold boolean tests into blocks.
|
2019-09-13 13:28:49 +01:00
|
|
|
(BRC {c} (CMPWconst [0] (LOCGR {d} (MOVDconst [0]) (MOVDconst [x]) cmp)) yes no)
|
|
|
|
|
&& x != 0
|
|
|
|
|
&& c.(s390x.CCMask) == s390x.Equal
|
|
|
|
|
-> (BRC {d} cmp no yes)
|
|
|
|
|
(BRC {c} (CMPWconst [0] (LOCGR {d} (MOVDconst [0]) (MOVDconst [x]) cmp)) yes no)
|
|
|
|
|
&& x != 0
|
|
|
|
|
&& c.(s390x.CCMask) == s390x.NotEqual
|
|
|
|
|
-> (BRC {d} cmp yes no)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Fold constants into instructions.
|
|
|
|
|
(ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
|
2017-11-07 04:40:56 -05:00
|
|
|
(ADDW x (MOVDconst [c])) -> (ADDWconst [int64(int32(c))] x)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
(SUB x (MOVDconst [c])) && is32Bit(c) -> (SUBconst x [c])
|
|
|
|
|
(SUB (MOVDconst [c]) x) && is32Bit(c) -> (NEG (SUBconst <v.Type> x [c]))
|
2017-11-07 04:40:56 -05:00
|
|
|
(SUBW x (MOVDconst [c])) -> (SUBWconst x [int64(int32(c))])
|
|
|
|
|
(SUBW (MOVDconst [c]) x) -> (NEGW (SUBWconst <v.Type> x [int64(int32(c))]))
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
(MULLD x (MOVDconst [c])) && is32Bit(c) -> (MULLDconst [c] x)
|
2017-11-07 04:40:56 -05:00
|
|
|
(MULLW x (MOVDconst [c])) -> (MULLWconst [int64(int32(c))] x)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2016-10-21 08:39:39 -04:00
|
|
|
// NILF instructions leave the high 32 bits unchanged which is
|
|
|
|
|
// equivalent to ANDing with a mask whose leftmost 32 bits are set.
|
|
|
|
|
// TODO(mundaym): modify the assembler to accept 64-bit values
|
|
|
|
|
// and use isU32Bit(^c).
|
|
|
|
|
(AND x (MOVDconst [c])) && is32Bit(c) && c < 0 -> (ANDconst [c] x)
|
2018-04-30 13:27:50 +01:00
|
|
|
(AND x (MOVDconst [c])) && is32Bit(c) && c >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(int32(c))] x))
|
2017-11-07 04:40:56 -05:00
|
|
|
(ANDW x (MOVDconst [c])) -> (ANDWconst [int64(int32(c))] x)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
(ANDWconst [c] (ANDWconst [d] x)) -> (ANDWconst [c & d] x)
|
|
|
|
|
(ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c & d] x)
|
|
|
|
|
|
2016-10-21 08:39:39 -04:00
|
|
|
(OR x (MOVDconst [c])) && isU32Bit(c) -> (ORconst [c] x)
|
2017-11-07 04:40:56 -05:00
|
|
|
(ORW x (MOVDconst [c])) -> (ORWconst [int64(int32(c))] x)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2016-10-21 08:39:39 -04:00
|
|
|
(XOR x (MOVDconst [c])) && isU32Bit(c) -> (XORconst [c] x)
|
2017-11-07 04:40:56 -05:00
|
|
|
(XORW x (MOVDconst [c])) -> (XORWconst [int64(int32(c))] x)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2018-04-30 13:27:50 +01:00
|
|
|
// Constant shifts.
|
|
|
|
|
(S(LD|RD|RAD|LW|RW|RAW) x (MOVDconst [c]))
|
|
|
|
|
-> (S(LD|RD|RAD|LW|RW|RAW)const x [c&63])
|
|
|
|
|
|
|
|
|
|
// Shifts only use the rightmost 6 bits of the shift value.
|
|
|
|
|
(S(LD|RD|RAD|LW|RW|RAW) x (AND (MOVDconst [c]) y))
|
|
|
|
|
-> (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst <typ.UInt32> [c&63] y))
|
|
|
|
|
(S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst [c] y)) && c&63 == 63
|
|
|
|
|
-> (S(LD|RD|RAD|LW|RW|RAW) x y)
|
2019-04-25 09:41:46 -04:00
|
|
|
(SLD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) -> (SLD x y)
|
|
|
|
|
(SRD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) -> (SRD x y)
|
|
|
|
|
(SRAD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) -> (SRAD x y)
|
|
|
|
|
(SLW x (MOV(W|H|B|WZ|HZ|BZ)reg y)) -> (SLW x y)
|
|
|
|
|
(SRW x (MOV(W|H|B|WZ|HZ|BZ)reg y)) -> (SRW x y)
|
|
|
|
|
(SRAW x (MOV(W|H|B|WZ|HZ|BZ)reg y)) -> (SRAW x y)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2018-09-03 10:47:58 -04:00
|
|
|
// Constant rotate generation
|
|
|
|
|
(RLL x (MOVDconst [c])) -> (RLLconst x [c&31])
|
|
|
|
|
(RLLG x (MOVDconst [c])) -> (RLLGconst x [c&63])
|
|
|
|
|
|
2017-03-30 03:30:22 +00:00
|
|
|
(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
|
|
|
|
|
( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
|
|
|
|
|
(XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
|
|
|
|
|
|
|
|
|
|
(ADDW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
|
|
|
|
|
( ORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
|
|
|
|
|
(XORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
|
2016-12-08 16:17:20 -08:00
|
|
|
|
2016-09-12 14:50:10 -04:00
|
|
|
(CMP x (MOVDconst [c])) && is32Bit(c) -> (CMPconst x [c])
|
|
|
|
|
(CMP (MOVDconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPconst x [c]))
|
2017-11-07 04:40:56 -05:00
|
|
|
(CMPW x (MOVDconst [c])) -> (CMPWconst x [int64(int32(c))])
|
|
|
|
|
(CMPW (MOVDconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int32(c))]))
|
|
|
|
|
(CMPU x (MOVDconst [c])) && isU32Bit(c) -> (CMPUconst x [int64(int32(c))])
|
|
|
|
|
(CMPU (MOVDconst [c]) x) && isU32Bit(c) -> (InvertFlags (CMPUconst x [int64(int32(c))]))
|
|
|
|
|
(CMPWU x (MOVDconst [c])) -> (CMPWUconst x [int64(int32(c))])
|
|
|
|
|
(CMPWU (MOVDconst [c]) x) -> (InvertFlags (CMPWUconst x [int64(int32(c))]))
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2016-10-21 08:39:39 -04:00
|
|
|
// Using MOV{W,H,B}Zreg instead of AND is cheaper.
|
|
|
|
|
(AND x (MOVDconst [0xFF])) -> (MOVBZreg x)
|
|
|
|
|
(AND x (MOVDconst [0xFFFF])) -> (MOVHZreg x)
|
|
|
|
|
(AND x (MOVDconst [0xFFFFFFFF])) -> (MOVWZreg x)
|
|
|
|
|
(ANDWconst [0xFF] x) -> (MOVBZreg x)
|
|
|
|
|
(ANDWconst [0xFFFF] x) -> (MOVHZreg x)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// strength reduction
|
|
|
|
|
(MULLDconst [-1] x) -> (NEG x)
|
|
|
|
|
(MULLDconst [0] _) -> (MOVDconst [0])
|
|
|
|
|
(MULLDconst [1] x) -> x
|
|
|
|
|
(MULLDconst [c] x) && isPowerOfTwo(c) -> (SLDconst [log2(c)] x)
|
|
|
|
|
(MULLDconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB (SLDconst <v.Type> [log2(c+1)] x) x)
|
|
|
|
|
(MULLDconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (ADD (SLDconst <v.Type> [log2(c-1)] x) x)
|
|
|
|
|
|
|
|
|
|
(MULLWconst [-1] x) -> (NEGW x)
|
|
|
|
|
(MULLWconst [0] _) -> (MOVDconst [0])
|
|
|
|
|
(MULLWconst [1] x) -> x
|
|
|
|
|
(MULLWconst [c] x) && isPowerOfTwo(c) -> (SLWconst [log2(c)] x)
|
|
|
|
|
(MULLWconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBW (SLWconst <v.Type> [log2(c+1)] x) x)
|
|
|
|
|
(MULLWconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (ADDW (SLWconst <v.Type> [log2(c-1)] x) x)
|
|
|
|
|
|
|
|
|
|
// Fold ADD into MOVDaddr. Odd offsets from SB shouldn't be folded (LARL can't handle them).
|
2016-10-07 12:16:26 -04:00
|
|
|
(ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(c+d) -> (MOVDaddr [c+d] {s} x)
|
|
|
|
|
(ADDconst [c] (MOVDaddr [d] {s} x)) && x.Op != OpSB && is20Bit(c+d) -> (MOVDaddr [c+d] {s} x)
|
2017-03-30 03:30:22 +00:00
|
|
|
(ADD idx (MOVDaddr [c] {s} ptr)) && ptr.Op != OpSB && idx.Op != OpSB -> (MOVDaddridx [c] {s} ptr idx)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// fold ADDconst into MOVDaddridx
|
2016-10-07 12:16:26 -04:00
|
|
|
(ADDconst [c] (MOVDaddridx [d] {s} x y)) && is20Bit(c+d) -> (MOVDaddridx [c+d] {s} x y)
|
|
|
|
|
(MOVDaddridx [c] {s} (ADDconst [d] x) y) && is20Bit(c+d) && x.Op != OpSB -> (MOVDaddridx [c+d] {s} x y)
|
|
|
|
|
(MOVDaddridx [c] {s} x (ADDconst [d] y)) && is20Bit(c+d) && y.Op != OpSB -> (MOVDaddridx [c+d] {s} x y)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// reverse ordering of compare instruction
|
2019-09-13 13:28:49 +01:00
|
|
|
(LOCGR {c} x y (InvertFlags cmp)) -> (LOCGR {c.(s390x.CCMask).ReverseComparison()} x y cmp)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// replace load from same location as preceding store with copy
|
2019-04-25 09:41:46 -04:00
|
|
|
(MOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x
|
2017-10-27 09:45:45 -04:00
|
|
|
(MOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWreg x)
|
|
|
|
|
(MOVHload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHreg x)
|
|
|
|
|
(MOVBload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBreg x)
|
|
|
|
|
(MOVWZload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWZreg x)
|
|
|
|
|
(MOVHZload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHZreg x)
|
|
|
|
|
(MOVBZload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBZreg x)
|
|
|
|
|
(MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LGDR x)
|
|
|
|
|
(FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LDGR x)
|
|
|
|
|
(FMOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x
|
|
|
|
|
(FMOVSload [off] {sym} ptr1 (FMOVSstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x
|
|
|
|
|
|
|
|
|
|
// prefer FPR <-> GPR moves over combined load ops
|
|
|
|
|
(MULLDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (MULLD x (LGDR <t> y))
|
|
|
|
|
(ADDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (ADD x (LGDR <t> y))
|
|
|
|
|
(SUBload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (SUB x (LGDR <t> y))
|
|
|
|
|
(ORload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (OR x (LGDR <t> y))
|
|
|
|
|
(ANDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (AND x (LGDR <t> y))
|
|
|
|
|
(XORload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (XOR x (LGDR <t> y))
|
|
|
|
|
|
|
|
|
|
// detect attempts to set/clear the sign bit
|
|
|
|
|
// may need to be reworked when NIHH/OIHH are added
|
|
|
|
|
(SRDconst [1] (SLDconst [1] (LGDR <t> x))) -> (LGDR <t> (LPDFR <x.Type> x))
|
|
|
|
|
(LDGR <t> (SRDconst [1] (SLDconst [1] x))) -> (LPDFR (LDGR <t> x))
|
2019-09-09 17:50:35 +02:00
|
|
|
(AND (MOVDconst [^(-1<<63)]) (LGDR <t> x)) -> (LGDR <t> (LPDFR <x.Type> x))
|
|
|
|
|
(LDGR <t> (AND (MOVDconst [^(-1<<63)]) x)) -> (LPDFR (LDGR <t> x))
|
2017-10-27 09:45:45 -04:00
|
|
|
(OR (MOVDconst [-1<<63]) (LGDR <t> x)) -> (LGDR <t> (LNDFR <x.Type> x))
|
|
|
|
|
(LDGR <t> (OR (MOVDconst [-1<<63]) x)) -> (LNDFR (LDGR <t> x))
|
|
|
|
|
|
|
|
|
|
// detect attempts to set the sign bit with load
|
|
|
|
|
(LDGR <t> x:(ORload <t1> [off] {sym} (MOVDconst [-1<<63]) ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (LNDFR <t> (LDGR <t> (MOVDload <t1> [off] {sym} ptr mem)))
|
|
|
|
|
|
|
|
|
|
// detect copysign
|
|
|
|
|
(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR <t> y))) -> (LGDR (CPSDR <t> y x))
|
|
|
|
|
(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
|
2019-09-09 17:50:35 +02:00
|
|
|
(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (LGDR (LPDFR <t> y))) -> (LGDR (CPSDR <t> y x))
|
|
|
|
|
(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
|
2017-10-27 09:45:45 -04:00
|
|
|
(CPSDR y (FMOVDconst [c])) && c & -1<<63 == 0 -> (LPDFR y)
|
|
|
|
|
(CPSDR y (FMOVDconst [c])) && c & -1<<63 != 0 -> (LNDFR y)
|
|
|
|
|
|
|
|
|
|
// absorb negations into set/clear sign bit
|
|
|
|
|
(FNEG (LPDFR x)) -> (LNDFR x)
|
|
|
|
|
(FNEG (LNDFR x)) -> (LPDFR x)
|
|
|
|
|
(FNEGS (LPDFR x)) -> (LNDFR x)
|
|
|
|
|
(FNEGS (LNDFR x)) -> (LPDFR x)
|
|
|
|
|
|
|
|
|
|
// no need to convert float32 to float64 to set/clear sign bit
|
|
|
|
|
(LEDBR (LPDFR (LDEBR x))) -> (LPDFR x)
|
|
|
|
|
(LEDBR (LNDFR (LDEBR x))) -> (LNDFR x)
|
|
|
|
|
|
|
|
|
|
// remove unnecessary FPR <-> GPR moves
|
|
|
|
|
(LDGR (LGDR x)) -> x
|
2019-04-25 09:41:46 -04:00
|
|
|
(LGDR (LDGR x)) -> x
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Don't extend before storing
|
|
|
|
|
(MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
|
|
|
|
|
(MOVHstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
|
|
|
|
|
(MOVBstore [off] {sym} ptr (MOVBreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
|
|
|
|
|
(MOVWstore [off] {sym} ptr (MOVWZreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
|
|
|
|
|
(MOVHstore [off] {sym} ptr (MOVHZreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
|
|
|
|
|
(MOVBstore [off] {sym} ptr (MOVBZreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
|
|
|
|
|
|
|
|
|
|
// Fold constants into memory operations.
|
|
|
|
|
// Note that this is not always a good idea because if not all the uses of
|
|
|
|
|
// the ADDconst get eliminated, we still have to compute the ADDconst and we now
|
|
|
|
|
// have potentially two live values (ptr and (ADDconst [off] ptr)) instead of one.
|
|
|
|
|
// Nevertheless, let's do it!
|
2016-10-07 12:16:26 -04:00
|
|
|
(MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVDload [off1+off2] {sym} ptr mem)
|
|
|
|
|
(MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVWload [off1+off2] {sym} ptr mem)
|
|
|
|
|
(MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHload [off1+off2] {sym} ptr mem)
|
|
|
|
|
(MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVBload [off1+off2] {sym} ptr mem)
|
|
|
|
|
(MOVWZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVWZload [off1+off2] {sym} ptr mem)
|
|
|
|
|
(MOVHZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHZload [off1+off2] {sym} ptr mem)
|
|
|
|
|
(MOVBZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVBZload [off1+off2] {sym} ptr mem)
|
|
|
|
|
(FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (FMOVSload [off1+off2] {sym} ptr mem)
|
|
|
|
|
(FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (FMOVDload [off1+off2] {sym} ptr mem)
|
|
|
|
|
|
|
|
|
|
(MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVDstore [off1+off2] {sym} ptr val mem)
|
|
|
|
|
(MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} ptr val mem)
|
|
|
|
|
(MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVHstore [off1+off2] {sym} ptr val mem)
|
|
|
|
|
(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} ptr val mem)
|
|
|
|
|
(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVSstore [off1+off2] {sym} ptr val mem)
|
|
|
|
|
(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVDstore [off1+off2] {sym} ptr val mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2017-10-27 09:45:45 -04:00
|
|
|
(ADDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDload [off1+off2] {sym} x ptr mem)
|
|
|
|
|
(ADDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDWload [off1+off2] {sym} x ptr mem)
|
|
|
|
|
(MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLDload [off1+off2] {sym} x ptr mem)
|
|
|
|
|
(MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLWload [off1+off2] {sym} x ptr mem)
|
|
|
|
|
(SUBload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBload [off1+off2] {sym} x ptr mem)
|
|
|
|
|
(SUBWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBWload [off1+off2] {sym} x ptr mem)
|
|
|
|
|
|
|
|
|
|
(ANDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDload [off1+off2] {sym} x ptr mem)
|
|
|
|
|
(ANDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDWload [off1+off2] {sym} x ptr mem)
|
|
|
|
|
(ORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORload [off1+off2] {sym} x ptr mem)
|
|
|
|
|
(ORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORWload [off1+off2] {sym} x ptr mem)
|
|
|
|
|
(XORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORload [off1+off2] {sym} x ptr mem)
|
|
|
|
|
(XORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORWload [off1+off2] {sym} x ptr mem)
|
|
|
|
|
|
2016-09-12 14:50:10 -04:00
|
|
|
// Fold constants into stores.
|
2017-04-30 14:25:57 -04:00
|
|
|
(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
|
2017-04-30 14:25:57 -04:00
|
|
|
(MOVWstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVWstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
|
2017-04-30 14:25:57 -04:00
|
|
|
(MOVHstore [off] {sym} ptr (MOVDconst [c]) mem) && isU12Bit(off) && ptr.Op != OpSB ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVHstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
|
2017-04-30 14:25:57 -04:00
|
|
|
(MOVBstore [off] {sym} ptr (MOVDconst [c]) mem) && is20Bit(off) && ptr.Op != OpSB ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
|
|
|
|
|
|
|
|
|
|
// Fold address offsets into constant stores.
|
2017-04-30 14:25:57 -04:00
|
|
|
(MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVDstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
|
2017-04-30 14:25:57 -04:00
|
|
|
(MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
|
2017-04-30 14:25:57 -04:00
|
|
|
(MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVHstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
|
2017-04-30 14:25:57 -04:00
|
|
|
(MOVBstoreconst [sc] {s} (ADDconst [off] ptr) mem) && is20Bit(ValAndOff(sc).Off()+off) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
|
|
|
|
|
|
2017-07-17 07:07:28 -04:00
|
|
|
// Merge address calculations into loads and stores.
|
|
|
|
|
// Offsets from SB must not be merged into unaligned memory accesses because
|
|
|
|
|
// loads/stores using PC-relative addressing directly must be aligned to the
|
|
|
|
|
// size of the target.
|
2018-04-24 13:39:51 -07:00
|
|
|
(MOVDload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
2018-04-24 13:39:51 -07:00
|
|
|
(MOVWZload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVWZload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
2018-04-24 13:39:51 -07:00
|
|
|
(MOVHZload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVHZload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
|
|
|
|
(MOVBZload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
|
|
|
|
(MOVBZload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
|
|
|
|
(FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
|
|
|
|
(FMOVSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
|
|
|
|
(FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
|
|
|
|
(FMOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
|
|
|
|
|
2018-04-24 13:39:51 -07:00
|
|
|
(MOVWload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
|
2017-07-17 07:07:28 -04:00
|
|
|
(MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
2018-04-24 13:39:51 -07:00
|
|
|
(MOVHload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
|
2017-07-17 07:07:28 -04:00
|
|
|
(MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
|
|
|
|
(MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
|
|
|
|
|
|
2018-04-24 13:39:51 -07:00
|
|
|
(MOVDstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
2018-04-24 13:39:51 -07:00
|
|
|
(MOVWstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
2018-04-24 13:39:51 -07:00
|
|
|
(MOVHstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVHstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
|
|
|
|
(MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
|
|
|
|
(MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
|
|
|
|
(FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
|
|
|
|
(FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
|
|
|
|
(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
|
|
|
|
(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
|
|
|
|
|
2017-10-27 09:45:45 -04:00
|
|
|
(ADDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
|
|
|
|
(ADDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
|
|
|
|
(MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
|
|
|
|
(MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
|
|
|
|
(SUBload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
|
|
|
|
(SUBWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
|
|
|
|
|
|
|
|
|
(ANDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
|
|
|
|
(ANDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
|
|
|
|
(ORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
|
|
|
|
(ORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
|
|
|
|
(XORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
|
|
|
|
(XORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
|
|
|
|
|
2017-04-30 14:25:57 -04:00
|
|
|
// Cannot store constant to SB directly (no 'move relative long immediate' instructions).
|
|
|
|
|
(MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
|
2017-04-30 14:25:57 -04:00
|
|
|
(MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
|
2017-04-30 14:25:57 -04:00
|
|
|
(MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVHstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
|
2017-04-30 14:25:57 -04:00
|
|
|
(MOVBstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
|
|
|
|
|
|
|
|
|
|
// Generate indexed loads and stores by folding MOVDaddridx and (ADD ptr idx) address computations into the memory operation.
|
|
|
|
|
// Fold a MOVDaddridx address computation into the load, producing the indexed form of the load.
(MOV(BZ|B|HZ|H|WZ|W|D)load [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> (MOV(BZ|B|HZ|H|WZ|W|D)loadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(FMOV(S|D)load [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> (FMOV(S|D)loadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
|
|
|
|
|
|
|
|
|
|
// Fold a MOVDaddridx address computation into the store, producing the indexed form of the store.
(MOV(B|H|W|D)store [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> (MOV(B|H|W|D)storeidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(FMOV(S|D)store [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> (FMOV(S|D)storeidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
|
|
|
|
|
|
|
|
|
|
// A load whose address is a register+register ADD becomes an indexed load, unless the first ADD operand is SB.
(MOV(BZ|B|HZ|H|WZ|W|D)load [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOV(BZ|B|HZ|H|WZ|W|D)loadidx [off] {sym} ptr idx mem)
(FMOV(S|D)load [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (FMOV(S|D)loadidx [off] {sym} ptr idx mem)
|
2017-02-08 23:25:40 -05:00
|
|
|
|
|
|
|
|
// A store whose address is a register+register ADD becomes an indexed store, unless the first ADD operand is SB.
(MOV(B|H|W|D)store [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOV(B|H|W|D)storeidx [off] {sym} ptr idx val mem)
(FMOV(S|D)store [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (FMOV(S|D)storeidx [off] {sym} ptr idx val mem)
|
|
|
|
|
|
|
|
|
|
// Fold an ADDconst on either the pointer or the index into the offset of indexed loads and stores (the combined offset must fit in 20 bits).
|
2017-02-08 23:25:40 -05:00
|
|
|
// Indexed loads: absorb a constant added to the pointer operand into the displacement.
(MOV(BZ|B|HZ|H|WZ|W|D)loadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOV(BZ|B|HZ|H|WZ|W|D)loadidx [c+d] {sym} ptr idx mem)
(FMOV(S|D)loadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (FMOV(S|D)loadidx [c+d] {sym} ptr idx mem)
|
|
|
|
|
|
|
|
|
|
// Indexed stores: absorb a constant added to the pointer operand into the displacement.
(MOV(B|H|W|D)storeidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOV(B|H|W|D)storeidx [c+d] {sym} ptr idx val mem)
(FMOV(S|D)storeidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (FMOV(S|D)storeidx [c+d] {sym} ptr idx val mem)
|
|
|
|
|
|
|
|
|
|
// Indexed loads: absorb a constant added to the index operand into the displacement.
(MOV(BZ|B|HZ|H|WZ|W|D)loadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOV(BZ|B|HZ|H|WZ|W|D)loadidx [c+d] {sym} ptr idx mem)
(FMOV(S|D)loadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (FMOV(S|D)loadidx [c+d] {sym} ptr idx mem)
|
|
|
|
|
|
|
|
|
|
// Indexed stores: absorb a constant added to the index operand into the displacement.
(MOV(B|H|W|D)storeidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOV(B|H|W|D)storeidx [c+d] {sym} ptr idx val mem)
(FMOV(S|D)storeidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (FMOV(S|D)storeidx [c+d] {sym} ptr idx val mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Fold a MOVDaddr operand into MOVDaddridx by merging offsets and symbols (not when the MOVDaddr base is SB).
|
|
|
|
|
(MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
|
|
|
|
|
(MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
|
|
|
|
|
(MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB ->
|
|
|
|
|
(MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
|
|
|
|
|
|
|
|
|
|
// Absorb InvertFlags into branches.
|
2019-09-13 13:28:49 +01:00
|
|
|
(BRC {c} (InvertFlags cmp) yes no) -> (BRC {c.(s390x.CCMask).ReverseComparison()} cmp yes no)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Constant comparisons.
|
|
|
|
|
(CMPconst (MOVDconst [x]) [y]) && x==y -> (FlagEQ)
|
|
|
|
|
(CMPconst (MOVDconst [x]) [y]) && x<y -> (FlagLT)
|
|
|
|
|
(CMPconst (MOVDconst [x]) [y]) && x>y -> (FlagGT)
|
|
|
|
|
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)==uint64(y) -> (FlagEQ)
|
|
|
|
|
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)<uint64(y) -> (FlagLT)
|
|
|
|
|
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)>uint64(y) -> (FlagGT)
|
|
|
|
|
|
|
|
|
|
(CMPWconst (MOVDconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
|
|
|
|
|
(CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) -> (FlagLT)
|
|
|
|
|
(CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) -> (FlagGT)
|
|
|
|
|
(CMPWUconst (MOVDconst [x]) [y]) && uint32(x)==uint32(y) -> (FlagEQ)
|
|
|
|
|
(CMPWUconst (MOVDconst [x]) [y]) && uint32(x)<uint32(y) -> (FlagLT)
|
|
|
|
|
(CMPWUconst (MOVDconst [x]) [y]) && uint32(x)>uint32(y) -> (FlagGT)
|
|
|
|
|
|
2018-04-30 13:27:50 +01:00
|
|
|
// A zero-extended byte (halfword) never exceeds 0xff (0xffff), so it always
// compares less than any larger constant.
(CMP(W|WU)const (MOVBZreg _) [c]) && 0xff < c -> (FlagLT)
|
|
|
|
|
(CMP(W|WU)const (MOVHZreg _) [c]) && 0xffff < c -> (FlagLT)
|
|
|
|
|
|
|
|
|
|
// A value shifted right by a non-zero constant is folded as non-negative, so a
// signed comparison against a negative constant always yields "greater than".
// NOTE(review): relies on SRDconst/SRWconst shifting in zero bits - confirm against the op definitions.
(CMPconst (SRDconst _ [c]) [n]) && c > 0 && n < 0 -> (FlagGT)
|
|
|
|
|
(CMPWconst (SRWconst _ [c]) [n]) && c > 0 && n < 0 -> (FlagGT)
|
|
|
|
|
|
|
|
|
|
// After a right shift by c only the low 64-c (32-c) bits can be set, so the value
// is below any unsigned constant that is at least 1<<(64-c) (1<<(32-c)).
// NOTE(review): relies on SRDconst/SRWconst shifting in zero bits - confirm against the op definitions.
(CMPUconst (SRDconst _ [c]) [n]) && c > 0 && c < 64 && (1<<uint(64-c)) <= uint64(n) -> (FlagLT)
|
|
|
|
|
(CMPWUconst (SRWconst _ [c]) [n]) && c > 0 && c < 32 && (1<<uint(32-c)) <= uint32(n) -> (FlagLT)
|
|
|
|
|
|
|
|
|
|
// The result of masking with m can never exceed m (as an unsigned value), so a
// constant larger than the mask always compares greater than the masked value.
// The signed form additionally requires the mask to be non-negative as an int32.
(CMPWconst (ANDWconst _ [m]) [n]) && int32(m) >= 0 && int32(m) < int32(n) -> (FlagLT)
|
|
|
|
|
(CMPWUconst (ANDWconst _ [m]) [n]) && uint32(m) < uint32(n) -> (FlagLT)
|
|
|
|
|
|
|
|
|
|
// Convert 64-bit comparisons to 32-bit comparisons and signed comparisons
|
|
|
|
|
// to unsigned comparisons.
|
|
|
|
|
// Helps simplify constant comparison detection.
|
|
|
|
|
(CM(P|PU)const (MOV(W|WZ)reg x) [c]) -> (CMP(W|WU)const x [c])
|
|
|
|
|
(CM(P|P|PU|PU)const x:(MOV(H|HZ|H|HZ)reg _) [c]) -> (CMP(W|W|WU|WU)const x [c])
|
|
|
|
|
(CM(P|P|PU|PU)const x:(MOV(B|BZ|B|BZ)reg _) [c]) -> (CMP(W|W|WU|WU)const x [c])
|
|
|
|
|
(CMPconst (MOV(WZ|W)reg x:(ANDWconst [m] _)) [c]) && int32(m) >= 0 && c >= 0 -> (CMPWUconst x [c])
|
|
|
|
|
(CMPUconst (MOV(WZ|W)reg x:(ANDWconst [m] _)) [c]) && int32(m) >= 0 -> (CMPWUconst x [c])
|
|
|
|
|
(CMPconst x:(SRDconst _ [c]) [n]) && c > 0 && n >= 0 -> (CMPUconst x [n])
|
|
|
|
|
(CMPWconst x:(SRWconst _ [c]) [n]) && c > 0 && n >= 0 -> (CMPWUconst x [n])
|
|
|
|
|
|
|
|
|
|
// Absorb sign and zero extensions into 32-bit comparisons.
|
|
|
|
|
(CMP(W|W|WU|WU) x (MOV(W|WZ|W|WZ)reg y)) -> (CMP(W|W|WU|WU) x y)
|
|
|
|
|
(CMP(W|W|WU|WU) (MOV(W|WZ|W|WZ)reg x) y) -> (CMP(W|W|WU|WU) x y)
|
|
|
|
|
(CMP(W|W|WU|WU)const (MOV(W|WZ|W|WZ)reg x) [c]) -> (CMP(W|W|WU|WU)const x [c])
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Absorb flag constants into branches.
|
2019-08-12 20:19:58 +01:00
|
|
|
(BRC {c} (FlagEQ) yes no) && c.(s390x.CCMask) & s390x.Equal != 0 -> (First yes no)
|
|
|
|
|
(BRC {c} (FlagLT) yes no) && c.(s390x.CCMask) & s390x.Less != 0 -> (First yes no)
|
|
|
|
|
(BRC {c} (FlagGT) yes no) && c.(s390x.CCMask) & s390x.Greater != 0 -> (First yes no)
|
|
|
|
|
(BRC {c} (FlagOV) yes no) && c.(s390x.CCMask) & s390x.Unordered != 0 -> (First yes no)
|
|
|
|
|
|
|
|
|
|
(BRC {c} (FlagEQ) yes no) && c.(s390x.CCMask) & s390x.Equal == 0 -> (First no yes)
|
|
|
|
|
(BRC {c} (FlagLT) yes no) && c.(s390x.CCMask) & s390x.Less == 0 -> (First no yes)
|
|
|
|
|
(BRC {c} (FlagGT) yes no) && c.(s390x.CCMask) & s390x.Greater == 0 -> (First no yes)
|
|
|
|
|
(BRC {c} (FlagOV) yes no) && c.(s390x.CCMask) & s390x.Unordered == 0 -> (First no yes)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Absorb flag constants into conditional instructions (LOCGR).
|
2019-09-13 13:28:49 +01:00
|
|
|
(LOCGR {c} _ x (FlagEQ)) && c.(s390x.CCMask) & s390x.Equal != 0 -> x
|
|
|
|
|
(LOCGR {c} _ x (FlagLT)) && c.(s390x.CCMask) & s390x.Less != 0 -> x
|
|
|
|
|
(LOCGR {c} _ x (FlagGT)) && c.(s390x.CCMask) & s390x.Greater != 0 -> x
|
|
|
|
|
(LOCGR {c} _ x (FlagOV)) && c.(s390x.CCMask) & s390x.Unordered != 0 -> x
|
|
|
|
|
|
|
|
|
|
(LOCGR {c} x _ (FlagEQ)) && c.(s390x.CCMask) & s390x.Equal == 0 -> x
|
|
|
|
|
(LOCGR {c} x _ (FlagLT)) && c.(s390x.CCMask) & s390x.Less == 0 -> x
|
|
|
|
|
(LOCGR {c} x _ (FlagGT)) && c.(s390x.CCMask) & s390x.Greater == 0 -> x
|
|
|
|
|
(LOCGR {c} x _ (FlagOV)) && c.(s390x.CCMask) & s390x.Unordered == 0 -> x
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Remove redundant *const ops
|
|
|
|
|
(ADDconst [0] x) -> x
|
|
|
|
|
(ADDWconst [c] x) && int32(c)==0 -> x
|
|
|
|
|
(SUBconst [0] x) -> x
|
|
|
|
|
(SUBWconst [c] x) && int32(c) == 0 -> x
|
|
|
|
|
(ANDconst [0] _) -> (MOVDconst [0])
|
|
|
|
|
(ANDWconst [c] _) && int32(c)==0 -> (MOVDconst [0])
|
|
|
|
|
(ANDconst [-1] x) -> x
|
|
|
|
|
(ANDWconst [c] x) && int32(c)==-1 -> x
|
|
|
|
|
(ORconst [0] x) -> x
|
|
|
|
|
(ORWconst [c] x) && int32(c)==0 -> x
|
|
|
|
|
(ORconst [-1] _) -> (MOVDconst [-1])
|
|
|
|
|
(ORWconst [c] _) && int32(c)==-1 -> (MOVDconst [-1])
|
|
|
|
|
(XORconst [0] x) -> x
|
|
|
|
|
(XORWconst [c] x) && int32(c)==0 -> x
|
|
|
|
|
|
|
|
|
|
// Convert constant subtracts to constant adds.
|
|
|
|
|
(SUBconst [c] x) && c != -(1<<31) -> (ADDconst [-c] x)
|
|
|
|
|
(SUBWconst [c] x) -> (ADDWconst [int64(int32(-c))] x)
|
|
|
|
|
|
|
|
|
|
// generic constant folding
|
|
|
|
|
// TODO: more of this
|
|
|
|
|
(ADDconst [c] (MOVDconst [d])) -> (MOVDconst [c+d])
|
|
|
|
|
(ADDWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(c+d))])
|
|
|
|
|
(ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) -> (ADDconst [c+d] x)
|
|
|
|
|
(ADDWconst [c] (ADDWconst [d] x)) -> (ADDWconst [int64(int32(c+d))] x)
|
|
|
|
|
(SUBconst (MOVDconst [d]) [c]) -> (MOVDconst [d-c])
|
|
|
|
|
(SUBconst (SUBconst x [d]) [c]) && is32Bit(-c-d) -> (ADDconst [-c-d] x)
|
|
|
|
|
(SRADconst [c] (MOVDconst [d])) -> (MOVDconst [d>>uint64(c)])
|
2018-02-14 14:21:31 -08:00
|
|
|
(SRAWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(d))>>uint64(c)])
|
2016-09-12 14:50:10 -04:00
|
|
|
(NEG (MOVDconst [c])) -> (MOVDconst [-c])
|
|
|
|
|
(NEGW (MOVDconst [c])) -> (MOVDconst [int64(int32(-c))])
|
|
|
|
|
(MULLDconst [c] (MOVDconst [d])) -> (MOVDconst [c*d])
|
|
|
|
|
(MULLWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(c*d))])
|
2016-10-21 08:39:39 -04:00
|
|
|
(AND (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c&d])
|
2016-09-12 14:50:10 -04:00
|
|
|
(ANDconst [c] (MOVDconst [d])) -> (MOVDconst [c&d])
|
|
|
|
|
(ANDWconst [c] (MOVDconst [d])) -> (MOVDconst [c&d])
|
2016-10-21 08:39:39 -04:00
|
|
|
(OR (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c|d])
|
2016-09-12 14:50:10 -04:00
|
|
|
(ORconst [c] (MOVDconst [d])) -> (MOVDconst [c|d])
|
|
|
|
|
(ORWconst [c] (MOVDconst [d])) -> (MOVDconst [c|d])
|
2016-10-21 08:39:39 -04:00
|
|
|
(XOR (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c^d])
|
2016-09-12 14:50:10 -04:00
|
|
|
(XORconst [c] (MOVDconst [d])) -> (MOVDconst [c^d])
|
|
|
|
|
(XORWconst [c] (MOVDconst [d])) -> (MOVDconst [c^d])
|
2017-02-12 22:12:12 -05:00
|
|
|
(LoweredRound32F x:(FMOVSconst)) -> x
|
|
|
|
|
(LoweredRound64F x:(FMOVDconst)) -> x
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// generic simplifications
|
|
|
|
|
// TODO: more of this
|
|
|
|
|
(ADD x (NEG y)) -> (SUB x y)
|
|
|
|
|
(ADDW x (NEGW y)) -> (SUBW x y)
|
|
|
|
|
(SUB x x) -> (MOVDconst [0])
|
|
|
|
|
(SUBW x x) -> (MOVDconst [0])
|
|
|
|
|
(AND x x) -> x
|
|
|
|
|
(ANDW x x) -> x
|
|
|
|
|
(OR x x) -> x
|
|
|
|
|
(ORW x x) -> x
|
|
|
|
|
(XOR x x) -> (MOVDconst [0])
|
|
|
|
|
(XORW x x) -> (MOVDconst [0])
|
2017-03-16 14:08:31 -07:00
|
|
|
// -(c + (-x)) == x - c. Skipped for c == -(1<<31), whose negation (1<<31) is not a
// signed 32-bit value (presumably ADDconst needs a 32-bit immediate - confirm).
(NEG (ADDconst [c] (NEG x))) && c != -(1<<31) -> (ADDconst [-c] x)
|
2018-04-30 13:27:50 +01:00
|
|
|
// Byte/halfword extension of a masked value: narrow the mask to the width being
// extended and replace the extension with MOVWZreg.
(MOVBZreg (ANDWconst [m] x)) -> (MOVWZreg (ANDWconst <typ.UInt32> [int64( uint8(m))] x))
|
|
|
|
|
(MOVHZreg (ANDWconst [m] x)) -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(uint16(m))] x))
|
|
|
|
|
// For sign extensions this is only done when the narrowed mask is non-negative,
// i.e. the mask clears the sign bit of the byte/halfword being extended.
(MOVBreg (ANDWconst [m] x)) && int8(m) >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64( uint8(m))] x))
|
|
|
|
|
(MOVHreg (ANDWconst [m] x)) && int16(m) >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(uint16(m))] x))
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2019-04-30 17:46:23 +01:00
|
|
|
// carry flag generation
|
|
|
|
|
// (only constant fold carry of zero)
|
|
|
|
|
// The unsigned sum did not wrap (uint64(c+d) >= uint64(c)) and the result is zero:
// fold the flags output to FlagEQ.
(Select1 (ADDCconst (MOVDconst [c]) [d]))
|
|
|
|
|
&& uint64(c+d) >= uint64(c) && c+d == 0
|
|
|
|
|
-> (FlagEQ)
|
|
|
|
|
// The unsigned sum did not wrap and the result is non-zero: fold the flags output
// to FlagLT.
(Select1 (ADDCconst (MOVDconst [c]) [d]))
|
|
|
|
|
&& uint64(c+d) >= uint64(c) && c+d != 0
|
|
|
|
|
-> (FlagLT)
|
|
|
|
|
|
|
|
|
|
// borrow flag generation
|
|
|
|
|
// (only constant fold borrow of zero)
|
|
|
|
|
// The subtrahend does not exceed the minuend as unsigned values (no borrow) and
// the difference is zero: fold the flags output to FlagGT.
(Select1 (SUBC (MOVDconst [c]) (MOVDconst [d])))
|
|
|
|
|
&& uint64(d) <= uint64(c) && c-d == 0
|
|
|
|
|
-> (FlagGT)
|
|
|
|
|
// No borrow and a non-zero difference: fold the flags output to FlagOV.
(Select1 (SUBC (MOVDconst [c]) (MOVDconst [d])))
|
|
|
|
|
&& uint64(d) <= uint64(c) && c-d != 0
|
|
|
|
|
-> (FlagOV)
|
|
|
|
|
|
|
|
|
|
// add with carry
|
|
|
|
|
// FlagEQ and FlagLT are the values the carry flag generation rules fold to when
// there is no carry, so an ADDE consuming them is rewritten to a plain ADDC.
(ADDE x y (FlagEQ)) -> (ADDC x y)
|
|
|
|
|
(ADDE x y (FlagLT)) -> (ADDC x y)
|
|
|
|
|
// Use the immediate form of ADDC when the constant operand fits in 16 bits.
(ADDC x (MOVDconst [c])) && is16Bit(c) -> (ADDCconst x [c])
|
|
|
|
|
// Constant fold the value result (Select0) of an ADDCconst applied to a constant.
(Select0 (ADDCconst (MOVDconst [c]) [d])) -> (MOVDconst [c+d])
|
|
|
|
|
|
|
|
|
|
// subtract with borrow
|
|
|
|
|
// FlagGT and FlagOV are the values the borrow flag generation rules fold to when
// there is no borrow, so a SUBE consuming them is rewritten to a plain SUBC.
(SUBE x y (FlagGT)) -> (SUBC x y)
|
|
|
|
|
(SUBE x y (FlagOV)) -> (SUBC x y)
|
|
|
|
|
// Constant fold the value result (Select0) of a SUBC of two constants.
(Select0 (SUBC (MOVDconst [c]) (MOVDconst [d]))) -> (MOVDconst [c-d])
|
|
|
|
|
|
|
|
|
|
// collapse carry chain
|
|
|
|
|
(ADDE x y (Select1 (ADDCconst [-1] (Select0 (ADDE (MOVDconst [0]) (MOVDconst [0]) c)))))
|
|
|
|
|
-> (ADDE x y c)
|
|
|
|
|
|
|
|
|
|
// collapse borrow chain
|
|
|
|
|
(SUBE x y (Select1 (SUBC (MOVDconst [0]) (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) c))))))
|
|
|
|
|
-> (SUBE x y c)
|
|
|
|
|
|
2017-02-12 22:12:12 -05:00
|
|
|
// fused multiply-add
|
|
|
|
|
(FADD (FMUL y z) x) -> (FMADD x y z)
|
|
|
|
|
(FADDS (FMULS y z) x) -> (FMADDS x y z)
|
|
|
|
|
(FSUB (FMUL y z) x) -> (FMSUB x y z)
|
|
|
|
|
(FSUBS (FMULS y z) x) -> (FMSUBS x y z)
|
|
|
|
|
|
2016-09-14 10:42:14 -04:00
|
|
|
// Fold memory operations into operations.
|
|
|
|
|
// Exclude global data (SB) because these instructions cannot handle relative addresses.
|
|
|
|
|
// TODO(mundaym): use LARL in the assembler to handle SB?
|
|
|
|
|
// TODO(mundaym): indexed versions of these?
|
2018-10-26 10:52:59 -07:00
|
|
|
(ADD <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ADDload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ADD <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ADDload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ADDW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ADDWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ADDW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ADDWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ADDW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ADDWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ADDW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ADDWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(MULLD <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (MULLDload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(MULLD <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (MULLDload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(MULLW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (MULLWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(MULLW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (MULLWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(MULLW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (MULLWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(MULLW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (MULLWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(SUB <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (SUBload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(SUBW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (SUBWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(SUBW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (SUBWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(AND <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ANDload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(AND <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ANDload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ANDW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ANDWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ANDW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ANDWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ANDW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ANDWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ANDW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ANDWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(OR <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ORload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(OR <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ORload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ORW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ORWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ORW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ORWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ORW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ORWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(ORW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (ORWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(XOR <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (XORload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(XOR <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (XORload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(XORW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (XORWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(XORW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (XORWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(XORW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (XORWload <t> [off] {sym} x ptr mem)
|
2018-10-26 10:52:59 -07:00
|
|
|
(XORW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoadClobber(v, g, x) && clobber(g)
|
2016-09-14 10:42:14 -04:00
|
|
|
-> (XORWload <t> [off] {sym} x ptr mem)
|
|
|
|
|
|
2016-09-12 14:50:10 -04:00
|
|
|
// Combine constant stores into larger (unaligned) stores.
|
2017-04-30 14:25:57 -04:00
|
|
|
// Avoid SB because constant stores to relative offsets are
|
|
|
|
|
// emulated by the assembler and also can't handle unaligned offsets.
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHstoreconst [makeValAndOff(ValAndOff(c).Val()&0xff | ValAndOff(a).Val()<<8, ValAndOff(a).Off())] {s} p mem)
|
|
|
|
|
(MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
|
|
|
|
|
&& clobber(x)
|
2017-04-30 14:25:57 -04:00
|
|
|
-> (MOVWstore [ValAndOff(a).Off()] {s} p (MOVDconst [int64(int32(ValAndOff(c).Val()&0xffff | ValAndOff(a).Val()<<16))]) mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVDstore [ValAndOff(a).Off()] {s} p (MOVDconst [ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32]) mem)
|
|
|
|
|
|
|
|
|
|
// Combine stores into larger (unaligned) stores.
|
2016-09-27 20:30:01 -04:00
|
|
|
// It doesn't work on global data (based on SB) because stores with relative addressing
|
|
|
|
|
// require that the memory operand be aligned.
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRDconst [8] w) mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHstore [i-1] {s} p w mem)
|
|
|
|
|
(MOVBstore [i] {s} p w0:(SRDconst [j] w) x:(MOVBstore [i-1] {s} p (SRDconst [j+8] w) mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHstore [i-1] {s} p w0 mem)
|
2016-09-27 20:30:01 -04:00
|
|
|
(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRWconst [8] w) mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHstore [i-1] {s} p w mem)
|
|
|
|
|
(MOVBstore [i] {s} p w0:(SRWconst [j] w) x:(MOVBstore [i-1] {s} p (SRWconst [j+8] w) mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHstore [i-1] {s} p w0 mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWstore [i-2] {s} p w mem)
|
|
|
|
|
(MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWstore [i-2] {s} p w0 mem)
|
2016-09-27 20:30:01 -04:00
|
|
|
(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWstore [i-2] {s} p w mem)
|
|
|
|
|
(MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWstore [i-2] {s} p w0 mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
// Merge two adjacent 32-bit stores of the halves of w into one 64-bit store.
// The target is big-endian, so the word at the lower address (i-4) must be the
// shifted-down upper half (SRDconst [32] w) and the word at i must be w itself;
// this is the same operand order used by the MOVHstore, MOVBstore and
// MOVWstoreidx merge rules. SB-based addresses are excluded because the merged
// store may be unaligned.
(MOVWstore [i] {s} p w x:(MOVWstore [i-4] {s} p (SRDconst [32] w) mem))
  && p.Op != OpSB
  && x.Uses == 1
  && clobber(x)
  -> (MOVDstore [i-4] {s} p w mem)
|
|
|
|
|
(MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVDstore [i-4] {s} p w0 mem)
|
|
|
|
|
|
|
|
|
|
(MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [8] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHstoreidx [i-1] {s} p idx w mem)
|
|
|
|
|
(MOVBstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [j+8] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHstoreidx [i-1] {s} p idx w0 mem)
|
2016-09-27 20:30:01 -04:00
|
|
|
(MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHstoreidx [i-1] {s} p idx w mem)
|
|
|
|
|
(MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHstoreidx [i-1] {s} p idx w0 mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWstoreidx [i-2] {s} p idx w mem)
|
|
|
|
|
(MOVHstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [j+16] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWstoreidx [i-2] {s} p idx w0 mem)
|
2016-09-27 20:30:01 -04:00
|
|
|
(MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWstoreidx [i-2] {s} p idx w mem)
|
|
|
|
|
(MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWstoreidx [i-2] {s} p idx w0 mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVDstoreidx [i-4] {s} p idx w mem)
|
|
|
|
|
(MOVWstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [j+32] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVDstoreidx [i-4] {s} p idx w0 mem)
|
|
|
|
|
|
2016-09-27 20:30:01 -04:00
|
|
|
// Combine stores into larger (unaligned) stores with the bytes reversed (little endian).
|
|
|
|
|
// Store-with-bytes-reversed instructions do not support relative memory addresses,
|
|
|
|
|
// so these stores can't operate on global data (SB).
|
|
|
|
|
(MOVBstore [i] {s} p (SRDconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHBRstore [i-1] {s} p w mem)
|
|
|
|
|
(MOVBstore [i] {s} p (SRDconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRDconst [j-8] w) mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHBRstore [i-1] {s} p w0 mem)
|
|
|
|
|
(MOVBstore [i] {s} p (SRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHBRstore [i-1] {s} p w mem)
|
|
|
|
|
(MOVBstore [i] {s} p (SRWconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRWconst [j-8] w) mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHBRstore [i-1] {s} p w0 mem)
|
|
|
|
|
(MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWBRstore [i-2] {s} p w mem)
|
|
|
|
|
(MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWBRstore [i-2] {s} p w0 mem)
|
|
|
|
|
(MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWBRstore [i-2] {s} p w mem)
|
|
|
|
|
(MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWBRstore [i-2] {s} p w0 mem)
|
|
|
|
|
(MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVDBRstore [i-4] {s} p w mem)
|
|
|
|
|
(MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVDBRstore [i-4] {s} p w0 mem)
|
|
|
|
|
|
|
|
|
|
(MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHBRstoreidx [i-1] {s} p idx w mem)
|
|
|
|
|
(MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
|
|
|
|
|
(MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHBRstoreidx [i-1] {s} p idx w mem)
|
|
|
|
|
(MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
|
|
|
|
|
(MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWBRstoreidx [i-2] {s} p idx w mem)
|
|
|
|
|
(MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
|
|
|
|
|
(MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWBRstoreidx [i-2] {s} p idx w mem)
|
|
|
|
|
(MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
|
|
|
|
|
(MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVDBRstoreidx [i-4] {s} p idx w mem)
|
|
|
|
|
(MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
|
|
|
|
|
|
2017-03-29 18:06:04 +00:00
|
|
|
// Combining byte loads into larger (unaligned) loads.
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2017-03-30 03:30:22 +00:00
|
|
|
// Big-endian loads
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2017-03-30 03:30:22 +00:00
|
|
|
(ORW x1:(MOVBZload [i1] {s} p mem)
|
|
|
|
|
sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)))
|
|
|
|
|
&& i1 == i0+1
|
2016-09-12 14:50:10 -04:00
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
2017-03-30 03:30:22 +00:00
|
|
|
&& sh.Uses == 1
|
2017-03-25 15:05:42 -07:00
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
2017-03-30 03:30:22 +00:00
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
|
|
|
|
|
|
|
|
|
|
(OR x1:(MOVBZload [i1] {s} p mem)
|
|
|
|
|
sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
|
|
|
|
|
&& i1 == i0+1
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
|
|
|
|
|
|
|
|
|
|
(ORW x1:(MOVHZload [i1] {s} p mem)
|
|
|
|
|
sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
|
|
|
|
|
&& i1 == i0+2
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
|
|
|
|
|
|
|
|
|
|
(OR x1:(MOVHZload [i1] {s} p mem)
|
|
|
|
|
sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
|
|
|
|
|
&& i1 == i0+2
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
|
2017-03-25 15:05:42 -07:00
|
|
|
|
2017-03-30 03:30:22 +00:00
|
|
|
(OR x1:(MOVWZload [i1] {s} p mem)
|
|
|
|
|
sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
|
|
|
|
|
&& i1 == i0+4
|
2017-03-25 15:05:42 -07:00
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
2017-03-30 03:30:22 +00:00
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
|
|
|
|
|
|
|
|
|
|
// Inside a chain of ORWs, fuse two adjacent zero-extended byte loads
// (x0 at i0 shifted by j0, x1 at i0+1 shifted by j0-8) into a single
// halfword load shifted by j1; the remaining operand y is kept.
// p.Op != OpSB matches the guard on the simple two-load merge rules: the
// combined load may be unaligned, so SB-based addresses are not merged.
(ORW
    s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
    or:(ORW
        s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
        y))
  && i1 == i0+1
  && j1 == j0-8
  && j1 % 16 == 0
  && p.Op != OpSB
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
|
2017-03-30 03:30:22 +00:00
|
|
|
|
|
|
|
|
// Inside a chain of ORs, fuse two adjacent zero-extended byte loads
// (x0 at i0 shifted by j0, x1 at i0+1 shifted by j0-8) into a single
// halfword load shifted by j1; the remaining operand y is kept.
// p.Op != OpSB matches the guard on the simple two-load merge rules: the
// combined load may be unaligned, so SB-based addresses are not merged.
(OR
    s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
    or:(OR
        s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
        y))
  && i1 == i0+1
  && j1 == j0-8
  && j1 % 16 == 0
  && p.Op != OpSB
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
|
2017-03-30 03:30:22 +00:00
|
|
|
|
|
|
|
|
// Inside a chain of ORs, fuse two adjacent zero-extended halfword loads
// (x0 at i0 shifted by j0, x1 at i0+2 shifted by j0-16) into a single
// word load shifted by j1; the remaining operand y is kept.
// p.Op != OpSB matches the guard on the simple two-load merge rules: the
// combined load may be unaligned, so SB-based addresses are not merged.
(OR
    s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem))
    or:(OR
        s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))
        y))
  && i1 == i0+2
  && j1 == j0-16
  && j1 % 32 == 0
  && p.Op != OpSB
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
|
2017-03-30 03:30:22 +00:00
|
|
|
|
|
|
|
|
// Big-endian indexed loads
|
|
|
|
|
|
|
|
|
|
(ORW x1:(MOVBZloadidx [i1] {s} p idx mem)
|
|
|
|
|
sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
|
|
|
|
|
&& i1 == i0+1
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
|
|
|
|
|
|
|
|
|
|
(OR x1:(MOVBZloadidx [i1] {s} p idx mem)
|
|
|
|
|
sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
|
|
|
|
|
&& i1 == i0+1
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
|
|
|
|
|
|
|
|
|
|
(ORW x1:(MOVHZloadidx [i1] {s} p idx mem)
|
|
|
|
|
sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
|
|
|
|
|
&& i1 == i0+2
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2017-03-30 03:30:22 +00:00
|
|
|
(OR x1:(MOVHZloadidx [i1] {s} p idx mem)
|
|
|
|
|
sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
|
|
|
|
|
&& i1 == i0+2
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
|
|
|
|
|
|
|
|
|
|
(OR x1:(MOVWZloadidx [i1] {s} p idx mem)
|
|
|
|
|
sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)))
|
|
|
|
|
&& i1 == i0+4
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
|
|
|
|
|
|
|
|
|
|
// Indexed form: inside a chain of ORWs, fuse two adjacent zero-extended byte
// loads (x0 at i0 shifted by j0, x1 at i0+1 shifted by j0-8) into a single
// indexed halfword load shifted by j1; the remaining operand y is kept.
// p.Op != OpSB matches the guard on the simple indexed two-load merge rules.
(ORW
    s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
    or:(ORW
        s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
        y))
  && i1 == i0+1
  && j1 == j0-8
  && j1 % 16 == 0
  && p.Op != OpSB
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
|
2017-03-30 03:30:22 +00:00
|
|
|
|
|
|
|
|
// Indexed form: inside a chain of ORs, fuse two adjacent zero-extended byte
// loads (x0 at i0 shifted by j0, x1 at i0+1 shifted by j0-8) into a single
// indexed halfword load shifted by j1; the remaining operand y is kept.
// p.Op != OpSB matches the guard on the simple indexed two-load merge rules.
(OR
    s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
    or:(OR
        s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
        y))
  && i1 == i0+1
  && j1 == j0-8
  && j1 % 16 == 0
  && p.Op != OpSB
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
|
2017-03-30 03:30:22 +00:00
|
|
|
|
|
|
|
|
// Indexed form: inside a chain of ORs, fuse two adjacent zero-extended
// halfword loads (x0 at i0 shifted by j0, x1 at i0+2 shifted by j0-16) into a
// single indexed word load shifted by j1; the remaining operand y is kept.
// p.Op != OpSB matches the guard on the simple indexed two-load merge rules.
(OR
    s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem))
    or:(OR
        s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))
        y))
  && i1 == i0+2
  && j1 == j0-16
  && j1 % 32 == 0
  && p.Op != OpSB
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
|
2017-03-30 03:30:22 +00:00
|
|
|
|
|
|
|
|
// Little-endian loads
|
2017-03-25 15:05:42 -07:00
|
|
|
|
2017-03-30 03:30:22 +00:00
|
|
|
(ORW x0:(MOVBZload [i0] {s} p mem)
|
|
|
|
|
sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)))
|
2017-03-29 18:06:04 +00:00
|
|
|
&& p.Op != OpSB
|
2017-03-30 03:30:22 +00:00
|
|
|
&& i1 == i0+1
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
|
|
|
|
|
|
|
|
|
|
(OR x0:(MOVBZload [i0] {s} p mem)
|
|
|
|
|
sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& i1 == i0+1
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
|
|
|
|
|
|
|
|
|
|
(ORW r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
|
|
|
|
|
sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
|
|
|
|
|
&& i1 == i0+2
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& r0.Uses == 1
|
|
|
|
|
&& r1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(r0)
|
|
|
|
|
&& clobber(r1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
|
|
|
|
|
|
|
|
|
|
(OR r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
|
|
|
|
|
sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
|
|
|
|
|
&& i1 == i0+2
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& r0.Uses == 1
|
|
|
|
|
&& r1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(r0)
|
|
|
|
|
&& clobber(r1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
|
|
|
|
|
|
|
|
|
|
(OR r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem))
|
|
|
|
|
sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
|
|
|
|
|
&& i1 == i0+4
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& r0.Uses == 1
|
|
|
|
|
&& r1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(r0)
|
|
|
|
|
&& clobber(r1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
|
|
|
|
|
|
|
|
|
|
(ORW
|
|
|
|
|
s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
|
|
|
|
|
or:(ORW
|
|
|
|
|
s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
|
|
|
|
|
y))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& i1 == i0+1
|
|
|
|
|
&& j1 == j0+8
|
|
|
|
|
&& j0 % 16 == 0
|
2016-09-12 14:50:10 -04:00
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& s0.Uses == 1
|
|
|
|
|
&& s1.Uses == 1
|
2017-03-30 03:30:22 +00:00
|
|
|
&& or.Uses == 1
|
2019-06-19 18:09:39 -04:00
|
|
|
&& mergePoint(b,x0,x1,y) != nil
|
2016-09-12 14:50:10 -04:00
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(s0)
|
|
|
|
|
&& clobber(s1)
|
2017-03-30 03:30:22 +00:00
|
|
|
&& clobber(or)
|
2019-06-19 18:09:39 -04:00
|
|
|
-> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
|
2017-03-30 03:30:22 +00:00
|
|
|
|
|
|
|
|
(OR
|
|
|
|
|
s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
|
|
|
|
|
or:(OR
|
|
|
|
|
s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
|
|
|
|
|
y))
|
2017-03-29 18:06:04 +00:00
|
|
|
&& p.Op != OpSB
|
2017-03-30 03:30:22 +00:00
|
|
|
&& i1 == i0+1
|
|
|
|
|
&& j1 == j0+8
|
|
|
|
|
&& j0 % 16 == 0
|
2016-09-12 14:50:10 -04:00
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& s0.Uses == 1
|
|
|
|
|
&& s1.Uses == 1
|
2017-03-30 03:30:22 +00:00
|
|
|
&& or.Uses == 1
|
2019-06-19 18:09:39 -04:00
|
|
|
&& mergePoint(b,x0,x1,y) != nil
|
2016-09-12 14:50:10 -04:00
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(s0)
|
|
|
|
|
&& clobber(s1)
|
2017-03-30 03:30:22 +00:00
|
|
|
&& clobber(or)
|
2019-06-19 18:09:39 -04:00
|
|
|
-> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
|
2017-03-30 03:30:22 +00:00
|
|
|
|
|
|
|
|
(OR
|
|
|
|
|
s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem)))
|
|
|
|
|
or:(OR
|
|
|
|
|
s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))
|
|
|
|
|
y))
|
|
|
|
|
&& i1 == i0+2
|
|
|
|
|
&& j1 == j0+16
|
|
|
|
|
&& j0 % 32 == 0
|
2016-09-12 14:50:10 -04:00
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
2017-03-30 03:30:22 +00:00
|
|
|
&& r0.Uses == 1
|
|
|
|
|
&& r1.Uses == 1
|
2016-09-12 14:50:10 -04:00
|
|
|
&& s0.Uses == 1
|
2017-03-30 03:30:22 +00:00
|
|
|
&& s1.Uses == 1
|
|
|
|
|
&& or.Uses == 1
|
2019-06-19 18:09:39 -04:00
|
|
|
&& mergePoint(b,x0,x1,y) != nil
|
2016-09-12 14:50:10 -04:00
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
2017-03-30 03:30:22 +00:00
|
|
|
&& clobber(r0)
|
|
|
|
|
&& clobber(r1)
|
2016-09-12 14:50:10 -04:00
|
|
|
&& clobber(s0)
|
2017-03-30 03:30:22 +00:00
|
|
|
&& clobber(s1)
|
|
|
|
|
&& clobber(or)
|
2019-06-19 18:09:39 -04:00
|
|
|
-> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
|
2017-03-30 03:30:22 +00:00
|
|
|
|
|
|
|
|
// Little-endian indexed loads
|
|
|
|
|
|
|
|
|
|
(ORW x0:(MOVBZloadidx [i0] {s} p idx mem)
|
|
|
|
|
sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& i1 == i0+1
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
|
|
|
|
|
|
|
|
|
|
(OR x0:(MOVBZloadidx [i0] {s} p idx mem)
|
|
|
|
|
sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& i1 == i0+1
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
|
2016-09-12 14:50:10 -04:00
|
|
|
|
2017-03-30 03:30:22 +00:00
|
|
|
(ORW r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
|
|
|
|
|
sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
|
|
|
|
|
&& i1 == i0+2
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& r0.Uses == 1
|
|
|
|
|
&& r1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(r0)
|
|
|
|
|
&& clobber(r1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
|
|
|
|
|
|
|
|
|
|
(OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
|
|
|
|
|
sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
|
|
|
|
|
&& i1 == i0+2
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& r0.Uses == 1
|
|
|
|
|
&& r1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(r0)
|
|
|
|
|
&& clobber(r1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
|
|
|
|
|
|
|
|
|
|
(OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem))
|
|
|
|
|
sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))))
|
|
|
|
|
&& i1 == i0+4
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& r0.Uses == 1
|
|
|
|
|
&& r1.Uses == 1
|
|
|
|
|
&& sh.Uses == 1
|
|
|
|
|
&& mergePoint(b,x0,x1) != nil
|
|
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(r0)
|
|
|
|
|
&& clobber(r1)
|
|
|
|
|
&& clobber(sh)
|
|
|
|
|
-> @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
|
|
|
|
|
|
|
|
|
|
(ORW
|
|
|
|
|
s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
|
|
|
|
|
or:(ORW
|
|
|
|
|
s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
|
|
|
|
|
y))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& i1 == i0+1
|
|
|
|
|
&& j1 == j0+8
|
|
|
|
|
&& j0 % 16 == 0
|
2016-09-12 14:50:10 -04:00
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& s0.Uses == 1
|
|
|
|
|
&& s1.Uses == 1
|
2017-03-30 03:30:22 +00:00
|
|
|
&& or.Uses == 1
|
2019-06-19 18:09:39 -04:00
|
|
|
&& mergePoint(b,x0,x1,y) != nil
|
2016-09-12 14:50:10 -04:00
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(s0)
|
|
|
|
|
&& clobber(s1)
|
2017-03-30 03:30:22 +00:00
|
|
|
&& clobber(or)
|
2019-06-19 18:09:39 -04:00
|
|
|
-> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
|
2017-03-30 03:30:22 +00:00
|
|
|
|
|
|
|
|
(OR
|
|
|
|
|
s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
|
|
|
|
|
or:(OR
|
|
|
|
|
s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
|
|
|
|
|
y))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& i1 == i0+1
|
|
|
|
|
&& j1 == j0+8
|
|
|
|
|
&& j0 % 16 == 0
|
2016-09-12 14:50:10 -04:00
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& s0.Uses == 1
|
|
|
|
|
&& s1.Uses == 1
|
2017-03-30 03:30:22 +00:00
|
|
|
&& or.Uses == 1
|
2019-06-19 18:09:39 -04:00
|
|
|
&& mergePoint(b,x0,x1,y) != nil
|
2017-03-30 03:30:22 +00:00
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
|
|
|
|
&& clobber(s0)
|
|
|
|
|
&& clobber(s1)
|
|
|
|
|
&& clobber(or)
|
2019-06-19 18:09:39 -04:00
|
|
|
-> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
|
2017-03-30 03:30:22 +00:00
|
|
|
|
|
|
|
|
(OR
|
|
|
|
|
s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))
|
|
|
|
|
or:(OR
|
|
|
|
|
s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
|
|
|
|
|
y))
|
|
|
|
|
&& i1 == i0+2
|
|
|
|
|
&& j1 == j0+16
|
|
|
|
|
&& j0 % 32 == 0
|
|
|
|
|
&& x0.Uses == 1
|
|
|
|
|
&& x1.Uses == 1
|
|
|
|
|
&& r0.Uses == 1
|
|
|
|
|
&& r1.Uses == 1
|
|
|
|
|
&& s0.Uses == 1
|
|
|
|
|
&& s1.Uses == 1
|
|
|
|
|
&& or.Uses == 1
|
2019-06-19 18:09:39 -04:00
|
|
|
&& mergePoint(b,x0,x1,y) != nil
|
2016-09-12 14:50:10 -04:00
|
|
|
&& clobber(x0)
|
|
|
|
|
&& clobber(x1)
|
2017-03-30 03:30:22 +00:00
|
|
|
&& clobber(r0)
|
|
|
|
|
&& clobber(r1)
|
2016-09-12 14:50:10 -04:00
|
|
|
&& clobber(s0)
|
|
|
|
|
&& clobber(s1)
|
2017-03-30 03:30:22 +00:00
|
|
|
&& clobber(or)
|
2019-06-19 18:09:39 -04:00
|
|
|
-> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
|
2016-09-12 14:50:10 -04:00
|
|
|
|
|
|
|
|
// Combine stores into store multiples.
|
2016-09-27 20:30:01 -04:00
|
|
|
// 32-bit
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& is20Bit(i-4)
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (STM2 [i-4] {s} p w0 w1 mem)
|
2016-09-27 20:30:01 -04:00
|
|
|
(MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& is20Bit(i-8)
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (STM3 [i-8] {s} p w0 w1 w2 mem)
|
|
|
|
|
(MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& is20Bit(i-12)
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
|
|
|
|
|
(STM2 [i] {s} p w2 w3 x:(STM2 [i-8] {s} p w0 w1 mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& is20Bit(i-8)
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (STM4 [i-8] {s} p w0 w1 w2 w3 mem)
|
|
|
|
|
// 64-bit
|
2016-09-12 14:50:10 -04:00
|
|
|
(MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem))
|
|
|
|
|
&& p.Op != OpSB
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& is20Bit(i-8)
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (STMG2 [i-8] {s} p w0 w1 mem)
|
2016-09-27 20:30:01 -04:00
|
|
|
(MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& is20Bit(i-16)
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (STMG3 [i-16] {s} p w0 w1 w2 mem)
|
|
|
|
|
(MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& is20Bit(i-24)
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
|
|
|
|
|
(STMG2 [i] {s} p w2 w3 x:(STMG2 [i-16] {s} p w0 w1 mem))
|
|
|
|
|
&& x.Uses == 1
|
|
|
|
|
&& is20Bit(i-16)
|
|
|
|
|
&& clobber(x)
|
|
|
|
|
-> (STMG4 [i-16] {s} p w0 w1 w2 w3 mem)
|
|
|
|
|
|
|
|
|
|
// Convert 32-bit store multiples into 64-bit stores.
|
|
|
|
|
// When the first stored operand of an STM2 is (SRDconst [32] x) and the second
// is x itself, the pair is replaced by one MOVDstore of x at the same offset,
// symbol and base pointer.
(STM2 [i] {s} p (SRDconst [32] x) x mem) -> (MOVDstore [i] {s} p x mem)
|