mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
This CL implements Mul64uhilo, Hmul64, Hmul64u, and Avg64u
on 32-bit systems, with the effect that constant division of both
int64s and uint64s can now be emitted directly in all cases,
and also that bits.Mul64 can be intrinsified on 32-bit systems.
Previously, constant division of uint64s by values 0 ≤ c ≤ 0xFFFF was
implemented as uint32 divisions by c and some fixup. After expanding
those smaller constant divisions, the code for i/999 required:
(386) 7 mul, 10 add, 2 sub, 3 rotate, 3 shift (104 bytes)
(arm) 7 mul, 9 add, 3 sub, 2 shift (104 bytes)
(mips) 7 mul, 10 add, 5 sub, 6 shift, 3 sgtu (176 bytes)
For that much code, we might as well use a full 64x64->128 multiply
that can be used for all divisors, not just small ones.
Having done that, the same i/999 now generates:
(386) 4 mul, 9 add, 2 sub, 2 or, 6 shift (112 bytes)
(arm) 4 mul, 8 add, 2 sub, 2 or, 3 shift (92 bytes)
(mips) 4 mul, 11 add, 3 sub, 6 shift, 8 sgtu, 4 or (196 bytes)
The size increase on 386 is due to a few extra register spills.
The size increase on mips is due to add-with-carry being hard.
The new approach is more general, letting us delete the old special case
and guarantee that all int64 and uint64 divisions by constants are
generated directly on 32-bit systems.
This especially speeds up code making heavy use of bits.Mul64 with
a constant argument, which happens in strconv and various crypto
packages. A few examples are benchmarked below.
pkg: cmd/compile/internal/test
benchmark \ host local linux-amd64 s7 linux-386 s7:GOARCH=386
vs base vs base vs base vs base vs base
DivconstI64 ~ ~ ~ -49.66% -21.02%
ModconstI64 ~ ~ ~ -13.45% +14.52%
DivisiblePow2constI64 ~ ~ ~ +0.97% -1.32%
DivisibleconstI64 ~ ~ ~ -20.01% -48.28%
DivisibleWDivconstI64 ~ ~ -1.76% -38.59% -42.74%
DivconstU64/3 ~ ~ ~ -13.82% -4.09%
DivconstU64/5 ~ ~ ~ -14.10% -3.54%
DivconstU64/37 -2.07% -4.45% ~ -19.60% -9.55%
DivconstU64/1234567 ~ ~ ~ -61.55% -56.93%
ModconstU64 ~ ~ ~ -6.25% ~
DivisibleconstU64 ~ ~ ~ -2.78% -7.82%
DivisibleWDivconstU64 ~ ~ ~ +4.23% +2.56%
pkg: math/bits
benchmark \ host s7 linux-amd64 linux-386 s7:GOARCH=386
vs base vs base vs base vs base
Add ~ ~ ~ ~
Add32 +1.59% ~ ~ ~
Add64 ~ ~ ~ ~
Add64multiple ~ ~ ~ ~
Sub ~ ~ ~ ~
Sub32 ~ ~ ~ ~
Sub64 ~ ~ -9.20% ~
Sub64multiple ~ ~ ~ ~
Mul ~ ~ ~ ~
Mul32 ~ ~ ~ ~
Mul64 ~ ~ -41.58% -53.21%
Div ~ ~ ~ ~
Div32 ~ ~ ~ ~
Div64 ~ ~ ~ ~
pkg: strconv
benchmark \ host s7 linux-amd64 linux-386 s7:GOARCH=386
vs base vs base vs base vs base
ParseInt/Pos/7bit ~ ~ -11.08% -6.75%
ParseInt/Pos/26bit ~ ~ -13.65% -11.02%
ParseInt/Pos/31bit ~ ~ -14.65% -9.71%
ParseInt/Pos/56bit -1.80% ~ -17.97% -10.78%
ParseInt/Pos/63bit ~ ~ -13.85% -9.63%
ParseInt/Neg/7bit ~ ~ -12.14% -7.26%
ParseInt/Neg/26bit ~ ~ -14.18% -9.81%
ParseInt/Neg/31bit ~ ~ -14.51% -9.02%
ParseInt/Neg/56bit ~ ~ -15.79% -9.79%
ParseInt/Neg/63bit ~ ~ -15.68% -11.07%
AppendFloat/Decimal ~ ~ -7.25% -12.26%
AppendFloat/Float ~ ~ -15.96% -19.45%
AppendFloat/Exp ~ ~ -13.96% -17.76%
AppendFloat/NegExp ~ ~ -14.89% -20.27%
AppendFloat/LongExp ~ ~ -12.68% -17.97%
AppendFloat/Big ~ ~ -11.10% -16.64%
AppendFloat/BinaryExp ~ ~ ~ ~
AppendFloat/32Integer ~ ~ -10.05% -10.91%
AppendFloat/32ExactFraction ~ ~ -8.93% -13.00%
AppendFloat/32Point ~ ~ -10.36% -14.89%
AppendFloat/32Exp ~ ~ -9.88% -13.54%
AppendFloat/32NegExp ~ ~ -10.16% -14.26%
AppendFloat/32Shortest ~ ~ -11.39% -14.96%
AppendFloat/32Fixed8Hard ~ ~ ~ -2.31%
AppendFloat/32Fixed9Hard ~ ~ ~ -7.01%
AppendFloat/64Fixed1 ~ ~ -2.83% -8.23%
AppendFloat/64Fixed2 ~ ~ ~ -7.94%
AppendFloat/64Fixed3 ~ ~ -4.07% -7.22%
AppendFloat/64Fixed4 ~ ~ -7.24% -7.62%
AppendFloat/64Fixed12 ~ ~ -6.57% -4.82%
AppendFloat/64Fixed16 ~ ~ -4.00% -5.81%
AppendFloat/64Fixed12Hard -2.22% ~ -4.07% -6.35%
AppendFloat/64Fixed17Hard -2.12% ~ ~ -3.79%
AppendFloat/64Fixed18Hard -1.89% ~ +2.48% ~
AppendFloat/Slowpath64 -1.85% ~ -14.49% -18.21%
AppendFloat/SlowpathDenormal64 ~ ~ -13.08% -19.41%
pkg: crypto/internal/fips140/nistec/fiat
benchmark \ host s7 linux-amd64 linux-386 s7:GOARCH=386
vs base vs base vs base vs base
Mul/P224 ~ ~ -29.95% -39.60%
Mul/P384 ~ ~ -37.11% -63.33%
Mul/P521 ~ ~ -26.62% -12.42%
Square/P224 +1.46% ~ -40.62% -49.18%
Square/P384 ~ ~ -45.51% -69.68%
Square/P521 +90.37% ~ -25.26% -11.23%
(The +90% is a separate problem and not real; that much variation
can be seen on that system by running the same binary from two
different files.)
pkg: crypto/internal/fips140/edwards25519
benchmark \ host s7 linux-amd64 linux-386 s7:GOARCH=386
vs base vs base vs base vs base
EncodingDecoding ~ ~ -34.67% -35.75%
ScalarBaseMult ~ ~ -31.25% -30.29%
ScalarMult ~ ~ -33.45% -32.54%
VarTimeDoubleScalarBaseMult ~ ~ -33.78% -33.68%
Change-Id: Id3c91d42cd01def6731b755e99f8f40c6ad1bb65
Reviewed-on: https://go-review.googlesource.com/c/go/+/716061
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Russ Cox <rsc@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
481 lines
17 KiB
Text
481 lines
17 KiB
Text
// Copyright 2016 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// This file contains rules to decompose [u]int64 types on 32-bit
|
|
// architectures. These rules work together with the decomposeBuiltin
|
|
// pass which handles phis of these types.
|
|
|
|
// Last evaluates to its final argument. Rules in this file wrap their result
// in Last so that the earlier arguments can bind names (x0:, add:, ...) that
// the final expression then refers to.
(Last ___) => v.Args[len(v.Args)-1]
|
|
|
|
// Projections applied directly to the value that built the pair fold away.
// Int64Make takes (hi, lo) in that order; MakeTuple takes (element 0, element 1).
(Int64Hi (Int64Make hi _)) => hi
|
|
(Int64Lo (Int64Make _ lo)) => lo
|
|
(Select0 (MakeTuple x y)) => x
|
|
(Select1 (MakeTuple x y)) => y
|
|
|
|
(Load <t> ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() =>
|
|
(Int64Make
|
|
(Load <typ.Int32> (OffPtr <typ.Int32Ptr> [4] ptr) mem)
|
|
(Load <typ.UInt32> ptr mem))
|
|
|
|
(Load <t> ptr mem) && is64BitInt(t) && !config.BigEndian && !t.IsSigned() =>
|
|
(Int64Make
|
|
(Load <typ.UInt32> (OffPtr <typ.UInt32Ptr> [4] ptr) mem)
|
|
(Load <typ.UInt32> ptr mem))
|
|
|
|
(Load <t> ptr mem) && is64BitInt(t) && config.BigEndian && t.IsSigned() =>
|
|
(Int64Make
|
|
(Load <typ.Int32> ptr mem)
|
|
(Load <typ.UInt32> (OffPtr <typ.UInt32Ptr> [4] ptr) mem))
|
|
|
|
(Load <t> ptr mem) && is64BitInt(t) && config.BigEndian && !t.IsSigned() =>
|
|
(Int64Make
|
|
(Load <typ.UInt32> ptr mem)
|
|
(Load <typ.UInt32> (OffPtr <typ.UInt32Ptr> [4] ptr) mem))
|
|
|
|
(Store {t} dst (Int64Make hi lo) mem) && t.Size() == 8 && !config.BigEndian =>
|
|
(Store {hi.Type}
|
|
(OffPtr <hi.Type.PtrTo()> [4] dst)
|
|
hi
|
|
(Store {lo.Type} dst lo mem))
|
|
|
|
(Store {t} dst (Int64Make hi lo) mem) && t.Size() == 8 && config.BigEndian =>
|
|
(Store {lo.Type}
|
|
(OffPtr <lo.Type.PtrTo()> [4] dst)
|
|
lo
|
|
(Store {hi.Type} dst hi mem))
|
|
|
|
// These Arg rules are not enabled during the decomposeBuiltin pass, where late call expansion is used (each condition checks the pass name), but they are always enabled for softFloat.
|
|
(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() && !(b.Func.pass.name == "decompose builtin") =>
|
|
(Int64Make
|
|
(Arg <typ.Int32> {n} [off+4])
|
|
(Arg <typ.UInt32> {n} [off]))
|
|
(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() && !(b.Func.pass.name == "decompose builtin") =>
|
|
(Int64Make
|
|
(Arg <typ.UInt32> {n} [off+4])
|
|
(Arg <typ.UInt32> {n} [off]))
|
|
|
|
(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() && !(b.Func.pass.name == "decompose builtin") =>
|
|
(Int64Make
|
|
(Arg <typ.Int32> {n} [off])
|
|
(Arg <typ.UInt32> {n} [off+4]))
|
|
(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() && !(b.Func.pass.name == "decompose builtin") =>
|
|
(Int64Make
|
|
(Arg <typ.UInt32> {n} [off])
|
|
(Arg <typ.UInt32> {n} [off+4]))
|
|
|
|
(Add64 <t> x y) =>
|
|
(Last <t>
|
|
x0: (Int64Lo x)
|
|
x1: (Int64Hi x)
|
|
y0: (Int64Lo y)
|
|
y1: (Int64Hi y)
|
|
add: (Add32carry x0 y0)
|
|
(Int64Make
|
|
(Add32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> add))
|
|
(Select0 <typ.UInt32> add)))
|
|
|
|
(Sub64 <t> x y) =>
|
|
(Last <t>
|
|
x0: (Int64Lo x)
|
|
x1: (Int64Hi x)
|
|
y0: (Int64Lo y)
|
|
y1: (Int64Hi y)
|
|
sub: (Sub32carry x0 y0)
|
|
(Int64Make
|
|
(Sub32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> sub))
|
|
(Select0 <typ.UInt32> sub)))
|
|
|
|
// 64x64->64 multiply from 32-bit halves (x = x1:x0, y = y1:y0). Modulo 2^64,
//   x*y = x0*y0 + ((x0*y1 + x1*y0) << 32)
// so the rule needs the full 64-bit product x0*y0 (Mul32uhilo: Select0 is
// bound as the high word, Select1 as the low word) plus only the low 32 bits
// of the two cross products. x1*y1 contributes nothing below bit 64.
(Mul64 <t> x y) =>
|
|
(Last <t>
|
|
x0: (Int64Lo x)
|
|
x1: (Int64Hi x)
|
|
y0: (Int64Lo y)
|
|
y1: (Int64Hi y)
|
|
x0y0: (Mul32uhilo x0 y0)
|
|
x0y0Hi: (Select0 <typ.UInt32> x0y0)
|
|
x0y0Lo: (Select1 <typ.UInt32> x0y0)
|
|
(Int64Make
|
|
(Add32 <typ.UInt32> x0y0Hi
|
|
(Add32 <typ.UInt32>
|
|
(Mul32 <typ.UInt32> x0 y1)
|
|
(Mul32 <typ.UInt32> x1 y0)))
|
|
x0y0Lo))
|
|
|
|
// Full 64x64->128 unsigned multiply built from four 32x32->64 products
// (x = x1:x0, y = y1:y0). Writing the 128-bit result as 32-bit words
// w3:w2:w1:w0 (w0 least significant), the partial products fall into columns:
//   w0: x0y0Lo
//   w1: x0y0Hi + x0y1Lo + x1y0Lo
//   w2: x0y1Hi + x1y0Hi + x1y1Lo + carries out of w1
//   w3: x1y1Hi + carries out of w2
// A column can have three addends, so the sum is done as two carry chains:
// the "a" chain (w1a, w2a, w3a) adds the first two addends of each column,
// and the "b" chain (w1b, w2b, w3b) adds the remaining addend to the "a" sum.
// The (Const32 [0]) operands in w3a and w3b are there only to fold in a carry.
// The result tuple is (high 64 bits, low 64 bits).
(Mul64uhilo <t> x y) =>
|
|
(Last <t>
|
|
x0: (Int64Lo x)
|
|
x1: (Int64Hi x)
|
|
y0: (Int64Lo y)
|
|
y1: (Int64Hi y)
|
|
x0y0: (Mul32uhilo x0 y0)
|
|
x0y1: (Mul32uhilo x0 y1)
|
|
x1y0: (Mul32uhilo x1 y0)
|
|
x1y1: (Mul32uhilo x1 y1)
|
|
x0y0Hi: (Select0 <typ.UInt32> x0y0)
|
|
x0y0Lo: (Select1 <typ.UInt32> x0y0)
|
|
x0y1Hi: (Select0 <typ.UInt32> x0y1)
|
|
x0y1Lo: (Select1 <typ.UInt32> x0y1)
|
|
x1y0Hi: (Select0 <typ.UInt32> x1y0)
|
|
x1y0Lo: (Select1 <typ.UInt32> x1y0)
|
|
x1y1Hi: (Select0 <typ.UInt32> x1y1)
|
|
x1y1Lo: (Select1 <typ.UInt32> x1y1)
|
|
w1a: (Add32carry x0y0Hi x0y1Lo)
|
|
w2a: (Add32carrywithcarry x0y1Hi x1y0Hi (Select1 <types.TypeFlags> w1a))
|
|
w3a: (Add32withcarry <typ.UInt32> x1y1Hi (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2a))
|
|
w1b: (Add32carry x1y0Lo (Select0 <typ.UInt32> w1a))
|
|
w2b: (Add32carrywithcarry x1y1Lo (Select0 <typ.UInt32> w2a) (Select1 <types.TypeFlags> w1b))
|
|
w3b: (Add32withcarry <typ.UInt32> w3a (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2b))
|
|
(MakeTuple <types.NewTuple(typ.UInt64,typ.UInt64)>
|
|
(Int64Make w3b (Select0 <typ.UInt32> w2b))
|
|
(Int64Make (Select0 <typ.UInt32> w1b) x0y0Lo)))
|
|
|
|
// Unsigned high multiply: element 0 of the Mul64uhilo tuple is the high 64 bits
// of the 128-bit product.
(Hmul64u x y) => (Select0 (Mul64uhilo x y))
|
|
|
|
// Hacker's Delight p. 175: signed hmul = unsigned hmul - (x<0)&y - (y<0)&x.
// Here (x<0) and (y<0) are 64-bit masks, all ones when the operand is negative
// and zero otherwise. Each mask is built by shifting the operand's high word
// right by 31 with the signed shift Rsh32x32 (xs / ys) and using that word as
// both halves of an Int64Make.
|
|
(Hmul64 x y) =>
|
|
(Last
|
|
p: (Hmul64u <typ.UInt64> x y)
|
|
xSign: (Int64Make xs:(Rsh32x32 <typ.UInt32> (Int64Hi x) (Const32 <typ.UInt32> [31])) xs)
|
|
ySign: (Int64Make ys:(Rsh32x32 <typ.UInt32> (Int64Hi y) (Const32 <typ.UInt32> [31])) ys)
|
|
(Sub64 <typ.Int64> (Sub64 <typ.Int64> p (And64 <typ.Int64> xSign y)) (And64 <typ.Int64> ySign x)))
|
|
|
|
// (x+y)/2 => (x-y)/2 + y
// Computed this way so the intermediate sum x+y, which may not fit in 64 bits,
// is never formed.
// NOTE(review): with wrapping unsigned arithmetic the identity holds only when
// x >= y; presumably every producer of Avg64u guarantees that — confirm.
|
|
(Avg64u <t> x y) => (Add64 (Rsh64Ux32 <t> (Sub64 <t> x y) (Const32 <typ.UInt32> [1])) y)
|
|
|
|
|
|
(And64 x y) =>
|
|
(Int64Make
|
|
(And32 <typ.UInt32> (Int64Hi x) (Int64Hi y))
|
|
(And32 <typ.UInt32> (Int64Lo x) (Int64Lo y)))
|
|
|
|
(Or64 x y) =>
|
|
(Int64Make
|
|
(Or32 <typ.UInt32> (Int64Hi x) (Int64Hi y))
|
|
(Or32 <typ.UInt32> (Int64Lo x) (Int64Lo y)))
|
|
|
|
(Xor64 x y) =>
|
|
(Int64Make
|
|
(Xor32 <typ.UInt32> (Int64Hi x) (Int64Hi y))
|
|
(Xor32 <typ.UInt32> (Int64Lo x) (Int64Lo y)))
|
|
|
|
(Neg64 <t> x) => (Sub64 (Const64 <t> [0]) x)
|
|
|
|
(Com64 x) =>
|
|
(Int64Make
|
|
(Com32 <typ.UInt32> (Int64Hi x))
|
|
(Com32 <typ.UInt32> (Int64Lo x)))
|
|
|
|
// Sadly, just because we know that x is non-zero,
|
|
// we don't know whether either component is,
|
|
// so just treat Ctz64NonZero the same as Ctz64.
|
|
(Ctz64NonZero ...) => (Ctz64 ...)
|
|
|
|
// Count trailing zeros of a 64-bit value from its halves:
//   Ctz32(lo) + (lo == 0 ? Ctz32(hi) : 0)
// Zeromask(lo) is 0 when lo is zero and all ones otherwise (see the
// Or32/Zeromask constant rules in this file), so its complement is the mask
// that keeps Ctz32(hi) only when the low word is entirely zero.
// NOTE(review): the lo == 0 case relies on Ctz32 of zero being 32 — confirm.
(Ctz64 x) =>
|
|
(Add32 <typ.UInt32>
|
|
(Ctz32 <typ.UInt32> (Int64Lo x))
|
|
(And32 <typ.UInt32>
|
|
(Com32 <typ.UInt32> (Zeromask (Int64Lo x)))
|
|
(Ctz32 <typ.UInt32> (Int64Hi x))))
|
|
|
|
// Bit length of a 64-bit value from its halves:
//   BitLen32(hi) + BitLen32(lo | Zeromask(hi))
// Zeromask(hi) is 0 when hi is zero and all ones otherwise (see the
// Or32/Zeromask constant rules in this file). When hi is non-zero the second
// operand is therefore forced to all ones, so the low word counts in full;
// when hi is zero the second term is just BitLen32(lo).
// NOTE(review): assumes BitLen32(0) is 0 and BitLen32 of all ones is 32 — confirm.
(BitLen64 x) =>
|
|
(Add32 <typ.Int>
|
|
(BitLen32 <typ.Int> (Int64Hi x))
|
|
(BitLen32 <typ.Int>
|
|
(Or32 <typ.UInt32>
|
|
(Int64Lo x)
|
|
(Zeromask (Int64Hi x)))))
|
|
|
|
(Bswap64 x) =>
|
|
(Int64Make
|
|
(Bswap32 <typ.UInt32> (Int64Lo x))
|
|
(Bswap32 <typ.UInt32> (Int64Hi x)))
|
|
|
|
(SignExt32to64 x) => (Int64Make (Signmask x) x)
|
|
(SignExt16to64 x) => (SignExt32to64 (SignExt16to32 x))
|
|
(SignExt8to64 x) => (SignExt32to64 (SignExt8to32 x))
|
|
|
|
(ZeroExt32to64 x) => (Int64Make (Const32 <typ.UInt32> [0]) x)
|
|
(ZeroExt16to64 x) => (ZeroExt32to64 (ZeroExt16to32 x))
|
|
(ZeroExt8to64 x) => (ZeroExt32to64 (ZeroExt8to32 x))
|
|
|
|
(Trunc64to32 (Int64Make _ lo)) => lo
|
|
(Trunc64to16 (Int64Make _ lo)) => (Trunc32to16 lo)
|
|
(Trunc64to8 (Int64Make _ lo)) => (Trunc32to8 lo)
|
|
// Most general
|
|
(Trunc64to32 x) => (Int64Lo x)
|
|
(Trunc64to16 x) => (Trunc32to16 (Int64Lo x))
|
|
(Trunc64to8 x) => (Trunc32to8 (Int64Lo x))
|
|
|
|
(Lsh32x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0])
|
|
(Rsh32x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Signmask x)
|
|
(Rsh32Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0])
|
|
(Lsh16x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0])
|
|
(Rsh16x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Signmask (SignExt16to32 x))
|
|
(Rsh16Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0])
|
|
(Lsh8x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0])
|
|
(Rsh8x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Signmask (SignExt8to32 x))
|
|
(Rsh8Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0])
|
|
|
|
(Lsh32x64 [c] x (Int64Make (Const32 [0]) lo)) => (Lsh32x32 [c] x lo)
|
|
(Rsh32x64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh32x32 [c] x lo)
|
|
(Rsh32Ux64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh32Ux32 [c] x lo)
|
|
(Lsh16x64 [c] x (Int64Make (Const32 [0]) lo)) => (Lsh16x32 [c] x lo)
|
|
(Rsh16x64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh16x32 [c] x lo)
|
|
(Rsh16Ux64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh16Ux32 [c] x lo)
|
|
(Lsh8x64 [c] x (Int64Make (Const32 [0]) lo)) => (Lsh8x32 [c] x lo)
|
|
(Rsh8x64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh8x32 [c] x lo)
|
|
(Rsh8Ux64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh8Ux32 [c] x lo)
|
|
|
|
(Lsh64x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const64 [0])
|
|
(Rsh64x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Int64Make (Signmask (Int64Hi x)) (Signmask (Int64Hi x)))
|
|
(Rsh64Ux64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const64 [0])
|
|
|
|
(Lsh64x64 [c] x (Int64Make (Const32 [0]) lo)) => (Lsh64x32 [c] x lo)
|
|
(Rsh64x64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh64x32 [c] x lo)
|
|
(Rsh64Ux64 [c] x (Int64Make (Const32 [0]) lo)) => (Rsh64Ux32 [c] x lo)
|
|
|
|
// turn x64 non-constant shifts to x32 shifts
|
|
// if high 32-bit of the shift is nonzero, make a huge shift
|
|
(Lsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
|
|
(Lsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
|
|
(Rsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
|
|
(Rsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
|
|
(Rsh64Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
|
|
(Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
|
|
(Lsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
|
|
(Lsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
|
|
(Rsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
|
|
(Rsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
|
|
(Rsh32Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
|
|
(Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
|
|
(Lsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
|
|
(Lsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
|
|
(Rsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
|
|
(Rsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
|
|
(Rsh16Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
|
|
(Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
|
|
(Lsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
|
|
(Lsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
|
|
(Rsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
|
|
(Rsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
|
|
(Rsh8Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
|
|
(Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
|
|
|
|
// Most general
|
|
(Lsh64x64 x y) => (Lsh64x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
|
|
(Rsh64x64 x y) => (Rsh64x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
|
|
(Rsh64Ux64 x y) => (Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
|
|
(Lsh32x64 x y) => (Lsh32x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
|
|
(Rsh32x64 x y) => (Rsh32x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
|
|
(Rsh32Ux64 x y) => (Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
|
|
(Lsh16x64 x y) => (Lsh16x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
|
|
(Rsh16x64 x y) => (Rsh16x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
|
|
(Rsh16Ux64 x y) => (Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
|
|
(Lsh8x64 x y) => (Lsh8x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
|
|
(Rsh8x64 x y) => (Rsh8x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
|
|
(Rsh8Ux64 x y) => (Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
|
|
|
|
|
|
(RotateLeft64 x (Int64Make hi lo)) => (RotateLeft64 x lo)
|
|
(RotateLeft32 x (Int64Make hi lo)) => (RotateLeft32 x lo)
|
|
(RotateLeft16 x (Int64Make hi lo)) => (RotateLeft16 x lo)
|
|
(RotateLeft8 x (Int64Make hi lo)) => (RotateLeft8 x lo)
|
|
|
|
// RotateLeft64 by constant, for use in divmod.
// Only c&63 matters. The four rules cover its whole range:
//   c&63 == 0       no rotation, the result is x
//   c&63 == 32      the two halves swap
//   0 < c&63 < 32   each half is itself shifted left by c&31, OR'ed with the
//                   top c&31 bits of the other half
//   32 < c&63 < 64  the same as the previous case applied after swapping the
//                   halves, so the shift amounts again use c&31
// In the aux expressions, 32-c&31 parses as 32-(c&31), because & binds tighter
// than - in Go. The shift counts stay in 1..31 in both non-trivial cases.
// The Const(64|32|16|8) alternation presumably expands to one rule per
// constant width — confirm against the rule generator.
|
|
(RotateLeft64 <t> x (Const(64|32|16|8) [c])) && c&63 == 0 => x
|
|
(RotateLeft64 <t> x (Const(64|32|16|8) [c])) && c&63 == 32 => (Int64Make <t> (Int64Lo x) (Int64Hi x))
|
|
(RotateLeft64 <t> x (Const(64|32|16|8) [c])) && 0 < c&63 && c&63 < 32 =>
|
|
(Int64Make <t>
|
|
(Or32 <typ.UInt32>
|
|
(Lsh32x32 <typ.UInt32> (Int64Hi x) (Const32 <typ.UInt32> [int32(c&31)]))
|
|
(Rsh32Ux32 <typ.UInt32> (Int64Lo x) (Const32 <typ.UInt32> [int32(32-c&31)])))
|
|
(Or32 <typ.UInt32>
|
|
(Lsh32x32 <typ.UInt32> (Int64Lo x) (Const32 <typ.UInt32> [int32(c&31)]))
|
|
(Rsh32Ux32 <typ.UInt32> (Int64Hi x) (Const32 <typ.UInt32> [int32(32-c&31)]))))
|
|
(RotateLeft64 <t> x (Const(64|32|16|8) [c])) && 32 < c&63 && c&63 < 64 =>
|
|
(Int64Make <t>
|
|
(Or32 <typ.UInt32>
|
|
(Lsh32x32 <typ.UInt32> (Int64Lo x) (Const32 <typ.UInt32> [int32(c&31)]))
|
|
(Rsh32Ux32 <typ.UInt32> (Int64Hi x) (Const32 <typ.UInt32> [int32(32-c&31)])))
|
|
(Or32 <typ.UInt32>
|
|
(Lsh32x32 <typ.UInt32> (Int64Hi x) (Const32 <typ.UInt32> [int32(c&31)]))
|
|
(Rsh32Ux32 <typ.UInt32> (Int64Lo x) (Const32 <typ.UInt32> [int32(32-c&31)]))))
|
|
|
|
// Clean up constants a little
// Zeromask of a constant is known at rewrite time: it is 0 when c == 0, so
// the Or32 is just y, and all ones when c != 0, so the Or32 is -1 whatever y is.
// These patterns match the (Or32 (Zeromask hi) lo) shift counts built by the
// 64-bit-count shift rules in this file.
|
|
(Or32 <typ.UInt32> (Zeromask (Const32 [c])) y) && c == 0 => y
|
|
(Or32 <typ.UInt32> (Zeromask (Const32 [c])) y) && c != 0 => (Const32 <typ.UInt32> [-1])
|
|
|
|
// 64x left shift
|
|
// result.hi = hi<<s | lo>>(32-s) | lo<<(s-32) // >> is unsigned, large shifts result 0
|
|
// result.lo = lo<<s
|
|
(Lsh64x32 x s) =>
|
|
(Int64Make
|
|
(Or32 <typ.UInt32>
|
|
(Or32 <typ.UInt32>
|
|
(Lsh32x32 <typ.UInt32> (Int64Hi x) s)
|
|
(Rsh32Ux32 <typ.UInt32>
|
|
(Int64Lo x)
|
|
(Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s)))
|
|
(Lsh32x32 <typ.UInt32>
|
|
(Int64Lo x)
|
|
(Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32]))))
|
|
(Lsh32x32 <typ.UInt32> (Int64Lo x) s))
|
|
(Lsh64x16 x s) =>
|
|
(Int64Make
|
|
(Or32 <typ.UInt32>
|
|
(Or32 <typ.UInt32>
|
|
(Lsh32x16 <typ.UInt32> (Int64Hi x) s)
|
|
(Rsh32Ux16 <typ.UInt32>
|
|
(Int64Lo x)
|
|
(Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s)))
|
|
(Lsh32x16 <typ.UInt32>
|
|
(Int64Lo x)
|
|
(Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32]))))
|
|
(Lsh32x16 <typ.UInt32> (Int64Lo x) s))
|
|
(Lsh64x8 x s) =>
|
|
(Int64Make
|
|
(Or32 <typ.UInt32>
|
|
(Or32 <typ.UInt32>
|
|
(Lsh32x8 <typ.UInt32> (Int64Hi x) s)
|
|
(Rsh32Ux8 <typ.UInt32>
|
|
(Int64Lo x)
|
|
(Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s)))
|
|
(Lsh32x8 <typ.UInt32>
|
|
(Int64Lo x)
|
|
(Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32]))))
|
|
(Lsh32x8 <typ.UInt32> (Int64Lo x) s))
|
|
|
|
// 64x unsigned right shift
|
|
// result.hi = hi>>s
|
|
// result.lo = lo>>s | hi<<(32-s) | hi>>(s-32) // >> is unsigned, large shifts result 0
|
|
(Rsh64Ux32 x s) =>
|
|
(Int64Make
|
|
(Rsh32Ux32 <typ.UInt32> (Int64Hi x) s)
|
|
(Or32 <typ.UInt32>
|
|
(Or32 <typ.UInt32>
|
|
(Rsh32Ux32 <typ.UInt32> (Int64Lo x) s)
|
|
(Lsh32x32 <typ.UInt32>
|
|
(Int64Hi x)
|
|
(Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s)))
|
|
(Rsh32Ux32 <typ.UInt32>
|
|
(Int64Hi x)
|
|
(Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))))
|
|
(Rsh64Ux16 x s) =>
|
|
(Int64Make
|
|
(Rsh32Ux16 <typ.UInt32> (Int64Hi x) s)
|
|
(Or32 <typ.UInt32>
|
|
(Or32 <typ.UInt32>
|
|
(Rsh32Ux16 <typ.UInt32> (Int64Lo x) s)
|
|
(Lsh32x16 <typ.UInt32>
|
|
(Int64Hi x)
|
|
(Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s)))
|
|
(Rsh32Ux16 <typ.UInt32>
|
|
(Int64Hi x)
|
|
(Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))))
|
|
(Rsh64Ux8 x s) =>
|
|
(Int64Make
|
|
(Rsh32Ux8 <typ.UInt32> (Int64Hi x) s)
|
|
(Or32 <typ.UInt32>
|
|
(Or32 <typ.UInt32>
|
|
(Rsh32Ux8 <typ.UInt32> (Int64Lo x) s)
|
|
(Lsh32x8 <typ.UInt32>
|
|
(Int64Hi x)
|
|
(Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s)))
|
|
(Rsh32Ux8 <typ.UInt32>
|
|
(Int64Hi x)
|
|
(Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))))
|
|
|
|
// 64x signed right shift
|
|
// result.hi = hi>>s
|
|
// result.lo = lo>>s | hi<<(32-s) | (hi>>(s-32))&zeromask(s>>5) // hi>>(s-32) is signed, large shifts result 0/-1
|
|
(Rsh64x32 x s) =>
|
|
(Int64Make
|
|
(Rsh32x32 <typ.UInt32> (Int64Hi x) s)
|
|
(Or32 <typ.UInt32>
|
|
(Or32 <typ.UInt32>
|
|
(Rsh32Ux32 <typ.UInt32> (Int64Lo x) s)
|
|
(Lsh32x32 <typ.UInt32>
|
|
(Int64Hi x)
|
|
(Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s)))
|
|
(And32 <typ.UInt32>
|
|
(Rsh32x32 <typ.UInt32>
|
|
(Int64Hi x)
|
|
(Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))
|
|
(Zeromask
|
|
(Rsh32Ux32 <typ.UInt32> s (Const32 <typ.UInt32> [5]))))))
|
|
(Rsh64x16 x s) =>
|
|
(Int64Make
|
|
(Rsh32x16 <typ.UInt32> (Int64Hi x) s)
|
|
(Or32 <typ.UInt32>
|
|
(Or32 <typ.UInt32>
|
|
(Rsh32Ux16 <typ.UInt32> (Int64Lo x) s)
|
|
(Lsh32x16 <typ.UInt32>
|
|
(Int64Hi x)
|
|
(Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s)))
|
|
(And32 <typ.UInt32>
|
|
(Rsh32x16 <typ.UInt32>
|
|
(Int64Hi x)
|
|
(Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))
|
|
(Zeromask
|
|
(ZeroExt16to32
|
|
(Rsh16Ux32 <typ.UInt16> s (Const32 <typ.UInt32> [5])))))))
|
|
(Rsh64x8 x s) =>
|
|
(Int64Make
|
|
(Rsh32x8 <typ.UInt32> (Int64Hi x) s)
|
|
(Or32 <typ.UInt32>
|
|
(Or32 <typ.UInt32>
|
|
(Rsh32Ux8 <typ.UInt32> (Int64Lo x) s)
|
|
(Lsh32x8 <typ.UInt32>
|
|
(Int64Hi x)
|
|
(Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s)))
|
|
(And32 <typ.UInt32>
|
|
(Rsh32x8 <typ.UInt32>
|
|
(Int64Hi x)
|
|
(Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))
|
|
(Zeromask
|
|
(ZeroExt8to32
|
|
(Rsh8Ux32 <typ.UInt8> s (Const32 <typ.UInt32> [5])))))))
|
|
|
|
// Split a 64-bit constant into two 32-bit constants: int32(c>>32) is the high
// word and int32(c) truncates c to the low word. Only the type of the high
// word follows the signedness of t; the low word is always typed UInt32.
(Const64 <t> [c]) && t.IsSigned() =>
|
|
(Int64Make (Const32 <typ.Int32> [int32(c>>32)]) (Const32 <typ.UInt32> [int32(c)]))
|
|
(Const64 <t> [c]) && !t.IsSigned() =>
|
|
(Int64Make (Const32 <typ.UInt32> [int32(c>>32)]) (Const32 <typ.UInt32> [int32(c)]))
|
|
|
|
(Eq64 x y) =>
|
|
(AndB
|
|
(Eq32 (Int64Hi x) (Int64Hi y))
|
|
(Eq32 (Int64Lo x) (Int64Lo y)))
|
|
|
|
(Neq64 x y) =>
|
|
(OrB
|
|
(Neq32 (Int64Hi x) (Int64Hi y))
|
|
(Neq32 (Int64Lo x) (Int64Lo y)))
|
|
|
|
(Less64U x y) =>
|
|
(OrB
|
|
(Less32U (Int64Hi x) (Int64Hi y))
|
|
(AndB
|
|
(Eq32 (Int64Hi x) (Int64Hi y))
|
|
(Less32U (Int64Lo x) (Int64Lo y))))
|
|
|
|
(Leq64U x y) =>
|
|
(OrB
|
|
(Less32U (Int64Hi x) (Int64Hi y))
|
|
(AndB
|
|
(Eq32 (Int64Hi x) (Int64Hi y))
|
|
(Leq32U (Int64Lo x) (Int64Lo y))))
|
|
|
|
// Signed 64-bit less-than: true if the high words compare less as signed
// values, or if the high words are equal and the low words compare less as
// unsigned values. The low word carries no sign, so it uses Less32U.
(Less64 x y) =>
|
|
(OrB
|
|
(Less32 (Int64Hi x) (Int64Hi y))
|
|
(AndB
|
|
(Eq32 (Int64Hi x) (Int64Hi y))
|
|
(Less32U (Int64Lo x) (Int64Lo y))))
|
|
|
|
(Leq64 x y) =>
|
|
(OrB
|
|
(Less32 (Int64Hi x) (Int64Hi y))
|
|
(AndB
|
|
(Eq32 (Int64Hi x) (Int64Hi y))
|
|
(Leq32U (Int64Lo x) (Int64Lo y))))
|