cmd/compile: implement bits.Mul64 on 32-bit systems

This CL implements Mul64uhilo, Hmul64, Hmul64u, and Avg64u
on 32-bit systems, with the effect that constant division of both
int64s and uint64s can now be emitted directly in all cases,
and also that bits.Mul64 can be intrinsified on 32-bit systems.

Previously, constant division of uint64s by values 0 < c ≤ 0xFFFF was
implemented as uint32 divisions by c and some fixup. After expanding
those smaller constant divisions, the code for i/999 required:

	(386) 7 mul, 10 add, 2 sub, 3 rotate, 3 shift (104 bytes)
	(arm) 7 mul, 9 add, 3 sub, 2 shift (104 bytes)
	(mips) 7 mul, 10 add, 5 sub, 6 shift, 3 sgtu (176 bytes)

For that much code, we might as well use a full 64x64->128 multiply
that can be used for all divisors, not just small ones.
Having done that, the same i/999 now generates:

	(386) 4 mul, 9 add, 2 sub, 2 or, 6 shift (112 bytes)
	(arm) 4 mul, 8 add, 2 sub, 2 or, 3 shift (92 bytes)
	(mips) 4 mul, 11 add, 3 sub, 6 shift, 8 sgtu, 4 or (196 bytes)

The size increase on 386 is due to a few extra register spills.
The size increase on mips is due to add-with-carry being hard.

The new approach is more general, letting us delete the old special case
and guarantee that all int64 and uint64 divisions by constants are
generated directly on 32-bit systems.

This especially speeds up code making heavy use of bits.Mul64 with
a constant argument, which happens in strconv and various crypto
packages. A few examples are benchmarked below.

pkg: cmd/compile/internal/test

benchmark \ host                      local  linux-amd64       s7  linux-386  s7:GOARCH=386
                                    vs base      vs base  vs base    vs base        vs base
DivconstI64                               ~            ~        ~    -49.66%        -21.02%
ModconstI64                               ~            ~        ~    -13.45%        +14.52%
DivisiblePow2constI64                     ~            ~        ~     +0.97%         -1.32%
DivisibleconstI64                         ~            ~        ~    -20.01%        -48.28%
DivisibleWDivconstI64                     ~            ~   -1.76%    -38.59%        -42.74%
DivconstU64/3                             ~            ~        ~    -13.82%         -4.09%
DivconstU64/5                             ~            ~        ~    -14.10%         -3.54%
DivconstU64/37                       -2.07%       -4.45%        ~    -19.60%         -9.55%
DivconstU64/1234567                       ~            ~        ~    -61.55%        -56.93%
ModconstU64                               ~            ~        ~     -6.25%              ~
DivisibleconstU64                         ~            ~        ~     -2.78%         -7.82%
DivisibleWDivconstU64                     ~            ~        ~     +4.23%         +2.56%

pkg: math/bits

benchmark \ host         s7  linux-amd64  linux-386  s7:GOARCH=386
                    vs base      vs base    vs base        vs base
Add                       ~            ~          ~              ~
Add32                +1.59%            ~          ~              ~
Add64                     ~            ~          ~              ~
Add64multiple             ~            ~          ~              ~
Sub                       ~            ~          ~              ~
Sub32                     ~            ~          ~              ~
Sub64                     ~            ~     -9.20%              ~
Sub64multiple             ~            ~          ~              ~
Mul                       ~            ~          ~              ~
Mul32                     ~            ~          ~              ~
Mul64                     ~            ~    -41.58%        -53.21%
Div                       ~            ~          ~              ~
Div32                     ~            ~          ~              ~
Div64                     ~            ~          ~              ~

pkg: strconv

benchmark \ host                       s7  linux-amd64  linux-386  s7:GOARCH=386
                                  vs base      vs base    vs base        vs base
ParseInt/Pos/7bit                       ~            ~    -11.08%         -6.75%
ParseInt/Pos/26bit                      ~            ~    -13.65%        -11.02%
ParseInt/Pos/31bit                      ~            ~    -14.65%         -9.71%
ParseInt/Pos/56bit                 -1.80%            ~    -17.97%        -10.78%
ParseInt/Pos/63bit                      ~            ~    -13.85%         -9.63%
ParseInt/Neg/7bit                       ~            ~    -12.14%         -7.26%
ParseInt/Neg/26bit                      ~            ~    -14.18%         -9.81%
ParseInt/Neg/31bit                      ~            ~    -14.51%         -9.02%
ParseInt/Neg/56bit                      ~            ~    -15.79%         -9.79%
ParseInt/Neg/63bit                      ~            ~    -15.68%        -11.07%
AppendFloat/Decimal                     ~            ~     -7.25%        -12.26%
AppendFloat/Float                       ~            ~    -15.96%        -19.45%
AppendFloat/Exp                         ~            ~    -13.96%        -17.76%
AppendFloat/NegExp                      ~            ~    -14.89%        -20.27%
AppendFloat/LongExp                     ~            ~    -12.68%        -17.97%
AppendFloat/Big                         ~            ~    -11.10%        -16.64%
AppendFloat/BinaryExp                   ~            ~          ~              ~
AppendFloat/32Integer                   ~            ~    -10.05%        -10.91%
AppendFloat/32ExactFraction             ~            ~     -8.93%        -13.00%
AppendFloat/32Point                     ~            ~    -10.36%        -14.89%
AppendFloat/32Exp                       ~            ~     -9.88%        -13.54%
AppendFloat/32NegExp                    ~            ~    -10.16%        -14.26%
AppendFloat/32Shortest                  ~            ~    -11.39%        -14.96%
AppendFloat/32Fixed8Hard                ~            ~          ~         -2.31%
AppendFloat/32Fixed9Hard                ~            ~          ~         -7.01%
AppendFloat/64Fixed1                    ~            ~     -2.83%         -8.23%
AppendFloat/64Fixed2                    ~            ~          ~         -7.94%
AppendFloat/64Fixed3                    ~            ~     -4.07%         -7.22%
AppendFloat/64Fixed4                    ~            ~     -7.24%         -7.62%
AppendFloat/64Fixed12                   ~            ~     -6.57%         -4.82%
AppendFloat/64Fixed16                   ~            ~     -4.00%         -5.81%
AppendFloat/64Fixed12Hard          -2.22%            ~     -4.07%         -6.35%
AppendFloat/64Fixed17Hard          -2.12%            ~          ~         -3.79%
AppendFloat/64Fixed18Hard          -1.89%            ~     +2.48%              ~
AppendFloat/Slowpath64             -1.85%            ~    -14.49%        -18.21%
AppendFloat/SlowpathDenormal64          ~            ~    -13.08%        -19.41%

pkg: crypto/internal/fips140/nistec/fiat

benchmark \ host         s7  linux-amd64  linux-386  s7:GOARCH=386
                    vs base      vs base    vs base        vs base
Mul/P224                  ~            ~    -29.95%        -39.60%
Mul/P384                  ~            ~    -37.11%        -63.33%
Mul/P521                  ~            ~    -26.62%        -12.42%
Square/P224          +1.46%            ~    -40.62%        -49.18%
Square/P384               ~            ~    -45.51%        -69.68%
Square/P521         +90.37%            ~    -25.26%        -11.23%

(The +90% is a separate problem and not real; that much variation
can be seen on that system by running the same binary from two
different files.)

pkg: crypto/internal/fips140/edwards25519

benchmark \ host                    s7  linux-amd64  linux-386  s7:GOARCH=386
                               vs base      vs base    vs base        vs base
EncodingDecoding                     ~            ~    -34.67%        -35.75%
ScalarBaseMult                       ~            ~    -31.25%        -30.29%
ScalarMult                           ~            ~    -33.45%        -32.54%
VarTimeDoubleScalarBaseMult          ~            ~    -33.78%        -33.68%

Change-Id: Id3c91d42cd01def6731b755e99f8f40c6ad1bb65
Reviewed-on: https://go-review.googlesource.com/c/go/+/716061
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Russ Cox <rsc@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
Russ Cox 2025-10-27 19:41:39 -04:00 committed by Gopher Robot
parent 38317c44e7
commit 1e5bb416d8
23 changed files with 663 additions and 342 deletions

View file

@ -245,6 +245,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = r p.To.Reg = r
case ssa.OpARMADDS, case ssa.OpARMADDS,
ssa.OpARMADCS,
ssa.OpARMSUBS: ssa.OpARMSUBS:
r := v.Reg0() r := v.Reg0()
r1 := v.Args[0].Reg() r1 := v.Args[0].Reg()

View file

@ -7,6 +7,7 @@
(Add(32|64)F ...) => (ADDS(S|D) ...) (Add(32|64)F ...) => (ADDS(S|D) ...)
(Add32carry ...) => (ADDLcarry ...) (Add32carry ...) => (ADDLcarry ...)
(Add32withcarry ...) => (ADCL ...) (Add32withcarry ...) => (ADCL ...)
(Add32carrywithcarry ...) => (ADCLcarry ...)
(Sub(Ptr|32|16|8) ...) => (SUBL ...) (Sub(Ptr|32|16|8) ...) => (SUBL ...)
(Sub(32|64)F ...) => (SUBS(S|D) ...) (Sub(32|64)F ...) => (SUBS(S|D) ...)

View file

@ -99,6 +99,7 @@ func init() {
gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}} gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly} gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly}
gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
gp2carry1carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly} gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly} gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}} gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}}
@ -184,6 +185,7 @@ func init() {
{name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1, generates <carry,result> pair {name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1, generates <carry,result> pair
{name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true}, // arg0 + auxint, generates <carry,result> pair {name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true}, // arg0 + auxint, generates <carry,result> pair
{name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags {name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags
{name: "ADCLcarry", argLength: 3, reg: gp2carry1carry, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags, generates <carry,result> pair
{name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0+auxint+carry(arg1), where arg1 is flags {name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0+auxint+carry(arg1), where arg1 is flags
{name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true}, // arg0 - arg1 {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true}, // arg0 - arg1

View file

@ -6,6 +6,7 @@
(Add(32|64)F ...) => (ADD(F|D) ...) (Add(32|64)F ...) => (ADD(F|D) ...)
(Add32carry ...) => (ADDS ...) (Add32carry ...) => (ADDS ...)
(Add32withcarry ...) => (ADC ...) (Add32withcarry ...) => (ADC ...)
(Add32carrywithcarry ...) => (ADCS ...)
(Sub(Ptr|32|16|8) ...) => (SUB ...) (Sub(Ptr|32|16|8) ...) => (SUB ...)
(Sub(32|64)F ...) => (SUB(F|D) ...) (Sub(32|64)F ...) => (SUB(F|D) ...)

View file

@ -112,6 +112,7 @@ func init() {
gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}} gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}}
gp2flags = regInfo{inputs: []regMask{gpg, gpg}} gp2flags = regInfo{inputs: []regMask{gpg, gpg}}
gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
gp2flags1carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}} gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
gp31 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}} gp31 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}} gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}}
@ -165,6 +166,7 @@ func init() {
{name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"}, // arg0 + auxInt, set carry flag {name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"}, // arg0 + auxInt, set carry flag
{name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, arg2=flags {name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, arg2=flags
{name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"}, // arg0 + auxInt + carry, arg1=flags {name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"}, // arg0 + auxInt + carry, arg1=flags
{name: "ADCS", argLength: 3, reg: gp2flags1carry, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, arg2=flags, sets carry
{name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"}, // arg0 - arg1, set carry flag {name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"}, // arg0 - arg1, set carry flag
{name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"}, // arg0 - auxInt, set carry flag {name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"}, // arg0 - auxInt, set carry flag
{name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"}, // auxInt - arg0, set carry flag {name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"}, // auxInt - arg0, set carry flag

View file

@ -9,6 +9,12 @@
(Select1 (Add32carry <t> x y)) => (SGTU <typ.Bool> x (ADD <t.FieldType(0)> x y)) (Select1 (Add32carry <t> x y)) => (SGTU <typ.Bool> x (ADD <t.FieldType(0)> x y))
(Add32withcarry <t> x y c) => (ADD c (ADD <t> x y)) (Add32withcarry <t> x y c) => (ADD c (ADD <t> x y))
(Select0 (Add32carrywithcarry <t> x y c)) => (ADD <t.FieldType(0)> c (ADD <t.FieldType(0)> x y))
(Select1 (Add32carrywithcarry <t> x y c)) =>
(OR <typ.Bool>
(SGTU <typ.Bool> x xy:(ADD <t.FieldType(0)> x y))
(SGTU <typ.Bool> xy (ADD <t.FieldType(0)> c xy)))
(Sub(Ptr|32|16|8) ...) => (SUB ...) (Sub(Ptr|32|16|8) ...) => (SUB ...)
(Sub(32|64)F ...) => (SUB(F|D) ...) (Sub(32|64)F ...) => (SUB(F|D) ...)

View file

@ -6,8 +6,12 @@
// architectures. These rules work together with the decomposeBuiltin // architectures. These rules work together with the decomposeBuiltin
// pass which handles phis of these types. // pass which handles phis of these types.
(Last ___) => v.Args[len(v.Args)-1]
(Int64Hi (Int64Make hi _)) => hi (Int64Hi (Int64Make hi _)) => hi
(Int64Lo (Int64Make _ lo)) => lo (Int64Lo (Int64Make _ lo)) => lo
(Select0 (MakeTuple x y)) => x
(Select1 (MakeTuple x y)) => y
(Load <t> ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() => (Load <t> ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() =>
(Int64Make (Int64Make
@ -60,30 +64,85 @@
(Arg <typ.UInt32> {n} [off]) (Arg <typ.UInt32> {n} [off])
(Arg <typ.UInt32> {n} [off+4])) (Arg <typ.UInt32> {n} [off+4]))
(Add64 x y) => (Add64 <t> x y) =>
(Last <t>
x0: (Int64Lo x)
x1: (Int64Hi x)
y0: (Int64Lo y)
y1: (Int64Hi y)
add: (Add32carry x0 y0)
(Int64Make (Int64Make
(Add32withcarry <typ.Int32> (Add32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> add))
(Int64Hi x) (Select0 <typ.UInt32> add)))
(Int64Hi y)
(Select1 <types.TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
(Select0 <typ.UInt32> (Add32carry (Int64Lo x) (Int64Lo y))))
(Sub64 x y) => (Sub64 <t> x y) =>
(Last <t>
x0: (Int64Lo x)
x1: (Int64Hi x)
y0: (Int64Lo y)
y1: (Int64Hi y)
sub: (Sub32carry x0 y0)
(Int64Make (Int64Make
(Sub32withcarry <typ.Int32> (Sub32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> sub))
(Int64Hi x) (Select0 <typ.UInt32> sub)))
(Int64Hi y)
(Select1 <types.TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
(Select0 <typ.UInt32> (Sub32carry (Int64Lo x) (Int64Lo y))))
(Mul64 x y) => (Mul64 <t> x y) =>
(Last <t>
x0: (Int64Lo x)
x1: (Int64Hi x)
y0: (Int64Lo y)
y1: (Int64Hi y)
x0y0: (Mul32uhilo x0 y0)
x0y0Hi: (Select0 <typ.UInt32> x0y0)
x0y0Lo: (Select1 <typ.UInt32> x0y0)
(Int64Make (Int64Make
(Add32 <typ.UInt32> x0y0Hi
(Add32 <typ.UInt32> (Add32 <typ.UInt32>
(Mul32 <typ.UInt32> (Int64Lo x) (Int64Hi y)) (Mul32 <typ.UInt32> x0 y1)
(Add32 <typ.UInt32> (Mul32 <typ.UInt32> x1 y0)))
(Mul32 <typ.UInt32> (Int64Hi x) (Int64Lo y)) x0y0Lo))
(Select0 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y)))))
(Select1 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y)))) (Mul64uhilo <t> x y) =>
(Last <t>
x0: (Int64Lo x)
x1: (Int64Hi x)
y0: (Int64Lo y)
y1: (Int64Hi y)
x0y0: (Mul32uhilo x0 y0)
x0y1: (Mul32uhilo x0 y1)
x1y0: (Mul32uhilo x1 y0)
x1y1: (Mul32uhilo x1 y1)
x0y0Hi: (Select0 <typ.UInt32> x0y0)
x0y0Lo: (Select1 <typ.UInt32> x0y0)
x0y1Hi: (Select0 <typ.UInt32> x0y1)
x0y1Lo: (Select1 <typ.UInt32> x0y1)
x1y0Hi: (Select0 <typ.UInt32> x1y0)
x1y0Lo: (Select1 <typ.UInt32> x1y0)
x1y1Hi: (Select0 <typ.UInt32> x1y1)
x1y1Lo: (Select1 <typ.UInt32> x1y1)
w1a: (Add32carry x0y0Hi x0y1Lo)
w2a: (Add32carrywithcarry x0y1Hi x1y0Hi (Select1 <types.TypeFlags> w1a))
w3a: (Add32withcarry <typ.UInt32> x1y1Hi (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2a))
w1b: (Add32carry x1y0Lo (Select0 <typ.UInt32> w1a))
w2b: (Add32carrywithcarry x1y1Lo (Select0 <typ.UInt32> w2a) (Select1 <types.TypeFlags> w1b))
w3b: (Add32withcarry <typ.UInt32> w3a (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2b))
(MakeTuple <types.NewTuple(typ.UInt64,typ.UInt64)>
(Int64Make w3b (Select0 <typ.UInt32> w2b))
(Int64Make (Select0 <typ.UInt32> w1b) x0y0Lo)))
(Hmul64u x y) => (Select0 (Mul64uhilo x y))
// Hacker's Delight p. 175: signed hmul = unsigned hmul - (x<0)&y - (y<0)&x.
(Hmul64 x y) =>
(Last
p: (Hmul64u <typ.UInt64> x y)
xSign: (Int64Make xs:(Rsh32x32 <typ.UInt32> (Int64Hi x) (Const32 <typ.UInt32> [31])) xs)
ySign: (Int64Make ys:(Rsh32x32 <typ.UInt32> (Int64Hi y) (Const32 <typ.UInt32> [31])) ys)
(Sub64 <typ.Int64> (Sub64 <typ.Int64> p (And64 <typ.Int64> xSign y)) (And64 <typ.Int64> ySign x)))
// (x+y)/2 => (x-y)/2 + y
(Avg64u <t> x y) => (Add64 (Rsh64Ux32 <t> (Sub64 <t> x y) (Const32 <typ.UInt32> [1])) y)
(And64 x y) => (And64 x y) =>
(Int64Make (Int64Make

View file

@ -118,7 +118,7 @@
(Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m/2)])) (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m/2)]))
(Const64 <typ.UInt64> [smagic32(c).s - 1])) (Const64 <typ.UInt64> [smagic32(c).s - 1]))
(Rsh32x64 <t> x (Const64 <typ.UInt64> [31]))) (Rsh32x64 <t> x (Const64 <typ.UInt64> [31])))
(Div64 <t> x (Const64 [c])) && smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul => (Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul =>
(Sub64 <t> (Sub64 <t>
(Rsh64x64 <t> (Rsh64x64 <t>
(Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m/2)])) (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m/2)]))
@ -132,7 +132,7 @@
(Add32 <t> x (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m)]))) (Add32 <t> x (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m)])))
(Const64 <typ.UInt64> [smagic32(c).s])) (Const64 <typ.UInt64> [smagic32(c).s]))
(Rsh32x64 <t> x (Const64 <typ.UInt64> [31]))) (Rsh32x64 <t> x (Const64 <typ.UInt64> [31])))
(Div64 <t> x (Const64 [c])) && smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul => (Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul =>
(Sub64 <t> (Sub64 <t>
(Rsh64x64 <t> (Rsh64x64 <t>
(Add64 <t> x (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m)]))) (Add64 <t> x (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m)])))
@ -153,7 +153,7 @@
(Rsh32Ux64 <t> (Rsh32Ux64 <t>
(Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(smagic32(c).m)])) (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(smagic32(c).m)]))
(Const64 <typ.UInt64> [smagic32(c).s])) (Const64 <typ.UInt64> [smagic32(c).s]))
(Div64u <t> x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul => (Div64u <t> x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.useHmul =>
(Rsh64Ux64 <t> (Rsh64Ux64 <t>
(Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(smagic64(c).m)])) (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(smagic64(c).m)]))
(Const64 <typ.UInt64> [smagic64(c).s])) (Const64 <typ.UInt64> [smagic64(c).s]))
@ -185,7 +185,7 @@
(Rsh32Ux64 <t> (Rsh32Ux64 <t>
(Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(1<<31 + umagic32(c).m/2)])) (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(1<<31 + umagic32(c).m/2)]))
(Const64 <typ.UInt64> [umagic32(c).s - 1])) (Const64 <typ.UInt64> [umagic32(c).s - 1]))
(Div64u <t> x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul => (Div64u <t> x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul =>
(Rsh64Ux64 <t> (Rsh64Ux64 <t>
(Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(1<<63 + umagic64(c).m/2)])) (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(1<<63 + umagic64(c).m/2)]))
(Const64 <typ.UInt64> [umagic64(c).s - 1])) (Const64 <typ.UInt64> [umagic64(c).s - 1]))
@ -211,7 +211,7 @@
(Rsh32Ux64 <typ.UInt32> x (Const64 <typ.UInt64> [1])) (Rsh32Ux64 <typ.UInt32> x (Const64 <typ.UInt64> [1]))
(Const32 <typ.UInt32> [int32(1<<31 + (umagic32(c).m+1)/2)])) (Const32 <typ.UInt32> [int32(1<<31 + (umagic32(c).m+1)/2)]))
(Const64 <typ.UInt64> [umagic32(c).s - 2])) (Const64 <typ.UInt64> [umagic32(c).s - 2]))
(Div64u <t> x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul => (Div64u <t> x (Const64 [c])) && umagicOK64(c) && c&1 == 0 && config.useHmul =>
(Rsh64Ux64 <t> (Rsh64Ux64 <t>
(Hmul64u <typ.UInt64> (Hmul64u <typ.UInt64>
(Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1])) (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1]))
@ -237,52 +237,7 @@
(Rsh32Ux64 <t> (Rsh32Ux64 <t>
(Avg32u x (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(umagic32(c).m)]))) (Avg32u x (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(umagic32(c).m)])))
(Const64 <typ.UInt64> [umagic32(c).s - 1])) (Const64 <typ.UInt64> [umagic32(c).s - 1]))
(Div64u <t> x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul => (Div64u <t> x (Const64 [c])) && umagicOK64(c) && config.useAvg && config.useHmul =>
(Rsh64Ux64 <t> (Rsh64Ux64 <t>
(Avg64u x (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(umagic64(c).m)]))) (Avg64u x (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(umagic64(c).m)])))
(Const64 <typ.UInt64> [umagic64(c).s - 1])) (Const64 <typ.UInt64> [umagic64(c).s - 1]))
// Case 9. For unsigned 64-bit divides on 32-bit machines,
// if the constant fits in 16 bits (so that the last term
// fits in 32 bits), convert to three 32-bit divides by a constant.
//
// If 1<<32 = Q * c + R
// and x = hi << 32 + lo
//
// Then x = (hi/c*c + hi%c) << 32 + lo
// = hi/c*c<<32 + hi%c<<32 + lo
// = hi/c*c<<32 + (hi%c)*(Q*c+R) + lo/c*c + lo%c
// = hi/c*c<<32 + (hi%c)*Q*c + lo/c*c + (hi%c*R+lo%c)
// and x / c = (hi/c)<<32 + (hi%c)*Q + lo/c + (hi%c*R+lo%c)/c
(Div64u x (Const64 [c])) && c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul =>
(Add64
(Add64 <typ.UInt64>
(Add64 <typ.UInt64>
(Lsh64x64 <typ.UInt64>
(ZeroExt32to64
(Div32u <typ.UInt32>
(Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32])))
(Const32 <typ.UInt32> [int32(c)])))
(Const64 <typ.UInt64> [32]))
(ZeroExt32to64 (Div32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)]))))
(Mul64 <typ.UInt64>
(ZeroExt32to64 <typ.UInt64>
(Mod32u <typ.UInt32>
(Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32])))
(Const32 <typ.UInt32> [int32(c)])))
(Const64 <typ.UInt64> [int64((1<<32)/c)])))
(ZeroExt32to64
(Div32u <typ.UInt32>
(Add32 <typ.UInt32>
(Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)]))
(Mul32 <typ.UInt32>
(Mod32u <typ.UInt32>
(Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32])))
(Const32 <typ.UInt32> [int32(c)]))
(Const32 <typ.UInt32> [int32((1<<32)%c)])))
(Const32 <typ.UInt32> [int32(c)]))))
// Repeated from generic.rules, for expanding the expression above
// (which can then be further expanded to handle the nested Div32u).
(Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK32(c)
=> (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))

View file

@ -1106,13 +1106,13 @@
=> (Sub32 x (Mul32 <t> (Div32 <t> x (Const32 <t> [c])) (Const32 <t> [c]))) => (Sub32 x (Mul32 <t> (Div32 <t> x (Const32 <t> [c])) (Const32 <t> [c])))
(Mod64 <t> x (Const64 [c])) && x.Op != OpConst64 && (c > 0 || c == -1<<63) (Mod64 <t> x (Const64 [c])) && x.Op != OpConst64 && (c > 0 || c == -1<<63)
=> (Sub64 x (Mul64 <t> (Div64 <t> x (Const64 <t> [c])) (Const64 <t> [c]))) => (Sub64 x (Mul64 <t> (Div64 <t> x (Const64 <t> [c])) (Const64 <t> [c])))
(Mod8u <t> x (Const8 [c])) && x.Op != OpConst8 && c > 0 && umagicOK8( c) (Mod8u <t> x (Const8 [c])) && x.Op != OpConst8 && c != 0
=> (Sub8 x (Mul8 <t> (Div8u <t> x (Const8 <t> [c])) (Const8 <t> [c]))) => (Sub8 x (Mul8 <t> (Div8u <t> x (Const8 <t> [c])) (Const8 <t> [c])))
(Mod16u <t> x (Const16 [c])) && x.Op != OpConst16 && c > 0 && umagicOK16(c) (Mod16u <t> x (Const16 [c])) && x.Op != OpConst16 && c != 0
=> (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c]))) => (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c])))
(Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK32(c) (Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c != 0
=> (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c]))) => (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
(Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && c > 0 && umagicOK64(c) (Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && c != 0
=> (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c]))) => (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
// Set up for mod->mul+rot optimization in genericlateopt.rules. // Set up for mod->mul+rot optimization in genericlateopt.rules.

View file

@ -16,6 +16,9 @@ package main
// are signed or unsigned. // are signed or unsigned.
var genericOps = []opData{ var genericOps = []opData{
// Pseudo-op.
{name: "Last", argLength: -1}, // returns its last argument; the earlier arguments act as "let" bindings in rewrite rules
// 2-input arithmetic // 2-input arithmetic
// Types must be consistent with Go typing. Add, for example, must take two values // Types must be consistent with Go typing. Add, for example, must take two values
// of the same type and produces that same type. // of the same type and produces that same type.
@ -559,6 +562,7 @@ var genericOps = []opData{
{name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry) {name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry)
{name: "Add32withcarry", argLength: 3, commutative: true}, // arg0 + arg1 + arg2, arg2=carry (0 or 1) {name: "Add32withcarry", argLength: 3, commutative: true}, // arg0 + arg1 + arg2, arg2=carry (0 or 1)
{name: "Add32carrywithcarry", argLength: 3, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1 + arg2, arg2=carry, returns (value, carry)
{name: "Sub32carry", argLength: 2, typ: "(UInt32,Flags)"}, // arg0 - arg1, returns (value, carry) {name: "Sub32carry", argLength: 2, typ: "(UInt32,Flags)"}, // arg0 - arg1, returns (value, carry)
{name: "Sub32withcarry", argLength: 3}, // arg0 - arg1 - arg2, arg2=carry (0 or 1) {name: "Sub32withcarry", argLength: 3}, // arg0 - arg1 - arg2, arg2=carry (0 or 1)

View file

@ -1271,8 +1271,10 @@ func genResult0(rr *RuleRewrite, arch arch, result string, top, move bool, pos s
case 0: case 0:
case 1: case 1:
rr.add(stmtf("%s.AddArg(%s)", v, all.String())) rr.add(stmtf("%s.AddArg(%s)", v, all.String()))
default: case 2, 3, 4, 5, 6:
rr.add(stmtf("%s.AddArg%d(%s)", v, len(args), all.String())) rr.add(stmtf("%s.AddArg%d(%s)", v, len(args), all.String()))
default:
rr.add(stmtf("%s.AddArgs(%s)", v, all.String()))
} }
if cse != nil { if cse != nil {
@ -1313,6 +1315,12 @@ outer:
d++ d++
case d > 0 && s[i] == close: case d > 0 && s[i] == close:
d-- d--
case s[i] == ':':
// ignore spaces after colons
nonsp = true
for i+1 < len(s) && (s[i+1] == ' ' || s[i+1] == '\t') {
i++
}
default: default:
nonsp = true nonsp = true
} }
@ -1347,7 +1355,7 @@ func extract(val string) (op, typ, auxint, aux string, args []string) {
val = val[1 : len(val)-1] // remove () val = val[1 : len(val)-1] // remove ()
// Split val up into regions. // Split val up into regions.
// Split by spaces/tabs, except those contained in (), {}, [], or <>. // Split by spaces/tabs, except those contained in (), {}, [], or <> or after colon.
s := split(val) s := split(val)
// Extract restrictions and args. // Extract restrictions and args.
@ -1471,7 +1479,7 @@ func splitNameExpr(arg string) (name, expr string) {
// colon is inside the parens, such as in "(Foo x:(Bar))". // colon is inside the parens, such as in "(Foo x:(Bar))".
return "", arg return "", arg
} }
return arg[:colon], arg[colon+1:] return arg[:colon], strings.TrimSpace(arg[colon+1:])
} }
func getBlockInfo(op string, arch arch) (name string, data blockData) { func getBlockInfo(op string, arch arch) (name string, data blockData) {

View file

@ -386,6 +386,7 @@ const (
Op386ADDLcarry Op386ADDLcarry
Op386ADDLconstcarry Op386ADDLconstcarry
Op386ADCL Op386ADCL
Op386ADCLcarry
Op386ADCLconst Op386ADCLconst
Op386SUBL Op386SUBL
Op386SUBLconst Op386SUBLconst
@ -1182,6 +1183,7 @@ const (
OpARMADDSconst OpARMADDSconst
OpARMADC OpARMADC
OpARMADCconst OpARMADCconst
OpARMADCS
OpARMSUBS OpARMSUBS
OpARMSUBSconst OpARMSUBSconst
OpARMRSBSconst OpARMRSBSconst
@ -3010,6 +3012,7 @@ const (
OpWasmI64Rotl OpWasmI64Rotl
OpWasmI64Popcnt OpWasmI64Popcnt
OpLast
OpAdd8 OpAdd8
OpAdd16 OpAdd16
OpAdd32 OpAdd32
@ -3336,6 +3339,7 @@ const (
OpInt64Lo OpInt64Lo
OpAdd32carry OpAdd32carry
OpAdd32withcarry OpAdd32withcarry
OpAdd32carrywithcarry
OpSub32carry OpSub32carry
OpSub32withcarry OpSub32withcarry
OpAdd64carry OpAdd64carry
@ -3968,6 +3972,24 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "ADCLcarry",
argLen: 3,
commutative: true,
resultInArg0: true,
clobberFlags: true,
asm: x86.AADCL,
reg: regInfo{
inputs: []inputInfo{
{0, 239}, // AX CX DX BX BP SI DI
{1, 239}, // AX CX DX BX BP SI DI
},
outputs: []outputInfo{
{1, 0},
{0, 239}, // AX CX DX BX BP SI DI
},
},
},
{ {
name: "ADCLconst", name: "ADCLconst",
auxType: auxInt32, auxType: auxInt32,
@ -15792,6 +15814,22 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "ADCS",
argLen: 3,
commutative: true,
asm: arm.AADC,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
outputs: []outputInfo{
{1, 0},
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{ {
name: "SUBS", name: "SUBS",
argLen: 2, argLen: 2,
@ -40672,6 +40710,11 @@ var opcodeTable = [...]opInfo{
}, },
}, },
{
name: "Last",
argLen: -1,
generic: true,
},
{ {
name: "Add8", name: "Add8",
argLen: 2, argLen: 2,
@ -42480,6 +42523,12 @@ var opcodeTable = [...]opInfo{
commutative: true, commutative: true,
generic: true, generic: true,
}, },
{
name: "Add32carrywithcarry",
argLen: 3,
commutative: true,
generic: true,
},
{ {
name: "Sub32carry", name: "Sub32carry",
argLen: 2, argLen: 2,

View file

@ -257,6 +257,9 @@ func rewriteValue386(v *Value) bool {
case OpAdd32carry: case OpAdd32carry:
v.Op = Op386ADDLcarry v.Op = Op386ADDLcarry
return true return true
case OpAdd32carrywithcarry:
v.Op = Op386ADCLcarry
return true
case OpAdd32withcarry: case OpAdd32withcarry:
v.Op = Op386ADCL v.Op = Op386ADCL
return true return true

View file

@ -446,6 +446,9 @@ func rewriteValueARM(v *Value) bool {
case OpAdd32carry: case OpAdd32carry:
v.Op = OpARMADDS v.Op = OpARMADDS
return true return true
case OpAdd32carrywithcarry:
v.Op = OpARMADCS
return true
case OpAdd32withcarry: case OpAdd32withcarry:
v.Op = OpARMADC v.Op = OpARMADC
return true return true

View file

@ -6562,6 +6562,23 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
v.AddArg2(x, y) v.AddArg2(x, y)
return true return true
} }
// match: (Select0 (Add32carrywithcarry <t> x y c))
// result: (ADD <t.FieldType(0)> c (ADD <t.FieldType(0)> x y))
for {
if v_0.Op != OpAdd32carrywithcarry {
break
}
t := v_0.Type
c := v_0.Args[2]
x := v_0.Args[0]
y := v_0.Args[1]
v.reset(OpMIPSADD)
v.Type = t.FieldType(0)
v0 := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0))
v0.AddArg2(x, y)
v.AddArg2(c, v0)
return true
}
// match: (Select0 (Sub32carry <t> x y)) // match: (Select0 (Sub32carry <t> x y))
// result: (SUB <t.FieldType(0)> x y) // result: (SUB <t.FieldType(0)> x y)
for { for {
@ -6759,6 +6776,29 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
v.AddArg2(x, v0) v.AddArg2(x, v0)
return true return true
} }
// match: (Select1 (Add32carrywithcarry <t> x y c))
// result: (OR <typ.Bool> (SGTU <typ.Bool> x xy:(ADD <t.FieldType(0)> x y)) (SGTU <typ.Bool> xy (ADD <t.FieldType(0)> c xy)))
for {
if v_0.Op != OpAdd32carrywithcarry {
break
}
t := v_0.Type
c := v_0.Args[2]
x := v_0.Args[0]
y := v_0.Args[1]
v.reset(OpMIPSOR)
v.Type = typ.Bool
v0 := b.NewValue0(v.Pos, OpMIPSSGTU, typ.Bool)
xy := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0))
xy.AddArg2(x, y)
v0.AddArg2(x, xy)
v2 := b.NewValue0(v.Pos, OpMIPSSGTU, typ.Bool)
v3 := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0))
v3.AddArg2(c, xy)
v2.AddArg2(xy, v3)
v.AddArg2(v0, v2)
return true
}
// match: (Select1 (Sub32carry <t> x y)) // match: (Select1 (Sub32carry <t> x y))
// result: (SGTU <typ.Bool> (SUB <t.FieldType(0)> x y) x) // result: (SGTU <typ.Bool> (SUB <t.FieldType(0)> x y) x)
for { for {

View file

@ -12,6 +12,8 @@ func rewriteValuedec64(v *Value) bool {
return rewriteValuedec64_OpAnd64(v) return rewriteValuedec64_OpAnd64(v)
case OpArg: case OpArg:
return rewriteValuedec64_OpArg(v) return rewriteValuedec64_OpArg(v)
case OpAvg64u:
return rewriteValuedec64_OpAvg64u(v)
case OpBitLen64: case OpBitLen64:
return rewriteValuedec64_OpBitLen64(v) return rewriteValuedec64_OpBitLen64(v)
case OpBswap64: case OpBswap64:
@ -27,10 +29,16 @@ func rewriteValuedec64(v *Value) bool {
return true return true
case OpEq64: case OpEq64:
return rewriteValuedec64_OpEq64(v) return rewriteValuedec64_OpEq64(v)
case OpHmul64:
return rewriteValuedec64_OpHmul64(v)
case OpHmul64u:
return rewriteValuedec64_OpHmul64u(v)
case OpInt64Hi: case OpInt64Hi:
return rewriteValuedec64_OpInt64Hi(v) return rewriteValuedec64_OpInt64Hi(v)
case OpInt64Lo: case OpInt64Lo:
return rewriteValuedec64_OpInt64Lo(v) return rewriteValuedec64_OpInt64Lo(v)
case OpLast:
return rewriteValuedec64_OpLast(v)
case OpLeq64: case OpLeq64:
return rewriteValuedec64_OpLeq64(v) return rewriteValuedec64_OpLeq64(v)
case OpLeq64U: case OpLeq64U:
@ -57,6 +65,8 @@ func rewriteValuedec64(v *Value) bool {
return rewriteValuedec64_OpLsh8x64(v) return rewriteValuedec64_OpLsh8x64(v)
case OpMul64: case OpMul64:
return rewriteValuedec64_OpMul64(v) return rewriteValuedec64_OpMul64(v)
case OpMul64uhilo:
return rewriteValuedec64_OpMul64uhilo(v)
case OpNeg64: case OpNeg64:
return rewriteValuedec64_OpNeg64(v) return rewriteValuedec64_OpNeg64(v)
case OpNeq64: case OpNeq64:
@ -101,6 +111,10 @@ func rewriteValuedec64(v *Value) bool {
return rewriteValuedec64_OpRsh8Ux64(v) return rewriteValuedec64_OpRsh8Ux64(v)
case OpRsh8x64: case OpRsh8x64:
return rewriteValuedec64_OpRsh8x64(v) return rewriteValuedec64_OpRsh8x64(v)
case OpSelect0:
return rewriteValuedec64_OpSelect0(v)
case OpSelect1:
return rewriteValuedec64_OpSelect1(v)
case OpSignExt16to64: case OpSignExt16to64:
return rewriteValuedec64_OpSignExt16to64(v) return rewriteValuedec64_OpSignExt16to64(v)
case OpSignExt32to64: case OpSignExt32to64:
@ -133,29 +147,33 @@ func rewriteValuedec64_OpAdd64(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block b := v.Block
typ := &b.Func.Config.Types typ := &b.Func.Config.Types
// match: (Add64 x y) // match: (Add64 <t> x y)
// result: (Int64Make (Add32withcarry <typ.Int32> (Int64Hi x) (Int64Hi y) (Select1 <types.TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y)))) (Select0 <typ.UInt32> (Add32carry (Int64Lo x) (Int64Lo y)))) // result: (Last <t> x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) add: (Add32carry x0 y0) (Int64Make (Add32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> add)) (Select0 <typ.UInt32> add)))
for { for {
t := v.Type
x := v_0 x := v_0
y := v_1 y := v_1
v.reset(OpInt64Make) v.reset(OpLast)
v0 := b.NewValue0(v.Pos, OpAdd32withcarry, typ.Int32) v.Type = t
v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
v1.AddArg(x) x0.AddArg(x)
v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
v2.AddArg(y) x1.AddArg(x)
v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
v4 := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) y0.AddArg(y)
v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
v5.AddArg(x) y1.AddArg(y)
v6 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) add := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
v6.AddArg(y) add.AddArg2(x0, y0)
v4.AddArg2(v5, v6) v5 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
v3.AddArg(v4) v6 := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32)
v0.AddArg3(v1, v2, v3) v7 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v7 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) v7.AddArg(add)
v7.AddArg(v4) v6.AddArg3(x1, y1, v7)
v.AddArg2(v0, v7) v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
v8.AddArg(add)
v5.AddArg2(v6, v8)
v.AddArg6(x0, x1, y0, y1, add, v5)
return true return true
} }
} }
@ -268,6 +286,28 @@ func rewriteValuedec64_OpArg(v *Value) bool {
} }
return false return false
} }
// rewriteValuedec64_OpAvg64u lowers Avg64u to other 64-bit ops by computing
// ((x - y) >> 1) + y. The rule has no condition, so v is always rewritten
// and the function always reports true.
//
// NOTE(review): this form equals the average of x and y only when x >= y;
// presumably the Avg64u op guarantees that ordering — confirm against the
// generic op definition.
func rewriteValuedec64_OpAvg64u(v *Value) bool {
	y := v.Args[1]
	x := v.Args[0]
	blk := v.Block
	typs := &blk.Func.Config.Types
	// match: (Avg64u <t> x y)
	// result: (Add64 (Rsh64Ux32 <t> (Sub64 <t> x y) (Const32 <typ.UInt32> [1])) y)
	t := v.Type
	v.reset(OpAdd64)
	// Values are created in the same order as the rule lists them:
	// the shift first, then its operands.
	halved := blk.NewValue0(v.Pos, OpRsh64Ux32, t)
	diff := blk.NewValue0(v.Pos, OpSub64, t)
	diff.AddArg2(x, y)
	one := blk.NewValue0(v.Pos, OpConst32, typs.UInt32)
	one.AuxInt = int32ToAuxInt(1)
	halved.AddArg2(diff, one)
	v.AddArg2(halved, y)
	return true
}
func rewriteValuedec64_OpBitLen64(v *Value) bool { func rewriteValuedec64_OpBitLen64(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block b := v.Block
@ -430,6 +470,62 @@ func rewriteValuedec64_OpEq64(v *Value) bool {
return true return true
} }
} }
// rewriteValuedec64_OpHmul64 rewrites Hmul64 in terms of Hmul64u plus a
// correction: result = p - (xSign & y) - (ySign & x), where p is
// Hmul64u(x, y) and xSign/ySign are 64-bit values whose two 32-bit halves
// are both the high word of x (resp. y) shifted right by 31 with Rsh32x32.
// The rule has no condition, so v is always rewritten and true is returned.
func rewriteValuedec64_OpHmul64(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Hmul64 x y)
// result: (Last p: (Hmul64u <typ.UInt64> x y) xSign: (Int64Make xs:(Rsh32x32 <typ.UInt32> (Int64Hi x) (Const32 <typ.UInt32> [31])) xs) ySign: (Int64Make ys:(Rsh32x32 <typ.UInt32> (Int64Hi y) (Const32 <typ.UInt32> [31])) ys) (Sub64 <typ.Int64> (Sub64 <typ.Int64> p (And64 <typ.Int64> xSign y)) (And64 <typ.Int64> ySign x)))
for {
x := v_0
y := v_1
// v becomes a Last op; its type is left as it was before the reset.
v.reset(OpLast)
// p: Hmul64u of the same operands.
p := b.NewValue0(v.Pos, OpHmul64u, typ.UInt64)
p.AddArg2(x, y)
// xSign: Int64Make(xs, xs) with xs = Rsh32x32(Int64Hi(x), 31).
// NOTE(review): presumably Rsh32x32 is the signed shift, making xs all
// ones for negative x and zero otherwise — confirm against the op table.
xSign := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
xs := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32)
v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
v3.AddArg(x)
v4 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
v4.AuxInt = int32ToAuxInt(31)
xs.AddArg2(v3, v4)
xSign.AddArg2(xs, xs)
// ySign: built the same way from y. The constant 31 (v4) is shared with
// the x shift rather than created a second time.
ySign := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
ys := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32)
v7 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
v7.AddArg(y)
ys.AddArg2(v7, v4)
ySign.AddArg2(ys, ys)
// v8 = (p - (xSign & y)) - (ySign & x): the final result expression.
v8 := b.NewValue0(v.Pos, OpSub64, typ.Int64)
v9 := b.NewValue0(v.Pos, OpSub64, typ.Int64)
v10 := b.NewValue0(v.Pos, OpAnd64, typ.Int64)
v10.AddArg2(xSign, y)
v9.AddArg2(p, v10)
v11 := b.NewValue0(v.Pos, OpAnd64, typ.Int64)
v11.AddArg2(ySign, x)
v8.AddArg2(v9, v11)
// The named intermediates come first and the result expression is the
// last argument of the Last op.
v.AddArg4(p, xSign, ySign, v8)
return true
}
}
// rewriteValuedec64_OpHmul64u rewrites Hmul64u as element 0 of a Mul64uhilo
// tuple over the same operands. The rule is unconditional, so v is always
// rewritten and the function always reports true.
func rewriteValuedec64_OpHmul64u(v *Value) bool {
	y := v.Args[1]
	x := v.Args[0]
	blk := v.Block
	typs := &blk.Func.Config.Types
	// match: (Hmul64u x y)
	// result: (Select0 (Mul64uhilo x y))
	v.reset(OpSelect0)
	product := blk.NewValue0(v.Pos, OpMul64uhilo, types.NewTuple(typs.UInt64, typs.UInt64))
	product.AddArg2(x, y)
	v.AddArg(product)
	return true
}
func rewriteValuedec64_OpInt64Hi(v *Value) bool { func rewriteValuedec64_OpInt64Hi(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (Int64Hi (Int64Make hi _)) // match: (Int64Hi (Int64Make hi _))
@ -458,6 +554,14 @@ func rewriteValuedec64_OpInt64Lo(v *Value) bool {
} }
return false return false
} }
// rewriteValuedec64_OpLast replaces a Last op with a copy of its final
// argument; the preceding arguments are not consulted here. It always
// reports true.
func rewriteValuedec64_OpLast(v *Value) bool {
	// match: (Last ___)
	// result: v.Args[len(v.Args)-1]
	final := v.Args[len(v.Args)-1]
	v.copyOf(final)
	return true
}
func rewriteValuedec64_OpLeq64(v *Value) bool { func rewriteValuedec64_OpLeq64(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
@ -1114,35 +1218,124 @@ func rewriteValuedec64_OpMul64(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block b := v.Block
typ := &b.Func.Config.Types typ := &b.Func.Config.Types
// match: (Mul64 x y) // match: (Mul64 <t> x y)
// result: (Int64Make (Add32 <typ.UInt32> (Mul32 <typ.UInt32> (Int64Lo x) (Int64Hi y)) (Add32 <typ.UInt32> (Mul32 <typ.UInt32> (Int64Hi x) (Int64Lo y)) (Select0 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y))))) (Select1 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y)))) // result: (Last <t> x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) x0y0: (Mul32uhilo x0 y0) x0y0Hi: (Select0 <typ.UInt32> x0y0) x0y0Lo: (Select1 <typ.UInt32> x0y0) (Int64Make (Add32 <typ.UInt32> x0y0Hi (Add32 <typ.UInt32> (Mul32 <typ.UInt32> x0 y1) (Mul32 <typ.UInt32> x1 y0))) x0y0Lo))
for { for {
t := v.Type
x := v_0 x := v_0
y := v_1 y := v_1
v.reset(OpInt64Make) v.reset(OpLast)
v0 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32) v.Type = t
v1 := b.NewValue0(v.Pos, OpMul32, typ.UInt32) x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
v2 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) x0.AddArg(x)
v2.AddArg(x) x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) x1.AddArg(x)
v3.AddArg(y) y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
v1.AddArg2(v2, v3) y0.AddArg(y)
v4 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32) y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
v5 := b.NewValue0(v.Pos, OpMul32, typ.UInt32) y1.AddArg(y)
v6 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) x0y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
v6.AddArg(x) x0y0.AddArg2(x0, y0)
v7 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) x0y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
v7.AddArg(y) x0y0Hi.AddArg(x0y0)
v5.AddArg2(v6, v7) x0y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) x0y0Lo.AddArg(x0y0)
v9 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) v7 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
v9.AddArg2(v2, v7) v8 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
v8.AddArg(v9) v9 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
v4.AddArg2(v5, v8) v10 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
v0.AddArg2(v1, v4) v10.AddArg2(x0, y1)
v10 := b.NewValue0(v.Pos, OpSelect1, typ.UInt32) v11 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
v10.AddArg(v9) v11.AddArg2(x1, y0)
v.AddArg2(v0, v10) v9.AddArg2(v10, v11)
v8.AddArg2(x0y0Hi, v9)
v7.AddArg2(v8, x0y0Lo)
v.AddArgs(x0, x1, y0, y1, x0y0, x0y0Hi, x0y0Lo, v7)
return true
}
}
// rewriteValuedec64_OpMul64uhilo rewrites the 64x64 multiply Mul64uhilo into
// 32-bit ops. Each operand is split into low/high words (x0/x1, y0/y1), the
// four Mul32uhilo partial products are formed, and their halves are summed
// in two carry chains ("a", then "b"). The result is a MakeTuple of two
// Int64Make values, wrapped in a Last op whose leading arguments are the
// named intermediates. The rule is unconditional: v is always rewritten.
func rewriteValuedec64_OpMul64uhilo(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (Mul64uhilo <t> x y)
// result: (Last <t> x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) x0y0: (Mul32uhilo x0 y0) x0y1: (Mul32uhilo x0 y1) x1y0: (Mul32uhilo x1 y0) x1y1: (Mul32uhilo x1 y1) x0y0Hi: (Select0 <typ.UInt32> x0y0) x0y0Lo: (Select1 <typ.UInt32> x0y0) x0y1Hi: (Select0 <typ.UInt32> x0y1) x0y1Lo: (Select1 <typ.UInt32> x0y1) x1y0Hi: (Select0 <typ.UInt32> x1y0) x1y0Lo: (Select1 <typ.UInt32> x1y0) x1y1Hi: (Select0 <typ.UInt32> x1y1) x1y1Lo: (Select1 <typ.UInt32> x1y1) w1a: (Add32carry x0y0Hi x0y1Lo) w2a: (Add32carrywithcarry x0y1Hi x1y0Hi (Select1 <types.TypeFlags> w1a)) w3a: (Add32withcarry <typ.UInt32> x1y1Hi (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2a)) w1b: (Add32carry x1y0Lo (Select0 <typ.UInt32> w1a)) w2b: (Add32carrywithcarry x1y1Lo (Select0 <typ.UInt32> w2a) (Select1 <types.TypeFlags> w1b)) w3b: (Add32withcarry <typ.UInt32> w3a (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2b)) (MakeTuple <types.NewTuple(typ.UInt64,typ.UInt64)> (Int64Make w3b (Select0 <typ.UInt32> w2b)) (Int64Make (Select0 <typ.UInt32> w1b) x0y0Lo)))
for {
t := v.Type
x := v_0
y := v_1
v.reset(OpLast)
v.Type = t
// Split both operands into 32-bit words.
x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
x0.AddArg(x)
x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
x1.AddArg(x)
y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
y0.AddArg(y)
y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
y1.AddArg(y)
// The four 32x32 partial products, each a (UInt32, UInt32) tuple.
x0y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
x0y0.AddArg2(x0, y0)
x0y1 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
x0y1.AddArg2(x0, y1)
x1y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
x1y0.AddArg2(x1, y0)
x1y1 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
x1y1.AddArg2(x1, y1)
// Select0 of each product is named its Hi half and Select1 its Lo half.
x0y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
x0y0Hi.AddArg(x0y0)
x0y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
x0y0Lo.AddArg(x0y0)
x0y1Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
x0y1Hi.AddArg(x0y1)
x0y1Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
x0y1Lo.AddArg(x0y1)
x1y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
x1y0Hi.AddArg(x1y0)
x1y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
x1y0Lo.AddArg(x1y0)
x1y1Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
x1y1Hi.AddArg(x1y1)
x1y1Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
x1y1Lo.AddArg(x1y1)
// Carry chain "a":
//   w1a = x0y0Hi + x0y1Lo                  (produces carry)
//   w2a = x0y1Hi + x1y0Hi + carry(w1a)     (produces carry)
//   w3a = x1y1Hi + 0      + carry(w2a)
w1a := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
w1a.AddArg2(x0y0Hi, x0y1Lo)
w2a := b.NewValue0(v.Pos, OpAdd32carrywithcarry, types.NewTuple(typ.UInt32, types.TypeFlags))
v18 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v18.AddArg(w1a)
w2a.AddArg3(x0y1Hi, x1y0Hi, v18)
w3a := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32)
v20 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
v20.AuxInt = int32ToAuxInt(0)
v21 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v21.AddArg(w2a)
w3a.AddArg3(x1y1Hi, v20, v21)
// Carry chain "b" folds the remaining halves into chain "a"'s sums:
//   w1b = x1y0Lo + sum(w1a)                (produces carry)
//   w2b = x1y1Lo + sum(w2a) + carry(w1b)   (produces carry)
//   w3b = w3a    + 0        + carry(w2b)
w1b := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
v23 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
v23.AddArg(w1a)
w1b.AddArg2(x1y0Lo, v23)
w2b := b.NewValue0(v.Pos, OpAdd32carrywithcarry, types.NewTuple(typ.UInt32, types.TypeFlags))
v25 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
v25.AddArg(w2a)
v26 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v26.AddArg(w1b)
w2b.AddArg3(x1y1Lo, v25, v26)
w3b := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32)
v28 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v28.AddArg(w2b)
// The zero constant v20 is reused here instead of creating a second one.
w3b.AddArg3(w3a, v20, v28)
// Assemble the tuple: element 0 is Int64Make(w3b, sum(w2b)) and element 1
// is Int64Make(sum(w1b), x0y0Lo).
v29 := b.NewValue0(v.Pos, OpMakeTuple, types.NewTuple(typ.UInt64, typ.UInt64))
v30 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
v31 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
v31.AddArg(w2b)
v30.AddArg2(w3b, v31)
v32 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
v33 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
v33.AddArg(w1b)
v32.AddArg2(v33, x0y0Lo)
v29.AddArg2(v30, v32)
// Named intermediates first; the MakeTuple result is the final argument.
v.AddArgs(x0, x1, y0, y1, x0y0, x0y1, x1y0, x1y1, x0y0Hi, x0y0Lo, x0y1Hi, x0y1Lo, x1y0Hi, x1y0Lo, x1y1Hi, x1y1Lo, w1a, w2a, w3a, w1b, w2b, w3b, v29)
return true return true
} }
} }
@ -2705,6 +2898,34 @@ func rewriteValuedec64_OpRsh8x64(v *Value) bool {
return true return true
} }
} }
// rewriteValuedec64_OpSelect0 simplifies Select0 applied to a MakeTuple by
// replacing v with a copy of the tuple's first argument. It reports whether
// the rewrite applied.
func rewriteValuedec64_OpSelect0(v *Value) bool {
	tuple := v.Args[0]
	// match: (Select0 (MakeTuple x y))
	// result: x
	if tuple.Op != OpMakeTuple {
		return false
	}
	v.copyOf(tuple.Args[0])
	return true
}
// rewriteValuedec64_OpSelect1 simplifies Select1 applied to a MakeTuple by
// replacing v with a copy of the tuple's second argument. It reports whether
// the rewrite applied.
func rewriteValuedec64_OpSelect1(v *Value) bool {
	tuple := v.Args[0]
	// match: (Select1 (MakeTuple x y))
	// result: y
	if tuple.Op != OpMakeTuple {
		return false
	}
	v.copyOf(tuple.Args[1])
	return true
}
func rewriteValuedec64_OpSignExt16to64(v *Value) bool { func rewriteValuedec64_OpSignExt16to64(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block b := v.Block
@ -2815,29 +3036,33 @@ func rewriteValuedec64_OpSub64(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block b := v.Block
typ := &b.Func.Config.Types typ := &b.Func.Config.Types
// match: (Sub64 x y) // match: (Sub64 <t> x y)
// result: (Int64Make (Sub32withcarry <typ.Int32> (Int64Hi x) (Int64Hi y) (Select1 <types.TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y)))) (Select0 <typ.UInt32> (Sub32carry (Int64Lo x) (Int64Lo y)))) // result: (Last <t> x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) sub: (Sub32carry x0 y0) (Int64Make (Sub32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> sub)) (Select0 <typ.UInt32> sub)))
for { for {
t := v.Type
x := v_0 x := v_0
y := v_1 y := v_1
v.reset(OpInt64Make) v.reset(OpLast)
v0 := b.NewValue0(v.Pos, OpSub32withcarry, typ.Int32) v.Type = t
v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
v1.AddArg(x) x0.AddArg(x)
v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
v2.AddArg(y) x1.AddArg(x)
v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
v4 := b.NewValue0(v.Pos, OpSub32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) y0.AddArg(y)
v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
v5.AddArg(x) y1.AddArg(y)
v6 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) sub := b.NewValue0(v.Pos, OpSub32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
v6.AddArg(y) sub.AddArg2(x0, y0)
v4.AddArg2(v5, v6) v5 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
v3.AddArg(v4) v6 := b.NewValue0(v.Pos, OpSub32withcarry, typ.UInt32)
v0.AddArg3(v1, v2, v3) v7 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
v7 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) v7.AddArg(sub)
v7.AddArg(v4) v6.AddArg3(x1, y1, v7)
v.AddArg2(v0, v7) v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
v8.AddArg(sub)
v5.AddArg2(v6, v8)
v.AddArg6(x0, x1, y0, y1, sub, v5)
return true return true
} }
} }

View file

@ -20,8 +20,6 @@ func rewriteValuedivmod(v *Value) bool {
return rewriteValuedivmod_OpDiv8(v) return rewriteValuedivmod_OpDiv8(v)
case OpDiv8u: case OpDiv8u:
return rewriteValuedivmod_OpDiv8u(v) return rewriteValuedivmod_OpDiv8u(v)
case OpMod32u:
return rewriteValuedivmod_OpMod32u(v)
} }
return false return false
} }
@ -646,7 +644,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
return true return true
} }
// match: (Div64 <t> x (Const64 [c])) // match: (Div64 <t> x (Const64 [c]))
// cond: smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul // cond: smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul
// result: (Sub64 <t> (Rsh64x64 <t> (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m/2)])) (Const64 <typ.UInt64> [smagic64(c).s - 1])) (Rsh64x64 <t> x (Const64 <typ.UInt64> [63]))) // result: (Sub64 <t> (Rsh64x64 <t> (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m/2)])) (Const64 <typ.UInt64> [smagic64(c).s - 1])) (Rsh64x64 <t> x (Const64 <typ.UInt64> [63])))
for { for {
t := v.Type t := v.Type
@ -655,7 +653,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
break break
} }
c := auxIntToInt64(v_1.AuxInt) c := auxIntToInt64(v_1.AuxInt)
if !(smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul) { if !(smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul) {
break break
} }
v.reset(OpSub64) v.reset(OpSub64)
@ -676,7 +674,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
return true return true
} }
// match: (Div64 <t> x (Const64 [c])) // match: (Div64 <t> x (Const64 [c]))
// cond: smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul // cond: smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul
// result: (Sub64 <t> (Rsh64x64 <t> (Add64 <t> x (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m)]))) (Const64 <typ.UInt64> [smagic64(c).s])) (Rsh64x64 <t> x (Const64 <typ.UInt64> [63]))) // result: (Sub64 <t> (Rsh64x64 <t> (Add64 <t> x (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m)]))) (Const64 <typ.UInt64> [smagic64(c).s])) (Rsh64x64 <t> x (Const64 <typ.UInt64> [63])))
for { for {
t := v.Type t := v.Type
@ -685,7 +683,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
break break
} }
c := auxIntToInt64(v_1.AuxInt) c := auxIntToInt64(v_1.AuxInt)
if !(smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul) { if !(smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul) {
break break
} }
v.reset(OpSub64) v.reset(OpSub64)
@ -716,7 +714,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
config := b.Func.Config config := b.Func.Config
typ := &b.Func.Config.Types typ := &b.Func.Config.Types
// match: (Div64u <t> x (Const64 [c])) // match: (Div64u <t> x (Const64 [c]))
// cond: t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul // cond: t.IsSigned() && smagicOK64(c) && config.useHmul
// result: (Rsh64Ux64 <t> (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(smagic64(c).m)])) (Const64 <typ.UInt64> [smagic64(c).s])) // result: (Rsh64Ux64 <t> (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(smagic64(c).m)])) (Const64 <typ.UInt64> [smagic64(c).s]))
for { for {
t := v.Type t := v.Type
@ -725,7 +723,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
break break
} }
c := auxIntToInt64(v_1.AuxInt) c := auxIntToInt64(v_1.AuxInt)
if !(t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul) { if !(t.IsSigned() && smagicOK64(c) && config.useHmul) {
break break
} }
v.reset(OpRsh64Ux64) v.reset(OpRsh64Ux64)
@ -740,7 +738,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
return true return true
} }
// match: (Div64u <t> x (Const64 [c])) // match: (Div64u <t> x (Const64 [c]))
// cond: umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul // cond: umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul
// result: (Rsh64Ux64 <t> (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(1<<63 + umagic64(c).m/2)])) (Const64 <typ.UInt64> [umagic64(c).s - 1])) // result: (Rsh64Ux64 <t> (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(1<<63 + umagic64(c).m/2)])) (Const64 <typ.UInt64> [umagic64(c).s - 1]))
for { for {
t := v.Type t := v.Type
@ -749,7 +747,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
break break
} }
c := auxIntToInt64(v_1.AuxInt) c := auxIntToInt64(v_1.AuxInt)
if !(umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul) { if !(umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul) {
break break
} }
v.reset(OpRsh64Ux64) v.reset(OpRsh64Ux64)
@ -764,7 +762,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
return true return true
} }
// match: (Div64u <t> x (Const64 [c])) // match: (Div64u <t> x (Const64 [c]))
// cond: umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul // cond: umagicOK64(c) && c&1 == 0 && config.useHmul
// result: (Rsh64Ux64 <t> (Hmul64u <typ.UInt64> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1])) (Const64 <typ.UInt64> [int64(1<<63 + (umagic64(c).m+1)/2)])) (Const64 <typ.UInt64> [umagic64(c).s - 2])) // result: (Rsh64Ux64 <t> (Hmul64u <typ.UInt64> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1])) (Const64 <typ.UInt64> [int64(1<<63 + (umagic64(c).m+1)/2)])) (Const64 <typ.UInt64> [umagic64(c).s - 2]))
for { for {
t := v.Type t := v.Type
@ -773,7 +771,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
break break
} }
c := auxIntToInt64(v_1.AuxInt) c := auxIntToInt64(v_1.AuxInt)
if !(umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul) { if !(umagicOK64(c) && c&1 == 0 && config.useHmul) {
break break
} }
v.reset(OpRsh64Ux64) v.reset(OpRsh64Ux64)
@ -792,7 +790,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
return true return true
} }
// match: (Div64u <t> x (Const64 [c])) // match: (Div64u <t> x (Const64 [c]))
// cond: umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul // cond: umagicOK64(c) && config.useAvg && config.useHmul
// result: (Rsh64Ux64 <t> (Avg64u x (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(umagic64(c).m)]))) (Const64 <typ.UInt64> [umagic64(c).s - 1])) // result: (Rsh64Ux64 <t> (Avg64u x (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(umagic64(c).m)]))) (Const64 <typ.UInt64> [umagic64(c).s - 1]))
for { for {
t := v.Type t := v.Type
@ -801,7 +799,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
break break
} }
c := auxIntToInt64(v_1.AuxInt) c := auxIntToInt64(v_1.AuxInt)
if !(umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul) { if !(umagicOK64(c) && config.useAvg && config.useHmul) {
break break
} }
v.reset(OpRsh64Ux64) v.reset(OpRsh64Ux64)
@ -817,66 +815,6 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
v.AddArg2(v0, v3) v.AddArg2(v0, v3)
return true return true
} }
// match: (Div64u x (Const64 [c]))
// cond: c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul
// result: (Add64 (Add64 <typ.UInt64> (Add64 <typ.UInt64> (Lsh64x64 <typ.UInt64> (ZeroExt32to64 (Div32u <typ.UInt32> (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32]))) (Const32 <typ.UInt32> [int32(c)]))) (Const64 <typ.UInt64> [32])) (ZeroExt32to64 (Div32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)])))) (Mul64 <typ.UInt64> (ZeroExt32to64 <typ.UInt64> (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32]))) (Const32 <typ.UInt32> [int32(c)]))) (Const64 <typ.UInt64> [int64((1<<32)/c)]))) (ZeroExt32to64 (Div32u <typ.UInt32> (Add32 <typ.UInt32> (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)])) (Mul32 <typ.UInt32> (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32]))) (Const32 <typ.UInt32> [int32(c)])) (Const32 <typ.UInt32> [int32((1<<32)%c)]))) (Const32 <typ.UInt32> [int32(c)]))))
for {
x := v_0
if v_1.Op != OpConst64 {
break
}
c := auxIntToInt64(v_1.AuxInt)
if !(c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul) {
break
}
v.reset(OpAdd64)
v0 := b.NewValue0(v.Pos, OpAdd64, typ.UInt64)
v1 := b.NewValue0(v.Pos, OpAdd64, typ.UInt64)
v2 := b.NewValue0(v.Pos, OpLsh64x64, typ.UInt64)
v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v4 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32)
v5 := b.NewValue0(v.Pos, OpTrunc64to32, typ.UInt32)
v6 := b.NewValue0(v.Pos, OpRsh64Ux64, typ.UInt64)
v7 := b.NewValue0(v.Pos, OpConst64, typ.UInt64)
v7.AuxInt = int64ToAuxInt(32)
v6.AddArg2(x, v7)
v5.AddArg(v6)
v8 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
v8.AuxInt = int32ToAuxInt(int32(c))
v4.AddArg2(v5, v8)
v3.AddArg(v4)
v2.AddArg2(v3, v7)
v9 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v10 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32)
v11 := b.NewValue0(v.Pos, OpTrunc64to32, typ.UInt32)
v11.AddArg(x)
v10.AddArg2(v11, v8)
v9.AddArg(v10)
v1.AddArg2(v2, v9)
v12 := b.NewValue0(v.Pos, OpMul64, typ.UInt64)
v13 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v14 := b.NewValue0(v.Pos, OpMod32u, typ.UInt32)
v14.AddArg2(v5, v8)
v13.AddArg(v14)
v15 := b.NewValue0(v.Pos, OpConst64, typ.UInt64)
v15.AuxInt = int64ToAuxInt(int64((1 << 32) / c))
v12.AddArg2(v13, v15)
v0.AddArg2(v1, v12)
v16 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
v17 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32)
v18 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
v19 := b.NewValue0(v.Pos, OpMod32u, typ.UInt32)
v19.AddArg2(v11, v8)
v20 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
v21 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
v21.AuxInt = int32ToAuxInt(int32((1 << 32) % c))
v20.AddArg2(v14, v21)
v18.AddArg2(v19, v20)
v17.AddArg2(v18, v8)
v16.AddArg(v17)
v.AddArg2(v0, v16)
return true
}
return false return false
} }
func rewriteValuedivmod_OpDiv8(v *Value) bool { func rewriteValuedivmod_OpDiv8(v *Value) bool {
@ -982,35 +920,6 @@ func rewriteValuedivmod_OpDiv8u(v *Value) bool {
} }
return false return false
} }
// rewriteValuedivmod_OpMod32u rewrites an unsigned 32-bit modulus by a
// constant c as x - (x/c)*c. The rewrite applies only when x is not itself a
// Const32, c > 0, and umagicOK32(c) holds; it reports whether it applied.
func rewriteValuedivmod_OpMod32u(v *Value) bool {
	divisor := v.Args[1]
	x := v.Args[0]
	blk := v.Block
	// match: (Mod32u <t> x (Const32 [c]))
	// cond: x.Op != OpConst32 && c > 0 && umagicOK32(c)
	// result: (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
	t := v.Type
	if divisor.Op != OpConst32 {
		return false
	}
	c := auxIntToInt32(divisor.AuxInt)
	if x.Op == OpConst32 || c <= 0 || !umagicOK32(c) {
		return false
	}
	v.reset(OpSub32)
	// Creation order follows the rule: product, quotient, then the constant,
	// which is shared by the division and the multiplication.
	product := blk.NewValue0(v.Pos, OpMul32, t)
	quotient := blk.NewValue0(v.Pos, OpDiv32u, t)
	cval := blk.NewValue0(v.Pos, OpConst32, t)
	cval.AuxInt = int32ToAuxInt(c)
	quotient.AddArg2(x, cval)
	product.AddArg2(quotient, cval)
	v.AddArg2(x, product)
	return true
}
func rewriteBlockdivmod(b *Block) bool { func rewriteBlockdivmod(b *Block) bool {
return false return false
} }

View file

@ -14724,7 +14724,7 @@ func rewriteValuegeneric_OpMod16u(v *Value) bool {
return true return true
} }
// match: (Mod16u <t> x (Const16 [c])) // match: (Mod16u <t> x (Const16 [c]))
// cond: x.Op != OpConst16 && c > 0 && umagicOK16(c) // cond: x.Op != OpConst16 && c != 0
// result: (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c]))) // result: (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c])))
for { for {
t := v.Type t := v.Type
@ -14733,7 +14733,7 @@ func rewriteValuegeneric_OpMod16u(v *Value) bool {
break break
} }
c := auxIntToInt16(v_1.AuxInt) c := auxIntToInt16(v_1.AuxInt)
if !(x.Op != OpConst16 && c > 0 && umagicOK16(c)) { if !(x.Op != OpConst16 && c != 0) {
break break
} }
v.reset(OpSub16) v.reset(OpSub16)
@ -14878,7 +14878,7 @@ func rewriteValuegeneric_OpMod32u(v *Value) bool {
return true return true
} }
// match: (Mod32u <t> x (Const32 [c])) // match: (Mod32u <t> x (Const32 [c]))
// cond: x.Op != OpConst32 && c > 0 && umagicOK32(c) // cond: x.Op != OpConst32 && c != 0
// result: (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c]))) // result: (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
for { for {
t := v.Type t := v.Type
@ -14887,7 +14887,7 @@ func rewriteValuegeneric_OpMod32u(v *Value) bool {
break break
} }
c := auxIntToInt32(v_1.AuxInt) c := auxIntToInt32(v_1.AuxInt)
if !(x.Op != OpConst32 && c > 0 && umagicOK32(c)) { if !(x.Op != OpConst32 && c != 0) {
break break
} }
v.reset(OpSub32) v.reset(OpSub32)
@ -15043,7 +15043,7 @@ func rewriteValuegeneric_OpMod64u(v *Value) bool {
return true return true
} }
// match: (Mod64u <t> x (Const64 [c])) // match: (Mod64u <t> x (Const64 [c]))
// cond: x.Op != OpConst64 && c > 0 && umagicOK64(c) // cond: x.Op != OpConst64 && c != 0
// result: (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c]))) // result: (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
for { for {
t := v.Type t := v.Type
@ -15052,7 +15052,7 @@ func rewriteValuegeneric_OpMod64u(v *Value) bool {
break break
} }
c := auxIntToInt64(v_1.AuxInt) c := auxIntToInt64(v_1.AuxInt)
if !(x.Op != OpConst64 && c > 0 && umagicOK64(c)) { if !(x.Op != OpConst64 && c != 0) {
break break
} }
v.reset(OpSub64) v.reset(OpSub64)
@ -15197,7 +15197,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool {
return true return true
} }
// match: (Mod8u <t> x (Const8 [c])) // match: (Mod8u <t> x (Const8 [c]))
// cond: x.Op != OpConst8 && c > 0 && umagicOK8( c) // cond: x.Op != OpConst8 && c != 0
// result: (Sub8 x (Mul8 <t> (Div8u <t> x (Const8 <t> [c])) (Const8 <t> [c]))) // result: (Sub8 x (Mul8 <t> (Div8u <t> x (Const8 <t> [c])) (Const8 <t> [c])))
for { for {
t := v.Type t := v.Type
@ -15206,7 +15206,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool {
break break
} }
c := auxIntToInt8(v_1.AuxInt) c := auxIntToInt8(v_1.AuxInt)
if !(x.Op != OpConst8 && c > 0 && umagicOK8(c)) { if !(x.Op != OpConst8 && c != 0) {
break break
} }
v.reset(OpSub8) v.reset(OpSub8)

View file

@ -1223,7 +1223,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1]) return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1])
}, },
sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.MIPS64, sys.RISCV64, sys.Loong64) sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.PPC64, sys.S390X, sys.MIPS64, sys.MIPS, sys.RISCV64, sys.Loong64)
alias("math/bits", "Mul", "math/bits", "Mul64", p8...) alias("math/bits", "Mul", "math/bits", "Mul64", p8...)
alias("internal/runtime/math", "Mul64", "math/bits", "Mul64", p8...) alias("internal/runtime/math", "Mul64", "math/bits", "Mul64", p8...)
addF("math/bits", "Add64", addF("math/bits", "Add64",

View file

@ -33,6 +33,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
{"386", "internal/runtime/sys", "TrailingZeros64"}: struct{}{}, {"386", "internal/runtime/sys", "TrailingZeros64"}: struct{}{},
{"386", "internal/runtime/sys", "TrailingZeros8"}: struct{}{}, {"386", "internal/runtime/sys", "TrailingZeros8"}: struct{}{},
{"386", "math", "sqrt"}: struct{}{}, {"386", "math", "sqrt"}: struct{}{},
{"386", "math/bits", "Mul64"}: struct{}{},
{"386", "math/bits", "ReverseBytes32"}: struct{}{}, {"386", "math/bits", "ReverseBytes32"}: struct{}{},
{"386", "math/bits", "ReverseBytes64"}: struct{}{}, {"386", "math/bits", "ReverseBytes64"}: struct{}{},
{"386", "math/bits", "TrailingZeros16"}: struct{}{}, {"386", "math/bits", "TrailingZeros16"}: struct{}{},
@ -208,6 +209,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
{"arm", "math/bits", "Len32"}: struct{}{}, {"arm", "math/bits", "Len32"}: struct{}{},
{"arm", "math/bits", "Len64"}: struct{}{}, {"arm", "math/bits", "Len64"}: struct{}{},
{"arm", "math/bits", "Len8"}: struct{}{}, {"arm", "math/bits", "Len8"}: struct{}{},
{"arm", "math/bits", "Mul64"}: struct{}{},
{"arm", "math/bits", "ReverseBytes32"}: struct{}{}, {"arm", "math/bits", "ReverseBytes32"}: struct{}{},
{"arm", "math/bits", "ReverseBytes64"}: struct{}{}, {"arm", "math/bits", "ReverseBytes64"}: struct{}{},
{"arm", "math/bits", "RotateLeft32"}: struct{}{}, {"arm", "math/bits", "RotateLeft32"}: struct{}{},
@ -557,6 +559,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
{"mips", "math/bits", "Len32"}: struct{}{}, {"mips", "math/bits", "Len32"}: struct{}{},
{"mips", "math/bits", "Len64"}: struct{}{}, {"mips", "math/bits", "Len64"}: struct{}{},
{"mips", "math/bits", "Len8"}: struct{}{}, {"mips", "math/bits", "Len8"}: struct{}{},
{"mips", "math/bits", "Mul64"}: struct{}{},
{"mips", "math/bits", "TrailingZeros16"}: struct{}{}, {"mips", "math/bits", "TrailingZeros16"}: struct{}{},
{"mips", "math/bits", "TrailingZeros32"}: struct{}{}, {"mips", "math/bits", "TrailingZeros32"}: struct{}{},
{"mips", "math/bits", "TrailingZeros64"}: struct{}{}, {"mips", "math/bits", "TrailingZeros64"}: struct{}{},
@ -806,6 +809,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
{"mipsle", "math/bits", "Len32"}: struct{}{}, {"mipsle", "math/bits", "Len32"}: struct{}{},
{"mipsle", "math/bits", "Len64"}: struct{}{}, {"mipsle", "math/bits", "Len64"}: struct{}{},
{"mipsle", "math/bits", "Len8"}: struct{}{}, {"mipsle", "math/bits", "Len8"}: struct{}{},
{"mipsle", "math/bits", "Mul64"}: struct{}{},
{"mipsle", "math/bits", "TrailingZeros16"}: struct{}{}, {"mipsle", "math/bits", "TrailingZeros16"}: struct{}{},
{"mipsle", "math/bits", "TrailingZeros32"}: struct{}{}, {"mipsle", "math/bits", "TrailingZeros32"}: struct{}{},
{"mipsle", "math/bits", "TrailingZeros64"}: struct{}{}, {"mipsle", "math/bits", "TrailingZeros64"}: struct{}{},

View file

@ -704,27 +704,21 @@ func walkDivMod(n *ir.BinaryExpr, init *ir.Nodes) ir.Node {
// runtime calls late in SSA processing. // runtime calls late in SSA processing.
if types.RegSize < 8 && (et == types.TINT64 || et == types.TUINT64) { if types.RegSize < 8 && (et == types.TINT64 || et == types.TUINT64) {
if n.Y.Op() == ir.OLITERAL { if n.Y.Op() == ir.OLITERAL {
// Leave div/mod by constant powers of 2 or small 16-bit constants. // Leave div/mod by non-zero uint64 constants.
// The SSA backend will handle those. // The SSA backend will handle those.
// (Zero constants should have been rejected already, but we check just in case.)
switch et { switch et {
case types.TINT64: case types.TINT64:
c := ir.Int64Val(n.Y) if ir.Int64Val(n.Y) != 0 {
if c < 0 {
c = -c
}
if c != 0 && c&(c-1) == 0 {
return n return n
} }
case types.TUINT64: case types.TUINT64:
c := ir.Uint64Val(n.Y) if ir.Uint64Val(n.Y) != 0 {
if c < 1<<16 {
return n
}
if c != 0 && c&(c-1) == 0 {
return n return n
} }
} }
} }
// Build call to uint64div, uint64mod, int64div, or int64mod.
var fn string var fn string
if et == types.TINT64 { if et == types.TINT64 {
fn = "int64" fn = "int64"

View file

@ -167,7 +167,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.Op386SBBL: ssa.Op386SBBL:
opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg()) opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry: case ssa.Op386ADDLcarry, ssa.Op386ADCLcarry, ssa.Op386SUBLcarry:
// output 0 is carry/borrow, output 1 is the low 32 bits. // output 0 is carry/borrow, output 1 is the low 32 bits.
opregreg(s, v.Op.Asm(), v.Reg0(), v.Args[1].Reg()) opregreg(s, v.Op.Asm(), v.Reg0(), v.Args[1].Reg())

View file

@ -279,7 +279,10 @@ func div3_uint32(i uint32) uint32 {
} }
func div3_uint64(i uint64) uint64 { func div3_uint64(i uint64) uint64 {
// 386 "CALL" // 386: "MOVL [$]-1431655766"
// 386: "MULL"
// 386: "SHRL [$]1"
// 386: -".*CALL"
// arm64: "MOVD [$]-6148914691236517205," // arm64: "MOVD [$]-6148914691236517205,"
// arm64: "UMULH" // arm64: "UMULH"
// arm64: "LSR [$]1," // arm64: "LSR [$]1,"
@ -308,7 +311,10 @@ func div14_uint32(i uint32) uint32 {
} }
func div14_uint64(i uint64) uint64 { func div14_uint64(i uint64) uint64 {
// 386 "CALL" // 386: "MOVL [$]-1840700270,"
// 386: "MULL"
// 386: "SHRL [$]2,"
// 386: -".*CALL"
// arm64: "MOVD [$]-7905747460161236406," // arm64: "MOVD [$]-7905747460161236406,"
// arm64: "UMULH" // arm64: "UMULH"
// arm64: "LSR [$]2," // arm64: "LSR [$]2,"
@ -343,7 +349,10 @@ func div7_uint32(i uint32) uint32 {
} }
func div7_uint64(i uint64) uint64 { func div7_uint64(i uint64) uint64 {
// 386 "CALL" // 386: "MOVL [$]-1840700269,"
// 386: "MULL"
// 386: "SHRL [$]2,"
// 386: -".*CALL"
// arm64: "MOVD [$]2635249153387078803," // arm64: "MOVD [$]2635249153387078803,"
// arm64: "UMULH" // arm64: "UMULH"
// arm64: "SUB", // arm64: "SUB",
@ -353,7 +362,11 @@ func div7_uint64(i uint64) uint64 {
} }
func div12345_uint64(i uint64) uint64 { func div12345_uint64(i uint64) uint64 {
// 386 "CALL" // 386: "MOVL [$]-1444876402,"
// 386: "MOVL [$]835683390,"
// 386: "MULL"
// 386: "SHRL [$]13,"
// 386: "SHLL [$]19,"
// arm64: "MOVD [$]-6205696892516465602," // arm64: "MOVD [$]-6205696892516465602,"
// arm64: "UMULH" // arm64: "UMULH"
// arm64: "LSR [$]13," // arm64: "LSR [$]13,"
@ -869,7 +882,12 @@ func ndivis6_int32(i int32) bool {
} }
func divis6_int64(i int64) bool { func divis6_int64(i int64) bool {
// 386 "CALL" // 386: "IMUL3L [$]-1431655766,"
// 386: "IMUL3L [$]-1431655765,"
// 386: "ADCL [$]715827882,"
// 386: "CMPL .*, [$]715827882"
// 386: "CMPL .*, [$]-1431655766"
// 386: "SETLS"
// arm64: "MOVD [$]-6148914691236517205," // arm64: "MOVD [$]-6148914691236517205,"
// arm64: "MUL " // arm64: "MUL "
// arm64: "MOVD [$]3074457345618258602," // arm64: "MOVD [$]3074457345618258602,"
@ -880,7 +898,12 @@ func divis6_int64(i int64) bool {
} }
func ndivis6_int64(i int64) bool { func ndivis6_int64(i int64) bool {
// 386 "CALL" // 386: "IMUL3L [$]-1431655766,"
// 386: "IMUL3L [$]-1431655765,"
// 386: "ADCL [$]715827882,"
// 386: "CMPL .*, [$]715827882"
// 386: "CMPL .*, [$]-1431655766"
// 386: "SETHI"
// arm64: "MOVD [$]-6148914691236517205," // arm64: "MOVD [$]-6148914691236517205,"
// arm64: "MUL " // arm64: "MUL "
// arm64: "MOVD [$]3074457345618258602," // arm64: "MOVD [$]3074457345618258602,"
@ -973,7 +996,14 @@ func div_ndivis6_uint32(i uint32) (uint32, bool) {
} }
func div_divis6_uint64(i uint64) (uint64, bool) { func div_divis6_uint64(i uint64) (uint64, bool) {
// 386 "CALL" // 386: "MOVL [$]-1431655766,"
// 386: "MOVL [$]-1431655765,"
// 386: "MULL"
// 386: "SHRL [$]2,"
// 386: "SHLL [$]30,"
// 386: "SETEQ"
// 386: -".*CALL"
// 386: -"RO[RL]"
// arm64: "MOVD [$]-6148914691236517205," // arm64: "MOVD [$]-6148914691236517205,"
// arm64: "UMULH" // arm64: "UMULH"
// arm64: "LSR [$]2," // arm64: "LSR [$]2,"
@ -983,7 +1013,14 @@ func div_divis6_uint64(i uint64) (uint64, bool) {
} }
func div_ndivis6_uint64(i uint64) (uint64, bool) { func div_ndivis6_uint64(i uint64) (uint64, bool) {
// 386 "CALL" // 386: "MOVL [$]-1431655766,"
// 386: "MOVL [$]-1431655765,"
// 386: "MULL"
// 386: "SHRL [$]2,"
// 386: "SHLL [$]30,"
// 386: "SETNE"
// 386: -".*CALL"
// 386: -"RO[RL]"
// arm64: "MOVD [$]-6148914691236517205," // arm64: "MOVD [$]-6148914691236517205,"
// arm64: "UMULH" // arm64: "UMULH"
// arm64: "LSR [$]2," // arm64: "LSR [$]2,"
@ -1091,7 +1128,16 @@ func div_ndivis6_int32(i int32) (int32, bool) {
} }
func div_divis6_int64(i int64) (int64, bool) { func div_divis6_int64(i int64) (int64, bool) {
// 386 "CALL" // 386: "ANDL [$]-1431655766,"
// 386: "ANDL [$]-1431655765,"
// 386: "MOVL [$]-1431655766,"
// 386: "MOVL [$]-1431655765,"
// 386: "SUBL" "SBBL"
// 386: "MULL"
// 386: "SETEQ"
// 386: -"SET(LS|HI)"
// 386: -".*CALL"
// 386: -"RO[RL]"
// arm64: "MOVD [$]-6148914691236517205," // arm64: "MOVD [$]-6148914691236517205,"
// arm64: "SMULH" // arm64: "SMULH"
// arm64: "ADD" // arm64: "ADD"
@ -1103,7 +1149,16 @@ func div_divis6_int64(i int64) (int64, bool) {
} }
func div_ndivis6_int64(i int64) (int64, bool) { func div_ndivis6_int64(i int64) (int64, bool) {
// 386 "CALL" // 386: "ANDL [$]-1431655766,"
// 386: "ANDL [$]-1431655765,"
// 386: "MOVL [$]-1431655766,"
// 386: "MOVL [$]-1431655765,"
// 386: "SUBL" "SBBL"
// 386: "MULL"
// 386: "SETNE"
// 386: -"SET(LS|HI)"
// 386: -".*CALL"
// 386: -"RO[RL]"
// arm64: "MOVD [$]-6148914691236517205," // arm64: "MOVD [$]-6148914691236517205,"
// arm64: "SMULH" // arm64: "SMULH"
// arm64: "ADD" // arm64: "ADD"