diff --git a/src/cmd/compile/internal/arm/ssa.go b/src/cmd/compile/internal/arm/ssa.go index a3bfb491b8b..b31ffa474bc 100644 --- a/src/cmd/compile/internal/arm/ssa.go +++ b/src/cmd/compile/internal/arm/ssa.go @@ -245,6 +245,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMADDS, + ssa.OpARMADCS, ssa.OpARMSUBS: r := v.Reg0() r1 := v.Args[0].Reg() diff --git a/src/cmd/compile/internal/ssa/_gen/386.rules b/src/cmd/compile/internal/ssa/_gen/386.rules index 5f115024192..cbe56f7579e 100644 --- a/src/cmd/compile/internal/ssa/_gen/386.rules +++ b/src/cmd/compile/internal/ssa/_gen/386.rules @@ -7,6 +7,7 @@ (Add(32|64)F ...) => (ADDS(S|D) ...) (Add32carry ...) => (ADDLcarry ...) (Add32withcarry ...) => (ADCL ...) +(Add32carrywithcarry ...) => (ADCLcarry ...) (Sub(Ptr|32|16|8) ...) => (SUBL ...) (Sub(32|64)F ...) => (SUBS(S|D) ...) diff --git a/src/cmd/compile/internal/ssa/_gen/386Ops.go b/src/cmd/compile/internal/ssa/_gen/386Ops.go index 60599a33abb..09bfc4226ff 100644 --- a/src/cmd/compile/internal/ssa/_gen/386Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/386Ops.go @@ -90,22 +90,23 @@ func init() { // Common regInfo var ( - gp01 = regInfo{inputs: nil, outputs: gponly} - gp11 = regInfo{inputs: []regMask{gp}, outputs: gponly} - gp11sp = regInfo{inputs: []regMask{gpsp}, outputs: gponly} - gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly} - gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} - gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}} - gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}} - gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly} - gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} - gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly} - gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly} - gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}} - gp11div = regInfo{inputs: 
[]regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, clobbers: dx} - gp21hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax} - gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax} - gp21mul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}} + gp01 = regInfo{inputs: nil, outputs: gponly} + gp11 = regInfo{inputs: []regMask{gp}, outputs: gponly} + gp11sp = regInfo{inputs: []regMask{gpsp}, outputs: gponly} + gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly} + gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} + gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}} + gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}} + gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly} + gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} + gp2carry1carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}} + gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly} + gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly} + gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}} + gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, clobbers: dx} + gp21hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax} + gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax} + gp21mul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}} gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}} gp1flags = regInfo{inputs: []regMask{gpsp}} @@ -181,10 +182,11 @@ func init() { {name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true}, // arg0 + arg1 {name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", typ: "UInt32", clobberFlags: true}, // arg0 + auxint - {name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true}, 
// arg0 + arg1, generates pair - {name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true}, // arg0 + auxint, generates pair - {name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags - {name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0+auxint+carry(arg1), where arg1 is flags + {name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1, generates pair + {name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true}, // arg0 + auxint, generates pair + {name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags + {name: "ADCLcarry", argLength: 3, reg: gp2carry1carry, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags, generates pair + {name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0+auxint+carry(arg1), where arg1 is flags {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true}, // arg0 - arg1 {name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 - auxint diff --git a/src/cmd/compile/internal/ssa/_gen/ARM.rules b/src/cmd/compile/internal/ssa/_gen/ARM.rules index 18b5d6bba60..b63ca23de14 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM.rules @@ -6,6 +6,7 @@ (Add(32|64)F ...) => (ADD(F|D) ...) (Add32carry ...) => (ADDS ...) (Add32withcarry ...) => (ADC ...) +(Add32carrywithcarry ...) => (ADCS ...) (Sub(Ptr|32|16|8) ...) => (SUB ...) (Sub(32|64)F ...) 
=> (SUB(F|D) ...) diff --git a/src/cmd/compile/internal/ssa/_gen/ARMOps.go b/src/cmd/compile/internal/ssa/_gen/ARMOps.go index 01cd48835e2..59bb71b2e3c 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARMOps.go +++ b/src/cmd/compile/internal/ssa/_gen/ARMOps.go @@ -102,36 +102,37 @@ func init() { ) // Common regInfo var ( - gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} - gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} - gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}} - gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}} - gp1flags = regInfo{inputs: []regMask{gpg}} - gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}} - gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} - gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}} - gp2flags = regInfo{inputs: []regMask{gpg, gpg}} - gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} - gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}} - gp31 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}} - gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}} - gp3flags = regInfo{inputs: []regMask{gp, gp, gp}} - gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}} - gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} - gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} - gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}} - gp2store = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}} - fp01 = regInfo{inputs: nil, outputs: []regMask{fp}} - fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}} - fp1flags = regInfo{inputs: []regMask{fp}} - fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}, clobbers: buildReg("F15")} // int-float conversion uses F15 as tmp - gpfp = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}, clobbers: buildReg("F15")} - fp21 = regInfo{inputs: 
[]regMask{fp, fp}, outputs: []regMask{fp}} - fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}} - fp2flags = regInfo{inputs: []regMask{fp, fp}} - fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}} - fpstore = regInfo{inputs: []regMask{gpspsbg, fp}} - readflags = regInfo{inputs: nil, outputs: []regMask{gp}} + gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} + gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} + gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}} + gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}} + gp1flags = regInfo{inputs: []regMask{gpg}} + gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}} + gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} + gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}} + gp2flags = regInfo{inputs: []regMask{gpg, gpg}} + gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} + gp2flags1carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}} + gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}} + gp31 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}} + gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}} + gp3flags = regInfo{inputs: []regMask{gp, gp, gp}} + gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}} + gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} + gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} + gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}} + gp2store = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}} + fp01 = regInfo{inputs: nil, outputs: []regMask{fp}} + fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}} + fp1flags = regInfo{inputs: []regMask{fp}} + fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}, clobbers: buildReg("F15")} // int-float conversion uses F15 as tmp + gpfp = 
regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}, clobbers: buildReg("F15")} + fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}} + fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}} + fp2flags = regInfo{inputs: []regMask{fp, fp}} + fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}} + fpstore = regInfo{inputs: []regMask{gpspsbg, fp}} + readflags = regInfo{inputs: nil, outputs: []regMask{gp}} ) ops := []opData{ // binary ops @@ -161,16 +162,17 @@ func init() { call: false, // TODO(mdempsky): Should this be true? }, - {name: "ADDS", argLength: 2, reg: gp21carry, asm: "ADD", commutative: true}, // arg0 + arg1, set carry flag - {name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"}, // arg0 + auxInt, set carry flag - {name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, arg2=flags - {name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"}, // arg0 + auxInt + carry, arg1=flags - {name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"}, // arg0 - arg1, set carry flag - {name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"}, // arg0 - auxInt, set carry flag - {name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"}, // auxInt - arg0, set carry flag - {name: "SBC", argLength: 3, reg: gp2flags1, asm: "SBC"}, // arg0 - arg1 - carry, arg2=flags - {name: "SBCconst", argLength: 2, reg: gp1flags1, asm: "SBC", aux: "Int32"}, // arg0 - auxInt - carry, arg1=flags - {name: "RSCconst", argLength: 2, reg: gp1flags1, asm: "RSC", aux: "Int32"}, // auxInt - arg0 - carry, arg1=flags + {name: "ADDS", argLength: 2, reg: gp21carry, asm: "ADD", commutative: true}, // arg0 + arg1, set carry flag + {name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"}, // arg0 + auxInt, set carry flag + {name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, 
arg2=flags + {name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"}, // arg0 + auxInt + carry, arg1=flags + {name: "ADCS", argLength: 3, reg: gp2flags1carry, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, sets carry + {name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"}, // arg0 - arg1, set carry flag + {name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"}, // arg0 - auxInt, set carry flag + {name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"}, // auxInt - arg0, set carry flag + {name: "SBC", argLength: 3, reg: gp2flags1, asm: "SBC"}, // arg0 - arg1 - carry, arg2=flags + {name: "SBCconst", argLength: 2, reg: gp1flags1, asm: "SBC", aux: "Int32"}, // arg0 - auxInt - carry, arg1=flags + {name: "RSCconst", argLength: 2, reg: gp1flags1, asm: "RSC", aux: "Int32"}, // auxInt - arg0 - carry, arg1=flags {name: "MULLU", argLength: 2, reg: gp22, asm: "MULLU", commutative: true}, // arg0 * arg1, high 32 bits in out0, low 32 bits in out1 {name: "MULA", argLength: 3, reg: gp31, asm: "MULA"}, // arg0 * arg1 + arg2 diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS.rules b/src/cmd/compile/internal/ssa/_gen/MIPS.rules index 80bf9017f52..fe1e00a4e4c 100644 --- a/src/cmd/compile/internal/ssa/_gen/MIPS.rules +++ b/src/cmd/compile/internal/ssa/_gen/MIPS.rules @@ -9,6 +9,12 @@ (Select1 (Add32carry x y)) => (SGTU x (ADD x y)) (Add32withcarry x y c) => (ADD c (ADD x y)) +(Select0 (Add32carrywithcarry x y c)) => (ADD c (ADD x y)) +(Select1 (Add32carrywithcarry x y c)) => + (OR + (SGTU x xy:(ADD x y)) + (SGTU xy (ADD c xy))) + (Sub(Ptr|32|16|8) ...) => (SUB ...) (Sub(32|64)F ...) => (SUB(F|D) ...) diff --git a/src/cmd/compile/internal/ssa/_gen/dec64.rules b/src/cmd/compile/internal/ssa/_gen/dec64.rules index 589c2fcfc14..483818906e6 100644 --- a/src/cmd/compile/internal/ssa/_gen/dec64.rules +++ b/src/cmd/compile/internal/ssa/_gen/dec64.rules @@ -6,8 +6,12 @@ // architectures.
These rules work together with the decomposeBuiltin // pass which handles phis of these typ. +(Last ___) => v.Args[len(v.Args)-1] + (Int64Hi (Int64Make hi _)) => hi (Int64Lo (Int64Make _ lo)) => lo +(Select0 (MakeTuple x y)) => x +(Select1 (MakeTuple x y)) => y (Load ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() => (Int64Make @@ -60,30 +64,85 @@ (Arg {n} [off]) (Arg {n} [off+4])) -(Add64 x y) => - (Int64Make - (Add32withcarry - (Int64Hi x) - (Int64Hi y) - (Select1 (Add32carry (Int64Lo x) (Int64Lo y)))) - (Select0 (Add32carry (Int64Lo x) (Int64Lo y)))) +(Add64 x y) => + (Last + x0: (Int64Lo x) + x1: (Int64Hi x) + y0: (Int64Lo y) + y1: (Int64Hi y) + add: (Add32carry x0 y0) + (Int64Make + (Add32withcarry x1 y1 (Select1 add)) + (Select0 add))) -(Sub64 x y) => - (Int64Make - (Sub32withcarry - (Int64Hi x) - (Int64Hi y) - (Select1 (Sub32carry (Int64Lo x) (Int64Lo y)))) - (Select0 (Sub32carry (Int64Lo x) (Int64Lo y)))) +(Sub64 x y) => + (Last + x0: (Int64Lo x) + x1: (Int64Hi x) + y0: (Int64Lo y) + y1: (Int64Hi y) + sub: (Sub32carry x0 y0) + (Int64Make + (Sub32withcarry x1 y1 (Select1 sub)) + (Select0 sub))) + +(Mul64 x y) => + (Last + x0: (Int64Lo x) + x1: (Int64Hi x) + y0: (Int64Lo y) + y1: (Int64Hi y) + x0y0: (Mul32uhilo x0 y0) + x0y0Hi: (Select0 x0y0) + x0y0Lo: (Select1 x0y0) + (Int64Make + (Add32 x0y0Hi + (Add32 + (Mul32 x0 y1) + (Mul32 x1 y0))) + x0y0Lo)) + +(Mul64uhilo x y) => + (Last + x0: (Int64Lo x) + x1: (Int64Hi x) + y0: (Int64Lo y) + y1: (Int64Hi y) + x0y0: (Mul32uhilo x0 y0) + x0y1: (Mul32uhilo x0 y1) + x1y0: (Mul32uhilo x1 y0) + x1y1: (Mul32uhilo x1 y1) + x0y0Hi: (Select0 x0y0) + x0y0Lo: (Select1 x0y0) + x0y1Hi: (Select0 x0y1) + x0y1Lo: (Select1 x0y1) + x1y0Hi: (Select0 x1y0) + x1y0Lo: (Select1 x1y0) + x1y1Hi: (Select0 x1y1) + x1y1Lo: (Select1 x1y1) + w1a: (Add32carry x0y0Hi x0y1Lo) + w2a: (Add32carrywithcarry x0y1Hi x1y0Hi (Select1 w1a)) + w3a: (Add32withcarry x1y1Hi (Const32 [0]) (Select1 w2a)) + w1b: (Add32carry x1y0Lo (Select0 w1a)) + 
w2b: (Add32carrywithcarry x1y1Lo (Select0 w2a) (Select1 w1b)) + w3b: (Add32withcarry w3a (Const32 [0]) (Select1 w2b)) + (MakeTuple + (Int64Make w3b (Select0 w2b)) + (Int64Make (Select0 w1b) x0y0Lo))) + +(Hmul64u x y) => (Select0 (Mul64uhilo x y)) + +// Hacker's Delight p. 175: signed hmul = unsigned hmul - (x<0)&y - (y<0)&x. +(Hmul64 x y) => + (Last + p: (Hmul64u x y) + xSign: (Int64Make xs:(Rsh32x32 (Int64Hi x) (Const32 [31])) xs) + ySign: (Int64Make ys:(Rsh32x32 (Int64Hi y) (Const32 [31])) ys) + (Sub64 (Sub64 p (And64 xSign y)) (And64 ySign x))) + +// (x+y)/2 => (x-y)/2 + y +(Avg64u x y) => (Add64 (Rsh64Ux32 (Sub64 x y) (Const32 [1])) y) -(Mul64 x y) => - (Int64Make - (Add32 - (Mul32 (Int64Lo x) (Int64Hi y)) - (Add32 - (Mul32 (Int64Hi x) (Int64Lo y)) - (Select0 (Mul32uhilo (Int64Lo x) (Int64Lo y))))) - (Select1 (Mul32uhilo (Int64Lo x) (Int64Lo y)))) (And64 x y) => (Int64Make diff --git a/src/cmd/compile/internal/ssa/_gen/divmod.rules b/src/cmd/compile/internal/ssa/_gen/divmod.rules index c7c9e132095..21e0a194068 100644 --- a/src/cmd/compile/internal/ssa/_gen/divmod.rules +++ b/src/cmd/compile/internal/ssa/_gen/divmod.rules @@ -118,7 +118,7 @@ (Hmul32 x (Const32 [int32(smagic32(c).m/2)])) (Const64 [smagic32(c).s - 1])) (Rsh32x64 x (Const64 [31]))) -(Div64 x (Const64 [c])) && smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul => +(Div64 x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul => (Sub64 (Rsh64x64 (Hmul64 x (Const64 [int64(smagic64(c).m/2)])) @@ -132,7 +132,7 @@ (Add32 x (Hmul32 x (Const32 [int32(smagic32(c).m)]))) (Const64 [smagic32(c).s])) (Rsh32x64 x (Const64 [31]))) -(Div64 x (Const64 [c])) && smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul => +(Div64 x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul => (Sub64 (Rsh64x64 (Add64 x (Hmul64 x (Const64 [int64(smagic64(c).m)]))) @@ -153,7 +153,7 @@ (Rsh32Ux64 (Hmul32u x (Const32 
[int32(smagic32(c).m)])) (Const64 [smagic32(c).s])) -(Div64u x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul => +(Div64u x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.useHmul => (Rsh64Ux64 (Hmul64u x (Const64 [int64(smagic64(c).m)])) (Const64 [smagic64(c).s])) @@ -185,7 +185,7 @@ (Rsh32Ux64 (Hmul32u x (Const32 [int32(1<<31 + umagic32(c).m/2)])) (Const64 [umagic32(c).s - 1])) -(Div64u x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul => +(Div64u x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul => (Rsh64Ux64 (Hmul64u x (Const64 [int64(1<<63 + umagic64(c).m/2)])) (Const64 [umagic64(c).s - 1])) @@ -211,7 +211,7 @@ (Rsh32Ux64 x (Const64 [1])) (Const32 [int32(1<<31 + (umagic32(c).m+1)/2)])) (Const64 [umagic32(c).s - 2])) -(Div64u x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul => +(Div64u x (Const64 [c])) && umagicOK64(c) && c&1 == 0 && config.useHmul => (Rsh64Ux64 (Hmul64u (Rsh64Ux64 x (Const64 [1])) @@ -237,52 +237,7 @@ (Rsh32Ux64 (Avg32u x (Hmul32u x (Const32 [int32(umagic32(c).m)]))) (Const64 [umagic32(c).s - 1])) -(Div64u x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul => +(Div64u x (Const64 [c])) && umagicOK64(c) && config.useAvg && config.useHmul => (Rsh64Ux64 (Avg64u x (Hmul64u x (Const64 [int64(umagic64(c).m)]))) (Const64 [umagic64(c).s - 1])) - -// Case 9. For unsigned 64-bit divides on 32-bit machines, -// if the constant fits in 16 bits (so that the last term -// fits in 32 bits), convert to three 32-bit divides by a constant. 
-// -// If 1<<32 = Q * c + R -// and x = hi << 32 + lo -// -// Then x = (hi/c*c + hi%c) << 32 + lo -// = hi/c*c<<32 + hi%c<<32 + lo -// = hi/c*c<<32 + (hi%c)*(Q*c+R) + lo/c*c + lo%c -// = hi/c*c<<32 + (hi%c)*Q*c + lo/c*c + (hi%c*R+lo%c) -// and x / c = (hi/c)<<32 + (hi%c)*Q + lo/c + (hi%c*R+lo%c)/c -(Div64u x (Const64 [c])) && c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul => - (Add64 - (Add64 - (Add64 - (Lsh64x64 - (ZeroExt32to64 - (Div32u - (Trunc64to32 (Rsh64Ux64 x (Const64 [32]))) - (Const32 [int32(c)]))) - (Const64 [32])) - (ZeroExt32to64 (Div32u (Trunc64to32 x) (Const32 [int32(c)])))) - (Mul64 - (ZeroExt32to64 - (Mod32u - (Trunc64to32 (Rsh64Ux64 x (Const64 [32]))) - (Const32 [int32(c)]))) - (Const64 [int64((1<<32)/c)]))) - (ZeroExt32to64 - (Div32u - (Add32 - (Mod32u (Trunc64to32 x) (Const32 [int32(c)])) - (Mul32 - (Mod32u - (Trunc64to32 (Rsh64Ux64 x (Const64 [32]))) - (Const32 [int32(c)])) - (Const32 [int32((1<<32)%c)]))) - (Const32 [int32(c)])))) - -// Repeated from generic.rules, for expanding the expression above -// (which can then be further expanded to handle the nested Div32u). 
-(Mod32u x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK32(c) - => (Sub32 x (Mul32 (Div32u x (Const32 [c])) (Const32 [c]))) diff --git a/src/cmd/compile/internal/ssa/_gen/generic.rules b/src/cmd/compile/internal/ssa/_gen/generic.rules index 3f026448326..7e3aba1e5ee 100644 --- a/src/cmd/compile/internal/ssa/_gen/generic.rules +++ b/src/cmd/compile/internal/ssa/_gen/generic.rules @@ -1106,13 +1106,13 @@ => (Sub32 x (Mul32 (Div32 x (Const32 [c])) (Const32 [c]))) (Mod64 x (Const64 [c])) && x.Op != OpConst64 && (c > 0 || c == -1<<63) => (Sub64 x (Mul64 (Div64 x (Const64 [c])) (Const64 [c]))) -(Mod8u x (Const8 [c])) && x.Op != OpConst8 && c > 0 && umagicOK8( c) +(Mod8u x (Const8 [c])) && x.Op != OpConst8 && c != 0 => (Sub8 x (Mul8 (Div8u x (Const8 [c])) (Const8 [c]))) -(Mod16u x (Const16 [c])) && x.Op != OpConst16 && c > 0 && umagicOK16(c) +(Mod16u x (Const16 [c])) && x.Op != OpConst16 && c != 0 => (Sub16 x (Mul16 (Div16u x (Const16 [c])) (Const16 [c]))) -(Mod32u x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK32(c) +(Mod32u x (Const32 [c])) && x.Op != OpConst32 && c != 0 => (Sub32 x (Mul32 (Div32u x (Const32 [c])) (Const32 [c]))) -(Mod64u x (Const64 [c])) && x.Op != OpConst64 && c > 0 && umagicOK64(c) +(Mod64u x (Const64 [c])) && x.Op != OpConst64 && c != 0 => (Sub64 x (Mul64 (Div64u x (Const64 [c])) (Const64 [c]))) // Set up for mod->mul+rot optimization in genericlateopt.rules. diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go index 1f6ad4e16d9..09fb4bf03f9 100644 --- a/src/cmd/compile/internal/ssa/_gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go @@ -16,6 +16,9 @@ package main // are signed or unsigned. var genericOps = []opData{ + // Pseudo-op. + {name: "Last", argLength: -1}, // return last element of tuple; for "let" bindings + // 2-input arithmetic // Types must be consistent with Go typing. 
Add, for example, must take two values // of the same type and produces that same type. @@ -557,8 +560,9 @@ var genericOps = []opData{ {name: "Int64Hi", argLength: 1, typ: "UInt32"}, // high 32-bit of arg0 {name: "Int64Lo", argLength: 1, typ: "UInt32"}, // low 32-bit of arg0 - {name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry) - {name: "Add32withcarry", argLength: 3, commutative: true}, // arg0 + arg1 + arg2, arg2=carry (0 or 1) + {name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry) + {name: "Add32withcarry", argLength: 3, commutative: true}, // arg0 + arg1 + arg2, arg2=carry (0 or 1) + {name: "Add32carrywithcarry", argLength: 3, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1 + arg2, arg2=carry, returns (value, carry) {name: "Sub32carry", argLength: 2, typ: "(UInt32,Flags)"}, // arg0 - arg1, returns (value, carry) {name: "Sub32withcarry", argLength: 3}, // arg0 - arg1 - arg2, arg2=carry (0 or 1) diff --git a/src/cmd/compile/internal/ssa/_gen/rulegen.go b/src/cmd/compile/internal/ssa/_gen/rulegen.go index f818b46511d..e3a10707fed 100644 --- a/src/cmd/compile/internal/ssa/_gen/rulegen.go +++ b/src/cmd/compile/internal/ssa/_gen/rulegen.go @@ -1271,8 +1271,10 @@ func genResult0(rr *RuleRewrite, arch arch, result string, top, move bool, pos s case 0: case 1: rr.add(stmtf("%s.AddArg(%s)", v, all.String())) - default: + case 2, 3, 4, 5, 6: rr.add(stmtf("%s.AddArg%d(%s)", v, len(args), all.String())) + default: + rr.add(stmtf("%s.AddArgs(%s)", v, all.String())) } if cse != nil { @@ -1313,6 +1315,12 @@ outer: d++ case d > 0 && s[i] == close: d-- + case s[i] == ':': + // ignore spaces after colons + nonsp = true + for i+1 < len(s) && (s[i+1] == ' ' || s[i+1] == '\t') { + i++ + } default: nonsp = true } @@ -1347,7 +1355,7 @@ func extract(val string) (op, typ, auxint, aux string, args []string) { val = val[1 : len(val)-1] // remove () // 
Split val up into regions. - // Split by spaces/tabs, except those contained in (), {}, [], or <>. + // Split by spaces/tabs, except those contained in (), {}, [], or <> or after colon. s := split(val) // Extract restrictions and args. @@ -1471,7 +1479,7 @@ func splitNameExpr(arg string) (name, expr string) { // colon is inside the parens, such as in "(Foo x:(Bar))". return "", arg } - return arg[:colon], arg[colon+1:] + return arg[:colon], strings.TrimSpace(arg[colon+1:]) } func getBlockInfo(op string, arch arch) (name string, data blockData) { diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 16a983a5687..264f4b3bf37 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -386,6 +386,7 @@ const ( Op386ADDLcarry Op386ADDLconstcarry Op386ADCL + Op386ADCLcarry Op386ADCLconst Op386SUBL Op386SUBLconst @@ -1182,6 +1183,7 @@ const ( OpARMADDSconst OpARMADC OpARMADCconst + OpARMADCS OpARMSUBS OpARMSUBSconst OpARMRSBSconst @@ -3010,6 +3012,7 @@ const ( OpWasmI64Rotl OpWasmI64Popcnt + OpLast OpAdd8 OpAdd16 OpAdd32 @@ -3336,6 +3339,7 @@ const ( OpInt64Lo OpAdd32carry OpAdd32withcarry + OpAdd32carrywithcarry OpSub32carry OpSub32withcarry OpAdd64carry @@ -3968,6 +3972,24 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ADCLcarry", + argLen: 3, + commutative: true, + resultInArg0: true, + clobberFlags: true, + asm: x86.AADCL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 239}, // AX CX DX BX BP SI DI + {1, 239}, // AX CX DX BX BP SI DI + }, + outputs: []outputInfo{ + {1, 0}, + {0, 239}, // AX CX DX BX BP SI DI + }, + }, + }, { name: "ADCLconst", auxType: auxInt32, @@ -15792,6 +15814,22 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ADCS", + argLen: 3, + commutative: true, + asm: arm.AADC, + reg: regInfo{ + inputs: []inputInfo{ + {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + {1, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + }, + outputs: []outputInfo{ + {1, 0}, 
+ {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + }, + }, + }, { name: "SUBS", argLen: 2, @@ -40672,6 +40710,11 @@ var opcodeTable = [...]opInfo{ }, }, + { + name: "Last", + argLen: -1, + generic: true, + }, { name: "Add8", argLen: 2, @@ -42480,6 +42523,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "Add32carrywithcarry", + argLen: 3, + commutative: true, + generic: true, + }, { name: "Sub32carry", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go index 04954387106..be88dd3cdda 100644 --- a/src/cmd/compile/internal/ssa/rewrite386.go +++ b/src/cmd/compile/internal/ssa/rewrite386.go @@ -257,6 +257,9 @@ func rewriteValue386(v *Value) bool { case OpAdd32carry: v.Op = Op386ADDLcarry return true + case OpAdd32carrywithcarry: + v.Op = Op386ADCLcarry + return true case OpAdd32withcarry: v.Op = Op386ADCL return true diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go index 44380cf8f57..2a90e7b433b 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM.go +++ b/src/cmd/compile/internal/ssa/rewriteARM.go @@ -446,6 +446,9 @@ func rewriteValueARM(v *Value) bool { case OpAdd32carry: v.Op = OpARMADDS return true + case OpAdd32carrywithcarry: + v.Op = OpARMADCS + return true case OpAdd32withcarry: v.Op = OpARMADC return true diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS.go b/src/cmd/compile/internal/ssa/rewriteMIPS.go index fda02e64d19..ff696337ef8 100644 --- a/src/cmd/compile/internal/ssa/rewriteMIPS.go +++ b/src/cmd/compile/internal/ssa/rewriteMIPS.go @@ -6562,6 +6562,23 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool { v.AddArg2(x, y) return true } + // match: (Select0 (Add32carrywithcarry x y c)) + // result: (ADD c (ADD x y)) + for { + if v_0.Op != OpAdd32carrywithcarry { + break + } + t := v_0.Type + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpMIPSADD) + v.Type = t.FieldType(0) + v0 := 
b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0)) + v0.AddArg2(x, y) + v.AddArg2(c, v0) + return true + } // match: (Select0 (Sub32carry x y)) // result: (SUB x y) for { @@ -6759,6 +6776,29 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool { v.AddArg2(x, v0) return true } + // match: (Select1 (Add32carrywithcarry x y c)) + // result: (OR (SGTU x xy:(ADD x y)) (SGTU xy (ADD c xy))) + for { + if v_0.Op != OpAdd32carrywithcarry { + break + } + t := v_0.Type + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpMIPSOR) + v.Type = typ.Bool + v0 := b.NewValue0(v.Pos, OpMIPSSGTU, typ.Bool) + xy := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0)) + xy.AddArg2(x, y) + v0.AddArg2(x, xy) + v2 := b.NewValue0(v.Pos, OpMIPSSGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0)) + v3.AddArg2(c, xy) + v2.AddArg2(xy, v3) + v.AddArg2(v0, v2) + return true + } // match: (Select1 (Sub32carry x y)) // result: (SGTU (SUB x y) x) for { diff --git a/src/cmd/compile/internal/ssa/rewritedec64.go b/src/cmd/compile/internal/ssa/rewritedec64.go index b4da78fd522..a0388551b53 100644 --- a/src/cmd/compile/internal/ssa/rewritedec64.go +++ b/src/cmd/compile/internal/ssa/rewritedec64.go @@ -12,6 +12,8 @@ func rewriteValuedec64(v *Value) bool { return rewriteValuedec64_OpAnd64(v) case OpArg: return rewriteValuedec64_OpArg(v) + case OpAvg64u: + return rewriteValuedec64_OpAvg64u(v) case OpBitLen64: return rewriteValuedec64_OpBitLen64(v) case OpBswap64: @@ -27,10 +29,16 @@ func rewriteValuedec64(v *Value) bool { return true case OpEq64: return rewriteValuedec64_OpEq64(v) + case OpHmul64: + return rewriteValuedec64_OpHmul64(v) + case OpHmul64u: + return rewriteValuedec64_OpHmul64u(v) case OpInt64Hi: return rewriteValuedec64_OpInt64Hi(v) case OpInt64Lo: return rewriteValuedec64_OpInt64Lo(v) + case OpLast: + return rewriteValuedec64_OpLast(v) case OpLeq64: return rewriteValuedec64_OpLeq64(v) case OpLeq64U: @@ -57,6 +65,8 @@ func rewriteValuedec64(v *Value) bool { return 
rewriteValuedec64_OpLsh8x64(v) case OpMul64: return rewriteValuedec64_OpMul64(v) + case OpMul64uhilo: + return rewriteValuedec64_OpMul64uhilo(v) case OpNeg64: return rewriteValuedec64_OpNeg64(v) case OpNeq64: @@ -101,6 +111,10 @@ func rewriteValuedec64(v *Value) bool { return rewriteValuedec64_OpRsh8Ux64(v) case OpRsh8x64: return rewriteValuedec64_OpRsh8x64(v) + case OpSelect0: + return rewriteValuedec64_OpSelect0(v) + case OpSelect1: + return rewriteValuedec64_OpSelect1(v) case OpSignExt16to64: return rewriteValuedec64_OpSignExt16to64(v) case OpSignExt32to64: @@ -133,29 +147,33 @@ func rewriteValuedec64_OpAdd64(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Add64 x y) - // result: (Int64Make (Add32withcarry (Int64Hi x) (Int64Hi y) (Select1 (Add32carry (Int64Lo x) (Int64Lo y)))) (Select0 (Add32carry (Int64Lo x) (Int64Lo y)))) + // match: (Add64 x y) + // result: (Last x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) add: (Add32carry x0 y0) (Int64Make (Add32withcarry x1 y1 (Select1 add)) (Select0 add))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpInt64Make) - v0 := b.NewValue0(v.Pos, OpAdd32withcarry, typ.Int32) - v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) - v2.AddArg(y) - v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) - v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) - v5.AddArg(x) - v6 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) - v6.AddArg(y) - v4.AddArg2(v5, v6) - v3.AddArg(v4) - v0.AddArg3(v1, v2, v3) - v7 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) - v7.AddArg(v4) - v.AddArg2(v0, v7) + v.reset(OpLast) + v.Type = t + x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + x0.AddArg(x) + x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + x1.AddArg(x) + y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + y0.AddArg(y) + y1 := b.NewValue0(v.Pos, 
OpInt64Hi, typ.UInt32) + y1.AddArg(y) + add := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) + add.AddArg2(x0, y0) + v5 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + v6 := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v7.AddArg(add) + v6.AddArg3(x1, y1, v7) + v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + v8.AddArg(add) + v5.AddArg2(v6, v8) + v.AddArg6(x0, x1, y0, y1, add, v5) return true } } @@ -268,6 +286,28 @@ func rewriteValuedec64_OpArg(v *Value) bool { } return false } +func rewriteValuedec64_OpAvg64u(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Avg64u x y) + // result: (Add64 (Rsh64Ux32 (Sub64 x y) (Const32 [1])) y) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpAdd64) + v0 := b.NewValue0(v.Pos, OpRsh64Ux32, t) + v1 := b.NewValue0(v.Pos, OpSub64, t) + v1.AddArg2(x, y) + v2 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) + v2.AuxInt = int32ToAuxInt(1) + v0.AddArg2(v1, v2) + v.AddArg2(v0, y) + return true + } +} func rewriteValuedec64_OpBitLen64(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -430,6 +470,62 @@ func rewriteValuedec64_OpEq64(v *Value) bool { return true } } +func rewriteValuedec64_OpHmul64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Hmul64 x y) + // result: (Last p: (Hmul64u x y) xSign: (Int64Make xs:(Rsh32x32 (Int64Hi x) (Const32 [31])) xs) ySign: (Int64Make ys:(Rsh32x32 (Int64Hi y) (Const32 [31])) ys) (Sub64 (Sub64 p (And64 xSign y)) (And64 ySign x))) + for { + x := v_0 + y := v_1 + v.reset(OpLast) + p := b.NewValue0(v.Pos, OpHmul64u, typ.UInt64) + p.AddArg2(x, y) + xSign := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + xs := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32) + v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v3.AddArg(x) + v4 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) + v4.AuxInt = 
int32ToAuxInt(31) + xs.AddArg2(v3, v4) + xSign.AddArg2(xs, xs) + ySign := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + ys := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v7.AddArg(y) + ys.AddArg2(v7, v4) + ySign.AddArg2(ys, ys) + v8 := b.NewValue0(v.Pos, OpSub64, typ.Int64) + v9 := b.NewValue0(v.Pos, OpSub64, typ.Int64) + v10 := b.NewValue0(v.Pos, OpAnd64, typ.Int64) + v10.AddArg2(xSign, y) + v9.AddArg2(p, v10) + v11 := b.NewValue0(v.Pos, OpAnd64, typ.Int64) + v11.AddArg2(ySign, x) + v8.AddArg2(v9, v11) + v.AddArg4(p, xSign, ySign, v8) + return true + } +} +func rewriteValuedec64_OpHmul64u(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Hmul64u x y) + // result: (Select0 (Mul64uhilo x y)) + for { + x := v_0 + y := v_1 + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpMul64uhilo, types.NewTuple(typ.UInt64, typ.UInt64)) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} func rewriteValuedec64_OpInt64Hi(v *Value) bool { v_0 := v.Args[0] // match: (Int64Hi (Int64Make hi _)) @@ -458,6 +554,14 @@ func rewriteValuedec64_OpInt64Lo(v *Value) bool { } return false } +func rewriteValuedec64_OpLast(v *Value) bool { + // match: (Last ___) + // result: v.Args[len(v.Args)-1] + for { + v.copyOf(v.Args[len(v.Args)-1]) + return true + } +} func rewriteValuedec64_OpLeq64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -1114,35 +1218,124 @@ func rewriteValuedec64_OpMul64(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Mul64 x y) - // result: (Int64Make (Add32 (Mul32 (Int64Lo x) (Int64Hi y)) (Add32 (Mul32 (Int64Hi x) (Int64Lo y)) (Select0 (Mul32uhilo (Int64Lo x) (Int64Lo y))))) (Select1 (Mul32uhilo (Int64Lo x) (Int64Lo y)))) + // match: (Mul64 x y) + // result: (Last x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) x0y0: (Mul32uhilo x0 y0) x0y0Hi: (Select0 x0y0) x0y0Lo: (Select1 x0y0) (Int64Make (Add32 
x0y0Hi (Add32 (Mul32 x0 y1) (Mul32 x1 y0))) x0y0Lo)) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpInt64Make) - v0 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32) - v1 := b.NewValue0(v.Pos, OpMul32, typ.UInt32) - v2 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) - v2.AddArg(x) - v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) - v3.AddArg(y) - v1.AddArg2(v2, v3) - v4 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32) - v5 := b.NewValue0(v.Pos, OpMul32, typ.UInt32) - v6 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) - v6.AddArg(x) - v7 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) - v7.AddArg(y) - v5.AddArg2(v6, v7) - v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) - v9 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) - v9.AddArg2(v2, v7) - v8.AddArg(v9) - v4.AddArg2(v5, v8) - v0.AddArg2(v1, v4) - v10 := b.NewValue0(v.Pos, OpSelect1, typ.UInt32) - v10.AddArg(v9) - v.AddArg2(v0, v10) + v.reset(OpLast) + v.Type = t + x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + x0.AddArg(x) + x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + x1.AddArg(x) + y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + y0.AddArg(y) + y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + y1.AddArg(y) + x0y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) + x0y0.AddArg2(x0, y0) + x0y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + x0y0Hi.AddArg(x0y0) + x0y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32) + x0y0Lo.AddArg(x0y0) + v7 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + v8 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32) + v9 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32) + v10 := b.NewValue0(v.Pos, OpMul32, typ.UInt32) + v10.AddArg2(x0, y1) + v11 := b.NewValue0(v.Pos, OpMul32, typ.UInt32) + v11.AddArg2(x1, y0) + v9.AddArg2(v10, v11) + v8.AddArg2(x0y0Hi, v9) + v7.AddArg2(v8, x0y0Lo) + v.AddArgs(x0, x1, y0, y1, x0y0, x0y0Hi, x0y0Lo, v7) + return true + } +} +func rewriteValuedec64_OpMul64uhilo(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := 
v.Block + typ := &b.Func.Config.Types + // match: (Mul64uhilo x y) + // result: (Last x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) x0y0: (Mul32uhilo x0 y0) x0y1: (Mul32uhilo x0 y1) x1y0: (Mul32uhilo x1 y0) x1y1: (Mul32uhilo x1 y1) x0y0Hi: (Select0 x0y0) x0y0Lo: (Select1 x0y0) x0y1Hi: (Select0 x0y1) x0y1Lo: (Select1 x0y1) x1y0Hi: (Select0 x1y0) x1y0Lo: (Select1 x1y0) x1y1Hi: (Select0 x1y1) x1y1Lo: (Select1 x1y1) w1a: (Add32carry x0y0Hi x0y1Lo) w2a: (Add32carrywithcarry x0y1Hi x1y0Hi (Select1 w1a)) w3a: (Add32withcarry x1y1Hi (Const32 [0]) (Select1 w2a)) w1b: (Add32carry x1y0Lo (Select0 w1a)) w2b: (Add32carrywithcarry x1y1Lo (Select0 w2a) (Select1 w1b)) w3b: (Add32withcarry w3a (Const32 [0]) (Select1 w2b)) (MakeTuple (Int64Make w3b (Select0 w2b)) (Int64Make (Select0 w1b) x0y0Lo))) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpLast) + v.Type = t + x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + x0.AddArg(x) + x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + x1.AddArg(x) + y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + y0.AddArg(y) + y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + y1.AddArg(y) + x0y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) + x0y0.AddArg2(x0, y0) + x0y1 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) + x0y1.AddArg2(x0, y1) + x1y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) + x1y0.AddArg2(x1, y0) + x1y1 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) + x1y1.AddArg2(x1, y1) + x0y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + x0y0Hi.AddArg(x0y0) + x0y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32) + x0y0Lo.AddArg(x0y0) + x0y1Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + x0y1Hi.AddArg(x0y1) + x0y1Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32) + x0y1Lo.AddArg(x0y1) + x1y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + x1y0Hi.AddArg(x1y0) + x1y0Lo := b.NewValue0(v.Pos, OpSelect1, 
typ.UInt32) + x1y0Lo.AddArg(x1y0) + x1y1Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + x1y1Hi.AddArg(x1y1) + x1y1Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32) + x1y1Lo.AddArg(x1y1) + w1a := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) + w1a.AddArg2(x0y0Hi, x0y1Lo) + w2a := b.NewValue0(v.Pos, OpAdd32carrywithcarry, types.NewTuple(typ.UInt32, types.TypeFlags)) + v18 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v18.AddArg(w1a) + w2a.AddArg3(x0y1Hi, x1y0Hi, v18) + w3a := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32) + v20 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) + v20.AuxInt = int32ToAuxInt(0) + v21 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v21.AddArg(w2a) + w3a.AddArg3(x1y1Hi, v20, v21) + w1b := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) + v23 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + v23.AddArg(w1a) + w1b.AddArg2(x1y0Lo, v23) + w2b := b.NewValue0(v.Pos, OpAdd32carrywithcarry, types.NewTuple(typ.UInt32, types.TypeFlags)) + v25 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + v25.AddArg(w2a) + v26 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v26.AddArg(w1b) + w2b.AddArg3(x1y1Lo, v25, v26) + w3b := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32) + v28 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v28.AddArg(w2b) + w3b.AddArg3(w3a, v20, v28) + v29 := b.NewValue0(v.Pos, OpMakeTuple, types.NewTuple(typ.UInt64, typ.UInt64)) + v30 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + v31 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + v31.AddArg(w2b) + v30.AddArg2(w3b, v31) + v32 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + v33 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + v33.AddArg(w1b) + v32.AddArg2(v33, x0y0Lo) + v29.AddArg2(v30, v32) + v.AddArgs(x0, x1, y0, y1, x0y0, x0y1, x1y0, x1y1, x0y0Hi, x0y0Lo, x0y1Hi, x0y1Lo, x1y0Hi, x1y0Lo, x1y1Hi, x1y1Lo, w1a, w2a, w3a, w1b, w2b, w3b, v29) return true } } @@ -2705,6 +2898,34 @@ func rewriteValuedec64_OpRsh8x64(v 
*Value) bool { return true } } +func rewriteValuedec64_OpSelect0(v *Value) bool { + v_0 := v.Args[0] + // match: (Select0 (MakeTuple x y)) + // result: x + for { + if v_0.Op != OpMakeTuple { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValuedec64_OpSelect1(v *Value) bool { + v_0 := v.Args[0] + // match: (Select1 (MakeTuple x y)) + // result: y + for { + if v_0.Op != OpMakeTuple { + break + } + y := v_0.Args[1] + v.copyOf(y) + return true + } + return false +} func rewriteValuedec64_OpSignExt16to64(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -2815,29 +3036,33 @@ func rewriteValuedec64_OpSub64(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Sub64 x y) - // result: (Int64Make (Sub32withcarry (Int64Hi x) (Int64Hi y) (Select1 (Sub32carry (Int64Lo x) (Int64Lo y)))) (Select0 (Sub32carry (Int64Lo x) (Int64Lo y)))) + // match: (Sub64 x y) + // result: (Last x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) sub: (Sub32carry x0 y0) (Int64Make (Sub32withcarry x1 y1 (Select1 sub)) (Select0 sub))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpInt64Make) - v0 := b.NewValue0(v.Pos, OpSub32withcarry, typ.Int32) - v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) - v2.AddArg(y) - v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpSub32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) - v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) - v5.AddArg(x) - v6 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) - v6.AddArg(y) - v4.AddArg2(v5, v6) - v3.AddArg(v4) - v0.AddArg3(v1, v2, v3) - v7 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) - v7.AddArg(v4) - v.AddArg2(v0, v7) + v.reset(OpLast) + v.Type = t + x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + x0.AddArg(x) + x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + x1.AddArg(x) + y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + y0.AddArg(y) 
+ y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + y1.AddArg(y) + sub := b.NewValue0(v.Pos, OpSub32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) + sub.AddArg2(x0, y0) + v5 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + v6 := b.NewValue0(v.Pos, OpSub32withcarry, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v7.AddArg(sub) + v6.AddArg3(x1, y1, v7) + v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + v8.AddArg(sub) + v5.AddArg2(v6, v8) + v.AddArg6(x0, x1, y0, y1, sub, v5) return true } } diff --git a/src/cmd/compile/internal/ssa/rewritedivmod.go b/src/cmd/compile/internal/ssa/rewritedivmod.go index fc37d84999f..02978075a8a 100644 --- a/src/cmd/compile/internal/ssa/rewritedivmod.go +++ b/src/cmd/compile/internal/ssa/rewritedivmod.go @@ -20,8 +20,6 @@ func rewriteValuedivmod(v *Value) bool { return rewriteValuedivmod_OpDiv8(v) case OpDiv8u: return rewriteValuedivmod_OpDiv8u(v) - case OpMod32u: - return rewriteValuedivmod_OpMod32u(v) } return false } @@ -646,7 +644,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool { return true } // match: (Div64 x (Const64 [c])) - // cond: smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul + // cond: smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul // result: (Sub64 (Rsh64x64 (Hmul64 x (Const64 [int64(smagic64(c).m/2)])) (Const64 [smagic64(c).s - 1])) (Rsh64x64 x (Const64 [63]))) for { t := v.Type @@ -655,7 +653,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul) { + if !(smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul) { break } v.reset(OpSub64) @@ -676,7 +674,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool { return true } // match: (Div64 x (Const64 [c])) - // cond: smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul + // cond: smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul // result: (Sub64 
(Rsh64x64 (Add64 x (Hmul64 x (Const64 [int64(smagic64(c).m)]))) (Const64 [smagic64(c).s])) (Rsh64x64 x (Const64 [63]))) for { t := v.Type @@ -685,7 +683,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul) { + if !(smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul) { break } v.reset(OpSub64) @@ -716,7 +714,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { config := b.Func.Config typ := &b.Func.Config.Types // match: (Div64u x (Const64 [c])) - // cond: t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul + // cond: t.IsSigned() && smagicOK64(c) && config.useHmul // result: (Rsh64Ux64 (Hmul64u x (Const64 [int64(smagic64(c).m)])) (Const64 [smagic64(c).s])) for { t := v.Type @@ -725,7 +723,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul) { + if !(t.IsSigned() && smagicOK64(c) && config.useHmul) { break } v.reset(OpRsh64Ux64) @@ -740,7 +738,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { return true } // match: (Div64u x (Const64 [c])) - // cond: umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul + // cond: umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul // result: (Rsh64Ux64 (Hmul64u x (Const64 [int64(1<<63 + umagic64(c).m/2)])) (Const64 [umagic64(c).s - 1])) for { t := v.Type @@ -749,7 +747,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul) { + if !(umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul) { break } v.reset(OpRsh64Ux64) @@ -764,7 +762,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { return true } // match: (Div64u x (Const64 [c])) - // cond: umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && 
config.useHmul + // cond: umagicOK64(c) && c&1 == 0 && config.useHmul // result: (Rsh64Ux64 (Hmul64u (Rsh64Ux64 x (Const64 [1])) (Const64 [int64(1<<63 + (umagic64(c).m+1)/2)])) (Const64 [umagic64(c).s - 2])) for { t := v.Type @@ -773,7 +771,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul) { + if !(umagicOK64(c) && c&1 == 0 && config.useHmul) { break } v.reset(OpRsh64Ux64) @@ -792,7 +790,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { return true } // match: (Div64u x (Const64 [c])) - // cond: umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul + // cond: umagicOK64(c) && config.useAvg && config.useHmul // result: (Rsh64Ux64 (Avg64u x (Hmul64u x (Const64 [int64(umagic64(c).m)]))) (Const64 [umagic64(c).s - 1])) for { t := v.Type @@ -801,7 +799,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul) { + if !(umagicOK64(c) && config.useAvg && config.useHmul) { break } v.reset(OpRsh64Ux64) @@ -817,66 +815,6 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { v.AddArg2(v0, v3) return true } - // match: (Div64u x (Const64 [c])) - // cond: c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul - // result: (Add64 (Add64 (Add64 (Lsh64x64 (ZeroExt32to64 (Div32u (Trunc64to32 (Rsh64Ux64 x (Const64 [32]))) (Const32 [int32(c)]))) (Const64 [32])) (ZeroExt32to64 (Div32u (Trunc64to32 x) (Const32 [int32(c)])))) (Mul64 (ZeroExt32to64 (Mod32u (Trunc64to32 (Rsh64Ux64 x (Const64 [32]))) (Const32 [int32(c)]))) (Const64 [int64((1<<32)/c)]))) (ZeroExt32to64 (Div32u (Add32 (Mod32u (Trunc64to32 x) (Const32 [int32(c)])) (Mul32 (Mod32u (Trunc64to32 (Rsh64Ux64 x (Const64 [32]))) (Const32 [int32(c)])) (Const32 [int32((1<<32)%c)]))) (Const32 [int32(c)])))) - for { - x := v_0 - if v_1.Op != OpConst64 { - 
break - } - c := auxIntToInt64(v_1.AuxInt) - if !(c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul) { - break - } - v.reset(OpAdd64) - v0 := b.NewValue0(v.Pos, OpAdd64, typ.UInt64) - v1 := b.NewValue0(v.Pos, OpAdd64, typ.UInt64) - v2 := b.NewValue0(v.Pos, OpLsh64x64, typ.UInt64) - v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v4 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32) - v5 := b.NewValue0(v.Pos, OpTrunc64to32, typ.UInt32) - v6 := b.NewValue0(v.Pos, OpRsh64Ux64, typ.UInt64) - v7 := b.NewValue0(v.Pos, OpConst64, typ.UInt64) - v7.AuxInt = int64ToAuxInt(32) - v6.AddArg2(x, v7) - v5.AddArg(v6) - v8 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) - v8.AuxInt = int32ToAuxInt(int32(c)) - v4.AddArg2(v5, v8) - v3.AddArg(v4) - v2.AddArg2(v3, v7) - v9 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v10 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32) - v11 := b.NewValue0(v.Pos, OpTrunc64to32, typ.UInt32) - v11.AddArg(x) - v10.AddArg2(v11, v8) - v9.AddArg(v10) - v1.AddArg2(v2, v9) - v12 := b.NewValue0(v.Pos, OpMul64, typ.UInt64) - v13 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v14 := b.NewValue0(v.Pos, OpMod32u, typ.UInt32) - v14.AddArg2(v5, v8) - v13.AddArg(v14) - v15 := b.NewValue0(v.Pos, OpConst64, typ.UInt64) - v15.AuxInt = int64ToAuxInt(int64((1 << 32) / c)) - v12.AddArg2(v13, v15) - v0.AddArg2(v1, v12) - v16 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v17 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32) - v18 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32) - v19 := b.NewValue0(v.Pos, OpMod32u, typ.UInt32) - v19.AddArg2(v11, v8) - v20 := b.NewValue0(v.Pos, OpMul32, typ.UInt32) - v21 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) - v21.AuxInt = int32ToAuxInt(int32((1 << 32) % c)) - v20.AddArg2(v14, v21) - v18.AddArg2(v19, v20) - v17.AddArg2(v18, v8) - v16.AddArg(v17) - v.AddArg2(v0, v16) - return true - } return false } func rewriteValuedivmod_OpDiv8(v *Value) bool { @@ -982,35 +920,6 @@ func rewriteValuedivmod_OpDiv8u(v 
*Value) bool { } return false } -func rewriteValuedivmod_OpMod32u(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Mod32u x (Const32 [c])) - // cond: x.Op != OpConst32 && c > 0 && umagicOK32(c) - // result: (Sub32 x (Mul32 (Div32u x (Const32 [c])) (Const32 [c]))) - for { - t := v.Type - x := v_0 - if v_1.Op != OpConst32 { - break - } - c := auxIntToInt32(v_1.AuxInt) - if !(x.Op != OpConst32 && c > 0 && umagicOK32(c)) { - break - } - v.reset(OpSub32) - v0 := b.NewValue0(v.Pos, OpMul32, t) - v1 := b.NewValue0(v.Pos, OpDiv32u, t) - v2 := b.NewValue0(v.Pos, OpConst32, t) - v2.AuxInt = int32ToAuxInt(c) - v1.AddArg2(x, v2) - v0.AddArg2(v1, v2) - v.AddArg2(x, v0) - return true - } - return false -} func rewriteBlockdivmod(b *Block) bool { return false } diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index 891f017d7ba..fd5139c0bbd 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -14724,7 +14724,7 @@ func rewriteValuegeneric_OpMod16u(v *Value) bool { return true } // match: (Mod16u x (Const16 [c])) - // cond: x.Op != OpConst16 && c > 0 && umagicOK16(c) + // cond: x.Op != OpConst16 && c != 0 // result: (Sub16 x (Mul16 (Div16u x (Const16 [c])) (Const16 [c]))) for { t := v.Type @@ -14733,7 +14733,7 @@ func rewriteValuegeneric_OpMod16u(v *Value) bool { break } c := auxIntToInt16(v_1.AuxInt) - if !(x.Op != OpConst16 && c > 0 && umagicOK16(c)) { + if !(x.Op != OpConst16 && c != 0) { break } v.reset(OpSub16) @@ -14878,7 +14878,7 @@ func rewriteValuegeneric_OpMod32u(v *Value) bool { return true } // match: (Mod32u x (Const32 [c])) - // cond: x.Op != OpConst32 && c > 0 && umagicOK32(c) + // cond: x.Op != OpConst32 && c != 0 // result: (Sub32 x (Mul32 (Div32u x (Const32 [c])) (Const32 [c]))) for { t := v.Type @@ -14887,7 +14887,7 @@ func rewriteValuegeneric_OpMod32u(v *Value) bool { break } c := auxIntToInt32(v_1.AuxInt) - 
if !(x.Op != OpConst32 && c > 0 && umagicOK32(c)) { + if !(x.Op != OpConst32 && c != 0) { break } v.reset(OpSub32) @@ -15043,7 +15043,7 @@ func rewriteValuegeneric_OpMod64u(v *Value) bool { return true } // match: (Mod64u x (Const64 [c])) - // cond: x.Op != OpConst64 && c > 0 && umagicOK64(c) + // cond: x.Op != OpConst64 && c != 0 // result: (Sub64 x (Mul64 (Div64u x (Const64 [c])) (Const64 [c]))) for { t := v.Type @@ -15052,7 +15052,7 @@ func rewriteValuegeneric_OpMod64u(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(x.Op != OpConst64 && c > 0 && umagicOK64(c)) { + if !(x.Op != OpConst64 && c != 0) { break } v.reset(OpSub64) @@ -15197,7 +15197,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool { return true } // match: (Mod8u x (Const8 [c])) - // cond: x.Op != OpConst8 && c > 0 && umagicOK8( c) + // cond: x.Op != OpConst8 && c != 0 // result: (Sub8 x (Mul8 (Div8u x (Const8 [c])) (Const8 [c]))) for { t := v.Type @@ -15206,7 +15206,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool { break } c := auxIntToInt8(v_1.AuxInt) - if !(x.Op != OpConst8 && c > 0 && umagicOK8(c)) { + if !(x.Op != OpConst8 && c != 0) { break } v.reset(OpSub8) diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go index 06887c934e7..bf9e71c1701 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics.go +++ b/src/cmd/compile/internal/ssagen/intrinsics.go @@ -1223,7 +1223,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1]) }, - sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.MIPS64, sys.RISCV64, sys.Loong64) + sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.PPC64, sys.S390X, sys.MIPS64, sys.MIPS, sys.RISCV64, sys.Loong64) alias("math/bits", "Mul", "math/bits", "Mul64", p8...) 
alias("internal/runtime/math", "Mul64", "math/bits", "Mul64", p8...) addF("math/bits", "Add64", diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go index 5a4e577fb6d..9311f843454 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics_test.go +++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go @@ -33,6 +33,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"386", "internal/runtime/sys", "TrailingZeros64"}: struct{}{}, {"386", "internal/runtime/sys", "TrailingZeros8"}: struct{}{}, {"386", "math", "sqrt"}: struct{}{}, + {"386", "math/bits", "Mul64"}: struct{}{}, {"386", "math/bits", "ReverseBytes32"}: struct{}{}, {"386", "math/bits", "ReverseBytes64"}: struct{}{}, {"386", "math/bits", "TrailingZeros16"}: struct{}{}, @@ -208,6 +209,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"arm", "math/bits", "Len32"}: struct{}{}, {"arm", "math/bits", "Len64"}: struct{}{}, {"arm", "math/bits", "Len8"}: struct{}{}, + {"arm", "math/bits", "Mul64"}: struct{}{}, {"arm", "math/bits", "ReverseBytes32"}: struct{}{}, {"arm", "math/bits", "ReverseBytes64"}: struct{}{}, {"arm", "math/bits", "RotateLeft32"}: struct{}{}, @@ -557,6 +559,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"mips", "math/bits", "Len32"}: struct{}{}, {"mips", "math/bits", "Len64"}: struct{}{}, {"mips", "math/bits", "Len8"}: struct{}{}, + {"mips", "math/bits", "Mul64"}: struct{}{}, {"mips", "math/bits", "TrailingZeros16"}: struct{}{}, {"mips", "math/bits", "TrailingZeros32"}: struct{}{}, {"mips", "math/bits", "TrailingZeros64"}: struct{}{}, @@ -806,6 +809,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"mipsle", "math/bits", "Len32"}: struct{}{}, {"mipsle", "math/bits", "Len64"}: struct{}{}, {"mipsle", "math/bits", "Len8"}: struct{}{}, + {"mipsle", "math/bits", "Mul64"}: struct{}{}, {"mipsle", "math/bits", "TrailingZeros16"}: struct{}{}, {"mipsle", "math/bits", "TrailingZeros32"}: struct{}{}, {"mipsle", 
"math/bits", "TrailingZeros64"}: struct{}{}, diff --git a/src/cmd/compile/internal/walk/expr.go b/src/cmd/compile/internal/walk/expr.go index b9e226b2074..989ae0a1db2 100644 --- a/src/cmd/compile/internal/walk/expr.go +++ b/src/cmd/compile/internal/walk/expr.go @@ -704,27 +704,21 @@ func walkDivMod(n *ir.BinaryExpr, init *ir.Nodes) ir.Node { // runtime calls late in SSA processing. if types.RegSize < 8 && (et == types.TINT64 || et == types.TUINT64) { if n.Y.Op() == ir.OLITERAL { - // Leave div/mod by constant powers of 2 or small 16-bit constants. + // Leave div/mod by non-zero uint64 constants. // The SSA backend will handle those. + // (Zero constants should have been rejected already, but we check just in case.) switch et { case types.TINT64: - c := ir.Int64Val(n.Y) - if c < 0 { - c = -c - } - if c != 0 && c&(c-1) == 0 { + if ir.Int64Val(n.Y) != 0 { return n } case types.TUINT64: - c := ir.Uint64Val(n.Y) - if c < 1<<16 { - return n - } - if c != 0 && c&(c-1) == 0 { + if ir.Uint64Val(n.Y) != 0 { return n } } } + // Build call to uint64div, uint64mod, int64div, or int64mod. var fn string if et == types.TINT64 { fn = "int64" diff --git a/src/cmd/compile/internal/x86/ssa.go b/src/cmd/compile/internal/x86/ssa.go index d0aad088496..348880f622f 100644 --- a/src/cmd/compile/internal/x86/ssa.go +++ b/src/cmd/compile/internal/x86/ssa.go @@ -167,7 +167,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.Op386SBBL: opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg()) - case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry: + case ssa.Op386ADDLcarry, ssa.Op386ADCLcarry, ssa.Op386SUBLcarry: // output 0 is carry/borrow, output 1 is the low 32 bits. 
opregreg(s, v.Op.Asm(), v.Reg0(), v.Args[1].Reg()) diff --git a/test/codegen/divmod.go b/test/codegen/divmod.go index 3a78180817f..98d0852398c 100644 --- a/test/codegen/divmod.go +++ b/test/codegen/divmod.go @@ -279,7 +279,10 @@ func div3_uint32(i uint32) uint32 { } func div3_uint64(i uint64) uint64 { - // 386 "CALL" + // 386: "MOVL [$]-1431655766" + // 386: "MULL" + // 386: "SHRL [$]1" + // 386 -".*CALL" // arm64: "MOVD [$]-6148914691236517205," // arm64: "UMULH" // arm64: "LSR [$]1," @@ -308,7 +311,10 @@ func div14_uint32(i uint32) uint32 { } func div14_uint64(i uint64) uint64 { - // 386 "CALL" + // 386: "MOVL [$]-1840700270," + // 386: "MULL" + // 386: "SHRL [$]2," + // 386: -".*CALL" // arm64: "MOVD [$]-7905747460161236406," // arm64: "UMULH" // arm64: "LSR [$]2," @@ -343,7 +349,10 @@ func div7_uint32(i uint32) uint32 { } func div7_uint64(i uint64) uint64 { - // 386 "CALL" + // 386: "MOVL [$]-1840700269," + // 386: "MULL" + // 386: "SHRL [$]2," + // 386: -".*CALL" // arm64: "MOVD [$]2635249153387078803," // arm64: "UMULH" // arm64: "SUB", @@ -353,7 +362,11 @@ func div7_uint64(i uint64) uint64 { } func div12345_uint64(i uint64) uint64 { - // 386 "CALL" + // 386: "MOVL [$]-1444876402," + // 386: "MOVL [$]835683390," + // 386: "MULL" + // 386: "SHRL [$]13," + // 386: "SHLL [$]19," // arm64: "MOVD [$]-6205696892516465602," // arm64: "UMULH" // arm64: "LSR [$]13," @@ -869,7 +882,12 @@ func ndivis6_int32(i int32) bool { } func divis6_int64(i int64) bool { - // 386 "CALL" + // 386: "IMUL3L [$]-1431655766," + // 386: "IMUL3L [$]-1431655765," + // 386: "ADCL [$]715827882," + // 386: "CMPL .*, [$]715827882" + // 386: "CMPL .*, [$]-1431655766" + // 386: "SETLS" // arm64: "MOVD [$]-6148914691236517205," // arm64: "MUL " // arm64: "MOVD [$]3074457345618258602," @@ -880,7 +898,12 @@ func divis6_int64(i int64) bool { } func ndivis6_int64(i int64) bool { - // 386 "CALL" + // 386: "IMUL3L [$]-1431655766," + // 386: "IMUL3L [$]-1431655765," + // 386: "ADCL [$]715827882," + // 
386: "CMPL .*, [$]715827882" + // 386: "CMPL .*, [$]-1431655766" + // 386: "SETHI" // arm64: "MOVD [$]-6148914691236517205," // arm64: "MUL " // arm64: "MOVD [$]3074457345618258602," @@ -973,7 +996,14 @@ func div_ndivis6_uint32(i uint32) (uint32, bool) { } func div_divis6_uint64(i uint64) (uint64, bool) { - // 386 "CALL" + // 386: "MOVL [$]-1431655766," + // 386: "MOVL [$]-1431655765," + // 386: "MULL" + // 386: "SHRL [$]2," + // 386: "SHLL [$]30," + // 386: "SETEQ" + // 386: -".*CALL" + // 386: -"RO[RL]" // arm64: "MOVD [$]-6148914691236517205," // arm64: "UMULH" // arm64: "LSR [$]2," @@ -983,7 +1013,14 @@ func div_divis6_uint64(i uint64) (uint64, bool) { } func div_ndivis6_uint64(i uint64) (uint64, bool) { - // 386 "CALL" + // 386: "MOVL [$]-1431655766," + // 386: "MOVL [$]-1431655765," + // 386: "MULL" + // 386: "SHRL [$]2," + // 386: "SHLL [$]30," + // 386: "SETNE" + // 386: -".*CALL" + // 386: -"RO[RL]" // arm64: "MOVD [$]-6148914691236517205," // arm64: "UMULH" // arm64: "LSR [$]2," @@ -1091,7 +1128,16 @@ func div_ndivis6_int32(i int32) (int32, bool) { } func div_divis6_int64(i int64) (int64, bool) { - // 386 "CALL" + // 386: "ANDL [$]-1431655766," + // 386: "ANDL [$]-1431655765," + // 386: "MOVL [$]-1431655766," + // 386: "MOVL [$]-1431655765," + // 386: "SUBL" "SBBL" + // 386: "MULL" + // 386: "SETEQ" + // 386: -"SET(LS|HI)" + // 386: -".*CALL" + // 386: -"RO[RL]" // arm64: "MOVD [$]-6148914691236517205," // arm64: "SMULH" // arm64: "ADD" @@ -1103,7 +1149,16 @@ func div_divis6_int64(i int64) (int64, bool) { } func div_ndivis6_int64(i int64) (int64, bool) { - // 386 "CALL" + // 386: "ANDL [$]-1431655766," + // 386: "ANDL [$]-1431655765," + // 386: "MOVL [$]-1431655766," + // 386: "MOVL [$]-1431655765," + // 386: "SUBL" "SBBL" + // 386: "MULL" + // 386: "SETNE" + // 386: -"SET(LS|HI)" + // 386: -".*CALL" + // 386: -"RO[RL]" // arm64: "MOVD [$]-6148914691236517205," // arm64: "SMULH" // arm64: "ADD"