mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile/internal/ssa: emit IMUL3{L/Q} for MUL{L/Q}const on x86
cmd/asm now supports three-operand form of IMUL,
so instead of using IMUL with resultInArg0, emit IMUL3 instruction.
This results in less redundant MOVs where SSA assigns
different registers to input[0] and dst arguments.
Note: these have exactly the same encoding when reg0=reg1:
IMUL3x $const, reg0, reg1
IMULx $const, reg
Two-operand IMULx is like a crippled IMUL3x, with dst fixed to input[0].
This is why we don't bother to generate IMULx for the case where
dst is the same as input[0].
Change-Id: I4becda475b3dffdd07b6fdf1c75bacc82af654e4
Reviewed-on: https://go-review.googlesource.com/99656
Run-TryBot: Iskander Sharipov <iskander.sharipov@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Giovanni Bajo <rasky@develer.com>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
080187f4f7
commit
85a8d25d53
6 changed files with 14 additions and 31 deletions
|
|
@ -487,19 +487,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||||
|
|
||||||
case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
|
case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
|
||||||
r := v.Reg()
|
r := v.Reg()
|
||||||
if r != v.Args[0].Reg() {
|
|
||||||
v.Fatalf("input[0] and output not in same register %s", v.LongString())
|
|
||||||
}
|
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
p.From.Type = obj.TYPE_CONST
|
p.From.Type = obj.TYPE_CONST
|
||||||
p.From.Offset = v.AuxInt
|
p.From.Offset = v.AuxInt
|
||||||
p.To.Type = obj.TYPE_REG
|
p.To.Type = obj.TYPE_REG
|
||||||
p.To.Reg = r
|
p.To.Reg = r
|
||||||
// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
|
p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
|
||||||
// then we don't need to use resultInArg0 for these ops.
|
|
||||||
//p.From3 = new(obj.Addr)
|
|
||||||
//p.From3.Type = obj.TYPE_REG
|
|
||||||
//p.From3.Reg = v.Args[0].Reg()
|
|
||||||
|
|
||||||
case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
|
case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
|
||||||
ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
|
ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
|
||||||
|
|
|
||||||
|
|
@ -191,7 +191,7 @@ func init() {
|
||||||
{name: "SBBLconst", argLength: 2, reg: gp1carry1, asm: "SBBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0-auxint-borrow(arg1), where arg1 is flags
|
{name: "SBBLconst", argLength: 2, reg: gp1carry1, asm: "SBBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0-auxint-borrow(arg1), where arg1 is flags
|
||||||
|
|
||||||
{name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
|
{name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
|
||||||
{name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
|
{name: "MULLconst", argLength: 1, reg: gp11, asm: "IMUL3L", aux: "Int32", clobberFlags: true}, // arg0 * auxint
|
||||||
|
|
||||||
{name: "HMULL", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
|
{name: "HMULL", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
|
||||||
{name: "HMULLU", argLength: 2, reg: gp21hmul, commutative: true, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
|
{name: "HMULLU", argLength: 2, reg: gp21hmul, commutative: true, asm: "MULL", clobberFlags: true}, // (arg0 * arg1) >> width
|
||||||
|
|
|
||||||
|
|
@ -205,8 +205,8 @@ func init() {
|
||||||
|
|
||||||
{name: "MULQ", argLength: 2, reg: gp21, asm: "IMULQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
|
{name: "MULQ", argLength: 2, reg: gp21, asm: "IMULQ", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
|
||||||
{name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
|
{name: "MULL", argLength: 2, reg: gp21, asm: "IMULL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0 * arg1
|
||||||
{name: "MULQconst", argLength: 1, reg: gp11, asm: "IMULQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
|
{name: "MULQconst", argLength: 1, reg: gp11, asm: "IMUL3Q", aux: "Int32", clobberFlags: true}, // arg0 * auxint
|
||||||
{name: "MULLconst", argLength: 1, reg: gp11, asm: "IMULL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 * auxint
|
{name: "MULLconst", argLength: 1, reg: gp11, asm: "IMUL3L", aux: "Int32", clobberFlags: true}, // arg0 * auxint
|
||||||
|
|
||||||
{name: "HMULQ", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULQ", clobberFlags: true}, // (arg0 * arg1) >> width
|
{name: "HMULQ", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULQ", clobberFlags: true}, // (arg0 * arg1) >> width
|
||||||
{name: "HMULL", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
|
{name: "HMULL", argLength: 2, reg: gp21hmul, commutative: true, asm: "IMULL", clobberFlags: true}, // (arg0 * arg1) >> width
|
||||||
|
|
|
||||||
|
|
@ -2680,9 +2680,8 @@ var opcodeTable = [...]opInfo{
|
||||||
name: "MULLconst",
|
name: "MULLconst",
|
||||||
auxType: auxInt32,
|
auxType: auxInt32,
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
resultInArg0: true,
|
|
||||||
clobberFlags: true,
|
clobberFlags: true,
|
||||||
asm: x86.AIMULL,
|
asm: x86.AIMUL3L,
|
||||||
reg: regInfo{
|
reg: regInfo{
|
||||||
inputs: []inputInfo{
|
inputs: []inputInfo{
|
||||||
{0, 239}, // AX CX DX BX BP SI DI
|
{0, 239}, // AX CX DX BX BP SI DI
|
||||||
|
|
@ -5216,9 +5215,8 @@ var opcodeTable = [...]opInfo{
|
||||||
name: "MULQconst",
|
name: "MULQconst",
|
||||||
auxType: auxInt32,
|
auxType: auxInt32,
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
resultInArg0: true,
|
|
||||||
clobberFlags: true,
|
clobberFlags: true,
|
||||||
asm: x86.AIMULQ,
|
asm: x86.AIMUL3Q,
|
||||||
reg: regInfo{
|
reg: regInfo{
|
||||||
inputs: []inputInfo{
|
inputs: []inputInfo{
|
||||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||||
|
|
@ -5232,9 +5230,8 @@ var opcodeTable = [...]opInfo{
|
||||||
name: "MULLconst",
|
name: "MULLconst",
|
||||||
auxType: auxInt32,
|
auxType: auxInt32,
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
resultInArg0: true,
|
|
||||||
clobberFlags: true,
|
clobberFlags: true,
|
||||||
asm: x86.AIMULL,
|
asm: x86.AIMUL3L,
|
||||||
reg: regInfo{
|
reg: regInfo{
|
||||||
inputs: []inputInfo{
|
inputs: []inputInfo{
|
||||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||||
|
|
|
||||||
|
|
@ -335,19 +335,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||||
|
|
||||||
case ssa.Op386MULLconst:
|
case ssa.Op386MULLconst:
|
||||||
r := v.Reg()
|
r := v.Reg()
|
||||||
if r != v.Args[0].Reg() {
|
|
||||||
v.Fatalf("input[0] and output not in same register %s", v.LongString())
|
|
||||||
}
|
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
p.From.Type = obj.TYPE_CONST
|
p.From.Type = obj.TYPE_CONST
|
||||||
p.From.Offset = v.AuxInt
|
p.From.Offset = v.AuxInt
|
||||||
p.To.Type = obj.TYPE_REG
|
p.To.Type = obj.TYPE_REG
|
||||||
p.To.Reg = r
|
p.To.Reg = r
|
||||||
// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
|
p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
|
||||||
// then we don't need to use resultInArg0 for these ops.
|
|
||||||
//p.From3 = new(obj.Addr)
|
|
||||||
//p.From3.Type = obj.TYPE_REG
|
|
||||||
//p.From3.Reg = v.Args[0].Reg()
|
|
||||||
|
|
||||||
case ssa.Op386SUBLconst,
|
case ssa.Op386SUBLconst,
|
||||||
ssa.Op386ADCLconst,
|
ssa.Op386ADCLconst,
|
||||||
|
|
|
||||||
|
|
@ -30,14 +30,14 @@ func Pow2Muls(n1, n2 int) (int, int) {
|
||||||
// ------------------ //
|
// ------------------ //
|
||||||
|
|
||||||
func MergeMuls1(n int) int {
|
func MergeMuls1(n int) int {
|
||||||
// amd64:"IMULQ\t[$]46"
|
// amd64:"IMUL3Q\t[$]46"
|
||||||
// 386:"IMULL\t[$]46"
|
// 386:"IMUL3L\t[$]46"
|
||||||
return 15*n + 31*n // 46n
|
return 15*n + 31*n // 46n
|
||||||
}
|
}
|
||||||
|
|
||||||
func MergeMuls2(n int) int {
|
func MergeMuls2(n int) int {
|
||||||
// amd64:"IMULQ\t[$]23","ADDQ\t[$]29"
|
// amd64:"IMUL3Q\t[$]23","ADDQ\t[$]29"
|
||||||
// 386:"IMULL\t[$]23","ADDL\t[$]29"
|
// 386:"IMUL3L\t[$]23","ADDL\t[$]29"
|
||||||
return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
|
return 5*n + 7*(n+1) + 11*(n+2) // 23n + 29
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -48,8 +48,8 @@ func MergeMuls3(a, n int) int {
|
||||||
}
|
}
|
||||||
|
|
||||||
func MergeMuls4(n int) int {
|
func MergeMuls4(n int) int {
|
||||||
// amd64:"IMULQ\t[$]14"
|
// amd64:"IMUL3Q\t[$]14"
|
||||||
// 386:"IMULL\t[$]14"
|
// 386:"IMUL3L\t[$]14"
|
||||||
return 23*n - 9*n // 14n
|
return 23*n - 9*n // 14n
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue