mirror of
https://github.com/golang/go.git
synced 2025-11-11 22:21:06 +00:00
cmd/compile: add fma intrinsic for arm
This change introduces an arm intrinsic that generates the FMULAD
instruction for the fused-multiply-add operation on systems that
support it. System support is detected via cpu.ARM.HasVFPv4. A rewrite
rule translates the generic intrinsic to FMULAD.

Updates #25819.

Change-Id: I8459e5dd1cdbdca35f88a78dbeb7d387f1e20efa
Reviewed-on: https://go-review.googlesource.com/c/go/+/142117
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
06ac26279c
commit
58b031949b
12 changed files with 81 additions and 1 deletions
|
|
@ -226,7 +226,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||||
p.To.Reg = r
|
p.To.Reg = r
|
||||||
case ssa.OpARMSRR:
|
case ssa.OpARMSRR:
|
||||||
genregshift(s, arm.AMOVW, 0, v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_RR)
|
genregshift(s, arm.AMOVW, 0, v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_RR)
|
||||||
case ssa.OpARMMULAF, ssa.OpARMMULAD, ssa.OpARMMULSF, ssa.OpARMMULSD:
|
case ssa.OpARMMULAF, ssa.OpARMMULAD, ssa.OpARMMULSF, ssa.OpARMMULSD, ssa.OpARMFMULAD:
|
||||||
r := v.Reg()
|
r := v.Reg()
|
||||||
r0 := v.Args[0].Reg()
|
r0 := v.Args[0].Reg()
|
||||||
r1 := v.Args[1].Reg()
|
r1 := v.Args[1].Reg()
|
||||||
|
|
|
||||||
|
|
@ -186,6 +186,7 @@ var runtimeDecls = [...]struct {
|
||||||
{"x86HasPOPCNT", varTag, 15},
|
{"x86HasPOPCNT", varTag, 15},
|
||||||
{"x86HasSSE41", varTag, 15},
|
{"x86HasSSE41", varTag, 15},
|
||||||
{"x86HasFMA", varTag, 15},
|
{"x86HasFMA", varTag, 15},
|
||||||
|
{"armHasVFPv4", varTag, 15},
|
||||||
{"arm64HasATOMICS", varTag, 15},
|
{"arm64HasATOMICS", varTag, 15},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -242,4 +242,5 @@ func checkptrArithmetic(unsafe.Pointer, []unsafe.Pointer)
|
||||||
var x86HasPOPCNT bool
|
var x86HasPOPCNT bool
|
||||||
var x86HasSSE41 bool
|
var x86HasSSE41 bool
|
||||||
var x86HasFMA bool
|
var x86HasFMA bool
|
||||||
|
var armHasVFPv4 bool
|
||||||
var arm64HasATOMICS bool
|
var arm64HasATOMICS bool
|
||||||
|
|
|
||||||
|
|
@ -312,6 +312,7 @@ var (
|
||||||
x86HasPOPCNT,
|
x86HasPOPCNT,
|
||||||
x86HasSSE41,
|
x86HasSSE41,
|
||||||
x86HasFMA,
|
x86HasFMA,
|
||||||
|
armHasVFPv4,
|
||||||
arm64HasATOMICS,
|
arm64HasATOMICS,
|
||||||
typedmemclr,
|
typedmemclr,
|
||||||
typedmemmove,
|
typedmemmove,
|
||||||
|
|
|
||||||
|
|
@ -92,6 +92,7 @@ func initssaconfig() {
|
||||||
x86HasPOPCNT = sysvar("x86HasPOPCNT") // bool
|
x86HasPOPCNT = sysvar("x86HasPOPCNT") // bool
|
||||||
x86HasSSE41 = sysvar("x86HasSSE41") // bool
|
x86HasSSE41 = sysvar("x86HasSSE41") // bool
|
||||||
x86HasFMA = sysvar("x86HasFMA") // bool
|
x86HasFMA = sysvar("x86HasFMA") // bool
|
||||||
|
armHasVFPv4 = sysvar("armHasVFPv4") // bool
|
||||||
arm64HasATOMICS = sysvar("arm64HasATOMICS") // bool
|
arm64HasATOMICS = sysvar("arm64HasATOMICS") // bool
|
||||||
typedmemclr = sysfunc("typedmemclr")
|
typedmemclr = sysfunc("typedmemclr")
|
||||||
typedmemmove = sysfunc("typedmemmove")
|
typedmemmove = sysfunc("typedmemmove")
|
||||||
|
|
@ -3357,6 +3358,36 @@ func init() {
|
||||||
return s.variable(n, types.Types[TFLOAT64])
|
return s.variable(n, types.Types[TFLOAT64])
|
||||||
},
|
},
|
||||||
sys.AMD64)
|
sys.AMD64)
|
||||||
|
addF("math", "Fma",
|
||||||
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
|
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), armHasVFPv4, s.sb)
|
||||||
|
v := s.load(types.Types[TBOOL], addr)
|
||||||
|
b := s.endBlock()
|
||||||
|
b.Kind = ssa.BlockIf
|
||||||
|
b.SetControl(v)
|
||||||
|
bTrue := s.f.NewBlock(ssa.BlockPlain)
|
||||||
|
bFalse := s.f.NewBlock(ssa.BlockPlain)
|
||||||
|
bEnd := s.f.NewBlock(ssa.BlockPlain)
|
||||||
|
b.AddEdgeTo(bTrue)
|
||||||
|
b.AddEdgeTo(bFalse)
|
||||||
|
b.Likely = ssa.BranchLikely
|
||||||
|
|
||||||
|
// We have the intrinsic - use it directly.
|
||||||
|
s.startBlock(bTrue)
|
||||||
|
s.vars[n] = s.newValue3(ssa.OpFma, types.Types[TFLOAT64], args[0], args[1], args[2])
|
||||||
|
s.endBlock().AddEdgeTo(bEnd)
|
||||||
|
|
||||||
|
// Call the pure Go version.
|
||||||
|
s.startBlock(bFalse)
|
||||||
|
a := s.call(n, callNormal)
|
||||||
|
s.vars[n] = s.load(types.Types[TFLOAT64], a)
|
||||||
|
s.endBlock().AddEdgeTo(bEnd)
|
||||||
|
|
||||||
|
// Merge results.
|
||||||
|
s.startBlock(bEnd)
|
||||||
|
return s.variable(n, types.Types[TFLOAT64])
|
||||||
|
},
|
||||||
|
sys.ARM)
|
||||||
|
|
||||||
makeRoundAMD64 := func(op ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
makeRoundAMD64 := func(op ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
|
|
|
||||||
|
|
@ -210,6 +210,9 @@
|
||||||
|
|
||||||
(Round(32|64)F x) -> x
|
(Round(32|64)F x) -> x
|
||||||
|
|
||||||
|
// fused-multiply-add
|
||||||
|
(Fma x y z) -> (FMULAD z x y)
|
||||||
|
|
||||||
// comparisons
|
// comparisons
|
||||||
(Eq8 x y) -> (Equal (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
|
(Eq8 x y) -> (Equal (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
|
||||||
(Eq16 x y) -> (Equal (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
|
(Eq16 x y) -> (Equal (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
|
||||||
|
|
|
||||||
|
|
@ -192,6 +192,10 @@ func init() {
|
||||||
{name: "MULSF", argLength: 3, reg: fp31, asm: "MULSF", resultInArg0: true}, // arg0 - (arg1 * arg2)
|
{name: "MULSF", argLength: 3, reg: fp31, asm: "MULSF", resultInArg0: true}, // arg0 - (arg1 * arg2)
|
||||||
{name: "MULSD", argLength: 3, reg: fp31, asm: "MULSD", resultInArg0: true}, // arg0 - (arg1 * arg2)
|
{name: "MULSD", argLength: 3, reg: fp31, asm: "MULSD", resultInArg0: true}, // arg0 - (arg1 * arg2)
|
||||||
|
|
||||||
|
// FMULAD only exists on platforms with the VFPv4 instruction set.
|
||||||
|
// Any use must be preceded by a successful check of runtime.armHasVFPv4.
|
||||||
|
{name: "FMULAD", argLength: 3, reg: fp31, asm: "FMULAD", resultInArg0: true}, // arg0 + (arg1 * arg2)
|
||||||
|
|
||||||
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
|
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
|
||||||
{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt
|
{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt
|
||||||
{name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true}, // arg0 | arg1
|
{name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true}, // arg0 | arg1
|
||||||
|
|
|
||||||
|
|
@ -925,6 +925,7 @@ const (
|
||||||
OpARMMULAD
|
OpARMMULAD
|
||||||
OpARMMULSF
|
OpARMMULSF
|
||||||
OpARMMULSD
|
OpARMMULSD
|
||||||
|
OpARMFMULAD
|
||||||
OpARMAND
|
OpARMAND
|
||||||
OpARMANDconst
|
OpARMANDconst
|
||||||
OpARMOR
|
OpARMOR
|
||||||
|
|
@ -12119,6 +12120,22 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "FMULAD",
|
||||||
|
argLen: 3,
|
||||||
|
resultInArg0: true,
|
||||||
|
asm: arm.AFMULAD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
{2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "AND",
|
name: "AND",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
|
|
||||||
|
|
@ -538,6 +538,8 @@ func rewriteValueARM(v *Value) bool {
|
||||||
return rewriteValueARM_OpEqB_0(v)
|
return rewriteValueARM_OpEqB_0(v)
|
||||||
case OpEqPtr:
|
case OpEqPtr:
|
||||||
return rewriteValueARM_OpEqPtr_0(v)
|
return rewriteValueARM_OpEqPtr_0(v)
|
||||||
|
case OpFma:
|
||||||
|
return rewriteValueARM_OpFma_0(v)
|
||||||
case OpGeq16:
|
case OpGeq16:
|
||||||
return rewriteValueARM_OpGeq16_0(v)
|
return rewriteValueARM_OpGeq16_0(v)
|
||||||
case OpGeq16U:
|
case OpGeq16U:
|
||||||
|
|
@ -17159,6 +17161,21 @@ func rewriteValueARM_OpEqPtr_0(v *Value) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func rewriteValueARM_OpFma_0(v *Value) bool {
|
||||||
|
// match: (Fma x y z)
|
||||||
|
// cond:
|
||||||
|
// result: (FMULAD z x y)
|
||||||
|
for {
|
||||||
|
z := v.Args[2]
|
||||||
|
x := v.Args[0]
|
||||||
|
y := v.Args[1]
|
||||||
|
v.reset(OpARMFMULAD)
|
||||||
|
v.AddArg(z)
|
||||||
|
v.AddArg(x)
|
||||||
|
v.AddArg(y)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
func rewriteValueARM_OpGeq16_0(v *Value) bool {
|
func rewriteValueARM_OpGeq16_0(v *Value) bool {
|
||||||
b := v.Block
|
b := v.Block
|
||||||
typ := &b.Func.Config.Types
|
typ := &b.Func.Config.Types
|
||||||
|
|
|
||||||
|
|
@ -25,5 +25,7 @@ var (
|
||||||
x86HasSSE41 bool
|
x86HasSSE41 bool
|
||||||
x86HasFMA bool
|
x86HasFMA bool
|
||||||
|
|
||||||
|
armHasVFPv4 bool
|
||||||
|
|
||||||
arm64HasATOMICS bool
|
arm64HasATOMICS bool
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -516,6 +516,8 @@ func cpuinit() {
|
||||||
x86HasSSE41 = cpu.X86.HasSSE41
|
x86HasSSE41 = cpu.X86.HasSSE41
|
||||||
x86HasFMA = cpu.X86.HasFMA
|
x86HasFMA = cpu.X86.HasFMA
|
||||||
|
|
||||||
|
armHasVFPv4 = cpu.ARM.HasVFPv4
|
||||||
|
|
||||||
arm64HasATOMICS = cpu.ARM64.HasATOMICS
|
arm64HasATOMICS = cpu.ARM64.HasATOMICS
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -109,6 +109,7 @@ func copysign(a, b, c float64) {
|
||||||
|
|
||||||
func fma(x, y, z float64) float64 {
|
func fma(x, y, z float64) float64 {
|
||||||
// amd64:"VFMADD231SD"
|
// amd64:"VFMADD231SD"
|
||||||
|
// arm/6:"FMULAD"
|
||||||
// arm64:"FMADDD"
|
// arm64:"FMADDD"
|
||||||
// s390x:"FMADD"
|
// s390x:"FMADD"
|
||||||
// ppc64:"FMADD"
|
// ppc64:"FMADD"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue