mirror of
https://github.com/golang/go.git
synced 2025-11-10 13:41:05 +00:00
cmd/compile: add fma intrinsic for arm
This change introduces an arm intrinsic that generates the FMULAD instruction for the fused-multiply-add operation on systems that support it. System support is detected via cpu.ARM.HasVFPv4. A rewrite rule translates the generic intrinsic to FMULAD.

Updates #25819.

Change-Id: I8459e5dd1cdbdca35f88a78dbeb7d387f1e20efa
Reviewed-on: https://go-review.googlesource.com/c/go/+/142117
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
06ac26279c
commit
58b031949b
12 changed files with 81 additions and 1 deletions
|
|
@ -226,7 +226,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||
p.To.Reg = r
|
||||
case ssa.OpARMSRR:
|
||||
genregshift(s, arm.AMOVW, 0, v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_RR)
|
||||
case ssa.OpARMMULAF, ssa.OpARMMULAD, ssa.OpARMMULSF, ssa.OpARMMULSD:
|
||||
case ssa.OpARMMULAF, ssa.OpARMMULAD, ssa.OpARMMULSF, ssa.OpARMMULSD, ssa.OpARMFMULAD:
|
||||
r := v.Reg()
|
||||
r0 := v.Args[0].Reg()
|
||||
r1 := v.Args[1].Reg()
|
||||
|
|
|
|||
|
|
@ -186,6 +186,7 @@ var runtimeDecls = [...]struct {
|
|||
{"x86HasPOPCNT", varTag, 15},
|
||||
{"x86HasSSE41", varTag, 15},
|
||||
{"x86HasFMA", varTag, 15},
|
||||
{"armHasVFPv4", varTag, 15},
|
||||
{"arm64HasATOMICS", varTag, 15},
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -242,4 +242,5 @@ func checkptrArithmetic(unsafe.Pointer, []unsafe.Pointer)
|
|||
var x86HasPOPCNT bool
|
||||
var x86HasSSE41 bool
|
||||
var x86HasFMA bool
|
||||
var armHasVFPv4 bool
|
||||
var arm64HasATOMICS bool
|
||||
|
|
|
|||
|
|
@ -312,6 +312,7 @@ var (
|
|||
x86HasPOPCNT,
|
||||
x86HasSSE41,
|
||||
x86HasFMA,
|
||||
armHasVFPv4,
|
||||
arm64HasATOMICS,
|
||||
typedmemclr,
|
||||
typedmemmove,
|
||||
|
|
|
|||
|
|
@ -92,6 +92,7 @@ func initssaconfig() {
|
|||
x86HasPOPCNT = sysvar("x86HasPOPCNT") // bool
|
||||
x86HasSSE41 = sysvar("x86HasSSE41") // bool
|
||||
x86HasFMA = sysvar("x86HasFMA") // bool
|
||||
armHasVFPv4 = sysvar("armHasVFPv4") // bool
|
||||
arm64HasATOMICS = sysvar("arm64HasATOMICS") // bool
|
||||
typedmemclr = sysfunc("typedmemclr")
|
||||
typedmemmove = sysfunc("typedmemmove")
|
||||
|
|
@ -3357,6 +3358,36 @@ func init() {
|
|||
return s.variable(n, types.Types[TFLOAT64])
|
||||
},
|
||||
sys.AMD64)
|
||||
addF("math", "Fma",
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), armHasVFPv4, s.sb)
|
||||
v := s.load(types.Types[TBOOL], addr)
|
||||
b := s.endBlock()
|
||||
b.Kind = ssa.BlockIf
|
||||
b.SetControl(v)
|
||||
bTrue := s.f.NewBlock(ssa.BlockPlain)
|
||||
bFalse := s.f.NewBlock(ssa.BlockPlain)
|
||||
bEnd := s.f.NewBlock(ssa.BlockPlain)
|
||||
b.AddEdgeTo(bTrue)
|
||||
b.AddEdgeTo(bFalse)
|
||||
b.Likely = ssa.BranchLikely
|
||||
|
||||
// We have the intrinsic - use it directly.
|
||||
s.startBlock(bTrue)
|
||||
s.vars[n] = s.newValue3(ssa.OpFma, types.Types[TFLOAT64], args[0], args[1], args[2])
|
||||
s.endBlock().AddEdgeTo(bEnd)
|
||||
|
||||
// Call the pure Go version.
|
||||
s.startBlock(bFalse)
|
||||
a := s.call(n, callNormal)
|
||||
s.vars[n] = s.load(types.Types[TFLOAT64], a)
|
||||
s.endBlock().AddEdgeTo(bEnd)
|
||||
|
||||
// Merge results.
|
||||
s.startBlock(bEnd)
|
||||
return s.variable(n, types.Types[TFLOAT64])
|
||||
},
|
||||
sys.ARM)
|
||||
|
||||
makeRoundAMD64 := func(op ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
|
|
|
|||
|
|
@ -210,6 +210,9 @@
|
|||
|
||||
(Round(32|64)F x) -> x
|
||||
|
||||
// fused-multiply-add
|
||||
(Fma x y z) -> (FMULAD z x y)
|
||||
|
||||
// comparisons
|
||||
(Eq8 x y) -> (Equal (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
|
||||
(Eq16 x y) -> (Equal (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
|
||||
|
|
|
|||
|
|
@ -192,6 +192,10 @@ func init() {
|
|||
{name: "MULSF", argLength: 3, reg: fp31, asm: "MULSF", resultInArg0: true}, // arg0 - (arg1 * arg2)
|
||||
{name: "MULSD", argLength: 3, reg: fp31, asm: "MULSD", resultInArg0: true}, // arg0 - (arg1 * arg2)
|
||||
|
||||
// FMULAD only exists on platforms with the VFPv4 instruction set.
|
||||
// Any use must be preceded by a successful check of runtime.armHasVFPv4.
|
||||
{name: "FMULAD", argLength: 3, reg: fp31, asm: "FMULAD", resultInArg0: true}, // arg0 + (arg1 * arg2)
|
||||
|
||||
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
|
||||
{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt
|
||||
{name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true}, // arg0 | arg1
|
||||
|
|
|
|||
|
|
@ -925,6 +925,7 @@ const (
|
|||
OpARMMULAD
|
||||
OpARMMULSF
|
||||
OpARMMULSD
|
||||
OpARMFMULAD
|
||||
OpARMAND
|
||||
OpARMANDconst
|
||||
OpARMOR
|
||||
|
|
@ -12119,6 +12120,22 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FMULAD",
|
||||
argLen: 3,
|
||||
resultInArg0: true,
|
||||
asm: arm.AFMULAD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
{2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "AND",
|
||||
argLen: 2,
|
||||
|
|
|
|||
|
|
@ -538,6 +538,8 @@ func rewriteValueARM(v *Value) bool {
|
|||
return rewriteValueARM_OpEqB_0(v)
|
||||
case OpEqPtr:
|
||||
return rewriteValueARM_OpEqPtr_0(v)
|
||||
case OpFma:
|
||||
return rewriteValueARM_OpFma_0(v)
|
||||
case OpGeq16:
|
||||
return rewriteValueARM_OpGeq16_0(v)
|
||||
case OpGeq16U:
|
||||
|
|
@ -17159,6 +17161,21 @@ func rewriteValueARM_OpEqPtr_0(v *Value) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueARM_OpFma_0(v *Value) bool {
|
||||
// match: (Fma x y z)
|
||||
// cond:
|
||||
// result: (FMULAD z x y)
|
||||
for {
|
||||
z := v.Args[2]
|
||||
x := v.Args[0]
|
||||
y := v.Args[1]
|
||||
v.reset(OpARMFMULAD)
|
||||
v.AddArg(z)
|
||||
v.AddArg(x)
|
||||
v.AddArg(y)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueARM_OpGeq16_0(v *Value) bool {
|
||||
b := v.Block
|
||||
typ := &b.Func.Config.Types
|
||||
|
|
|
|||
|
|
@ -25,5 +25,7 @@ var (
|
|||
x86HasSSE41 bool
|
||||
x86HasFMA bool
|
||||
|
||||
armHasVFPv4 bool
|
||||
|
||||
arm64HasATOMICS bool
|
||||
)
|
||||
|
|
|
|||
|
|
@ -516,6 +516,8 @@ func cpuinit() {
|
|||
x86HasSSE41 = cpu.X86.HasSSE41
|
||||
x86HasFMA = cpu.X86.HasFMA
|
||||
|
||||
armHasVFPv4 = cpu.ARM.HasVFPv4
|
||||
|
||||
arm64HasATOMICS = cpu.ARM64.HasATOMICS
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -109,6 +109,7 @@ func copysign(a, b, c float64) {
|
|||
|
||||
func fma(x, y, z float64) float64 {
|
||||
// amd64:"VFMADD231SD"
|
||||
// arm/6:"FMULAD"
|
||||
// arm64:"FMADDD"
|
||||
// s390x:"FMADD"
|
||||
// ppc64:"FMADD"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue