mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: add fma intrinsic for amd64
To permit ssa-level optimization, this change introduces an amd64 intrinsic
that generates the VFMADD231SD instruction for the fused-multiply-add
operation on systems that support it. System support is detected via
cpu.X86.HasFMA. A rewrite rule can then translate the generic ssa intrinsic
("Fma") to VFMADD231SD.
The benchmark compares the software implementation (old) with the intrinsic
(new).
name old time/op new time/op delta
Fma-4 27.2ns ± 1% 1.0ns ± 9% -96.48% (p=0.008 n=5+5)
Updates #25819.
Change-Id: I966655e5f96817a5d06dff5942418a3915b09584
Reviewed-on: https://go-review.googlesource.com/c/go/+/137156
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
50f4896b72
commit
7a6da218b1
12 changed files with 85 additions and 0 deletions
|
|
@ -147,6 +147,7 @@ func init() {
|
|||
|
||||
fp01 = regInfo{inputs: nil, outputs: fponly}
|
||||
fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
|
||||
fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly}
|
||||
fp21load = regInfo{inputs: []regMask{fp, gpspsb, 0}, outputs: fponly}
|
||||
fpgp = regInfo{inputs: fponly, outputs: gponly}
|
||||
gpfp = regInfo{inputs: gponly, outputs: fponly}
|
||||
|
|
@ -478,6 +479,10 @@ func init() {
|
|||
// Any use must be preceded by a successful check of runtime.x86HasSSE41.
|
||||
{name: "ROUNDSD", argLength: 1, reg: fp11, aux: "Int8", asm: "ROUNDSD"}, // rounds arg0 depending on auxint, 1 means math.Floor, 2 Ceil, 3 Trunc
|
||||
|
||||
// VFMADD231SD only exists on platforms with the FMA3 instruction set.
|
||||
// Any use must be preceded by a successful check of runtime.support_fma.
|
||||
{name: "VFMADD231SD", argLength: 3, reg: fp31, resultInArg0: true, asm: "VFMADD231SD"},
|
||||
|
||||
{name: "SBBQcarrymask", argLength: 1, reg: flagsgp, asm: "SBBQ"}, // (int64)(-1) if carry is set, 0 if carry is clear.
|
||||
{name: "SBBLcarrymask", argLength: 1, reg: flagsgp, asm: "SBBL"}, // (int32)(-1) if carry is set, 0 if carry is clear.
|
||||
// Note: SBBW and SBBB are subsumed by SBBL
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue