mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: add debug-hash flag for fused-multiply-add
This adds a -d debug flag "fmahash" for hashcode search for floating point architecture-dependent problems. This variable has no effect on architectures w/o fused-multiply-add. This was rebased onto the GOSSAHASH renovation so that this could have its own dedicated environment variable, and so that it would be cheap (a nil check) to check it in the normal case. Includes a basic test of the trigger plumbing. Sample use (on arm64, ppc64le, s390x): % GOCOMPILEDEBUG=fmahash=001110110 \ go build -o foo cmd/compile/internal/ssa/testdata/fma.go fmahash triggered main.main:24 101111101101111001110110 GOFMAHASH triggered main.main:20 010111010000101110111011 1.0000000000000002 1.0000000000000004 -2.220446049250313e-16 exit status 1 The intended use is in conjunction with github.com/dr2chase/gossahash, which will probably acquire a flag "-fma" to streamline its use. This tool+use was inspired by an ad hoc use of this technique "in anger" to debug this very problem. This is also a dry-run for using this same technique to identify code sensitive to loop variable lifetime/capture, should we make that change. Example intended use, with current search tool (using old environment variable), for a test example: gossahash -e GOFMAHASH GOMAGIC=GOFMAHASH go run fma.go Trying go args=[...], env=[GOFMAHASH=1 GOMAGIC=GOFMAHASH] go failed (81 distinct triggers): exit status 1 Trying go args=[...], env=[GOFMAHASH=11 GOMAGIC=GOFMAHASH] go failed (39 distinct triggers): exit status 1 Trying go args=[...], env=[GOFMAHASH=011 GOMAGIC=GOFMAHASH] go failed (18 distinct triggers): exit status 1 Trying go args=[...], env=[GOFMAHASH=0011 GOMAGIC=GOFMAHASH] Trying go args=[...], env=[GOFMAHASH=1011 GOMAGIC=GOFMAHASH] ... Trying go args=[...], env=[GOFMAHASH=0110111011 GOMAGIC=GOFMAHASH] Trying go args=[...], env=[GOFMAHASH=1110111011 GOMAGIC=GOFMAHASH] go failed (2 distinct triggers): exit status 1 Trigger string is 'GOFMAHASH triggered math.qzero:427 111111101010011110111011', repeated 6 times Trigger string is 'GOFMAHASH triggered main.main:20 010111010000101110111011', repeated 1 times Trying go args=[...], env=[GOFMAHASH=01110111011 GOMAGIC=GOFMAHASH] go failed (1 distinct triggers): exit status 1 Trigger string is 'GOFMAHASH triggered main.main:20 010111010000101110111011', repeated 1 times Review GSHS_LAST_FAIL.0.log for failing run FINISHED, suggest this command line for debugging: GOSSAFUNC='main.main:20 010111010000101110111011' \ GOFMAHASH=01110111011 GOMAGIC=GOFMAHASH go run fma.go Change-Id: Ifa22dd8f1c37c18fc8a4f7c396345a364bc367d5 Reviewed-on: https://go-review.googlesource.com/c/go/+/394754 TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: David Chase <drchase@google.com> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
This commit is contained in:
parent
1f65c399be
commit
667c53e159
12 changed files with 250 additions and 40 deletions
|
|
@ -25,6 +25,7 @@ type DebugFlags struct {
|
||||||
DumpPtrs int `help:"show Node pointers values in dump output"`
|
DumpPtrs int `help:"show Node pointers values in dump output"`
|
||||||
DwarfInl int `help:"print information about DWARF inlined function creation"`
|
DwarfInl int `help:"print information about DWARF inlined function creation"`
|
||||||
Export int `help:"print export data"`
|
Export int `help:"print export data"`
|
||||||
|
Fmahash string `help:"hash value for use in debugging platform-dependent multiply-add use" concurrent:"ok"`
|
||||||
GCProg int `help:"print dump of GC programs"`
|
GCProg int `help:"print dump of GC programs"`
|
||||||
Gossahash string `help:"hash value for use in debugging the compiler"`
|
Gossahash string `help:"hash value for use in debugging the compiler"`
|
||||||
InlFuncsWithClosures int `help:"allow functions with closures to be inlined"`
|
InlFuncsWithClosures int `help:"allow functions with closures to be inlined"`
|
||||||
|
|
|
||||||
|
|
@ -190,6 +190,10 @@ func ParseFlags() {
|
||||||
hashDebug = NewHashDebug("gosshash", Debug.Gossahash, nil)
|
hashDebug = NewHashDebug("gosshash", Debug.Gossahash, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if Debug.Fmahash != "" {
|
||||||
|
FmaHash = NewHashDebug("fmahash", Debug.Fmahash, nil)
|
||||||
|
}
|
||||||
|
|
||||||
if Flag.MSan && !platform.MSanSupported(buildcfg.GOOS, buildcfg.GOARCH) {
|
if Flag.MSan && !platform.MSanSupported(buildcfg.GOOS, buildcfg.GOARCH) {
|
||||||
log.Fatalf("%s/%s does not support -msan", buildcfg.GOOS, buildcfg.GOARCH)
|
log.Fatalf("%s/%s does not support -msan", buildcfg.GOOS, buildcfg.GOARCH)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,7 @@ type HashDebug struct {
|
||||||
|
|
||||||
// The default compiler-debugging HashDebug, for "-d=gossahash=..."
|
// The default compiler-debugging HashDebug, for "-d=gossahash=..."
|
||||||
var hashDebug *HashDebug
|
var hashDebug *HashDebug
|
||||||
|
var FmaHash *HashDebug
|
||||||
|
|
||||||
// DebugHashMatch reports whether debug variable Gossahash
|
// DebugHashMatch reports whether debug variable Gossahash
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -2937,18 +2937,19 @@
|
||||||
(FNEGD (FNMULD x y)) => (FMULD x y)
|
(FNEGD (FNMULD x y)) => (FMULD x y)
|
||||||
(FNMULS (FNEGS x) y) => (FMULS x y)
|
(FNMULS (FNEGS x) y) => (FMULS x y)
|
||||||
(FNMULD (FNEGD x) y) => (FMULD x y)
|
(FNMULD (FNEGD x) y) => (FMULD x y)
|
||||||
(FADDS a (FMULS x y)) => (FMADDS a x y)
|
|
||||||
(FADDD a (FMULD x y)) => (FMADDD a x y)
|
(FADDS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y)
|
||||||
(FSUBS a (FMULS x y)) => (FMSUBS a x y)
|
(FADDD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y)
|
||||||
(FSUBD a (FMULD x y)) => (FMSUBD a x y)
|
(FSUBS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y)
|
||||||
(FSUBS (FMULS x y) a) => (FNMSUBS a x y)
|
(FSUBD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y)
|
||||||
(FSUBD (FMULD x y) a) => (FNMSUBD a x y)
|
(FSUBS (FMULS x y) a) && a.Block.Func.useFMA(v) => (FNMSUBS a x y)
|
||||||
(FADDS a (FNMULS x y)) => (FMSUBS a x y)
|
(FSUBD (FMULD x y) a) && a.Block.Func.useFMA(v) => (FNMSUBD a x y)
|
||||||
(FADDD a (FNMULD x y)) => (FMSUBD a x y)
|
(FADDS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y)
|
||||||
(FSUBS a (FNMULS x y)) => (FMADDS a x y)
|
(FADDD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y)
|
||||||
(FSUBD a (FNMULD x y)) => (FMADDD a x y)
|
(FSUBS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y)
|
||||||
(FSUBS (FNMULS x y) a) => (FNMADDS a x y)
|
(FSUBD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y)
|
||||||
(FSUBD (FNMULD x y) a) => (FNMADDD a x y)
|
(FSUBS (FNMULS x y) a) && a.Block.Func.useFMA(v) => (FNMADDS a x y)
|
||||||
|
(FSUBD (FNMULD x y) a) && a.Block.Func.useFMA(v) => (FNMADDD a x y)
|
||||||
|
|
||||||
(MOVBUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read8(sym, int64(off)))])
|
(MOVBUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read8(sym, int64(off)))])
|
||||||
(MOVHUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
|
(MOVHUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
|
||||||
|
|
|
||||||
|
|
@ -942,8 +942,8 @@
|
||||||
(FNEG (F(ABS|NABS) x)) => (F(NABS|ABS) x)
|
(FNEG (F(ABS|NABS) x)) => (F(NABS|ABS) x)
|
||||||
|
|
||||||
// floating-point fused multiply-add/sub
|
// floating-point fused multiply-add/sub
|
||||||
(F(ADD|SUB) (FMUL x y) z) => (FM(ADD|SUB) x y z)
|
(F(ADD|SUB) (FMUL x y) z) && x.Block.Func.useFMA(v) => (FM(ADD|SUB) x y z)
|
||||||
(F(ADDS|SUBS) (FMULS x y) z) => (FM(ADDS|SUBS) x y z)
|
(F(ADDS|SUBS) (FMULS x y) z) && x.Block.Func.useFMA(v) => (FM(ADDS|SUBS) x y z)
|
||||||
|
|
||||||
// The following statements are found in encoding/binary functions UintXX (load) and PutUintXX (store)
|
// The following statements are found in encoding/binary functions UintXX (load) and PutUintXX (store)
|
||||||
// and convert the statements in these functions from multiple single byte loads or stores to
|
// and convert the statements in these functions from multiple single byte loads or stores to
|
||||||
|
|
|
||||||
|
|
@ -1254,8 +1254,8 @@
|
||||||
(C(G|LG)IJ {s390x.Greater} (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) borrow))) [0]) => (BRC {s390x.Borrow} borrow)
|
(C(G|LG)IJ {s390x.Greater} (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) borrow))) [0]) => (BRC {s390x.Borrow} borrow)
|
||||||
|
|
||||||
// fused multiply-add
|
// fused multiply-add
|
||||||
(Select0 (F(ADD|SUB) (FMUL y z) x)) => (FM(ADD|SUB) x y z)
|
(Select0 (F(ADD|SUB) (FMUL y z) x)) && x.Block.Func.useFMA(v) => (FM(ADD|SUB) x y z)
|
||||||
(Select0 (F(ADDS|SUBS) (FMULS y z) x)) => (FM(ADDS|SUBS) x y z)
|
(Select0 (F(ADDS|SUBS) (FMULS y z) x)) && x.Block.Func.useFMA(v) => (FM(ADDS|SUBS) x y z)
|
||||||
|
|
||||||
// Convert floating point comparisons against zero into 'load and test' instructions.
|
// Convert floating point comparisons against zero into 'load and test' instructions.
|
||||||
(F(CMP|CMPS) x (FMOV(D|S)const [0.0])) => (LT(D|E)BR x)
|
(F(CMP|CMPS) x (FMOV(D|S)const [0.0])) => (LT(D|E)BR x)
|
||||||
|
|
|
||||||
56
src/cmd/compile/internal/ssa/fmahash_test.go
Normal file
56
src/cmd/compile/internal/ssa/fmahash_test.go
Normal file
|
|
@ -0,0 +1,56 @@
|
||||||
|
// Copyright 2022 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package ssa_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"internal/testenv"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestFmaHash checks that the hash-test machinery works properly for a single case.
|
||||||
|
// It does not check or run the generated code.
|
||||||
|
// The test file is however a useful example of fused-vs-cascaded multiply-add.
|
||||||
|
func TestFmaHash(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("Slow test, usually avoid it, testing.Short")
|
||||||
|
}
|
||||||
|
switch runtime.GOOS {
|
||||||
|
case "linux", "darwin":
|
||||||
|
default:
|
||||||
|
t.Skipf("Slow test, usually avoid it, os=%s not linux or darwin", runtime.GOOS)
|
||||||
|
}
|
||||||
|
switch runtime.GOARCH {
|
||||||
|
case "amd64", "arm64":
|
||||||
|
default:
|
||||||
|
t.Skipf("Slow test, usually avoid it, arch=%s not amd64 or arm64", runtime.GOARCH)
|
||||||
|
}
|
||||||
|
|
||||||
|
testenv.MustHaveGoBuild(t)
|
||||||
|
gocmd := testenv.GoToolPath(t)
|
||||||
|
tmpdir, err := os.MkdirTemp("", "x")
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(tmpdir)
|
||||||
|
source := filepath.Join("testdata", "fma.go")
|
||||||
|
output := filepath.Join(tmpdir, "fma.exe")
|
||||||
|
cmd := exec.Command(gocmd, "build", "-o", output, source)
|
||||||
|
cmd.Env = append(cmd.Env, "GOCOMPILEDEBUG=fmahash=101111101101111001110110", "GOOS=linux", "GOARCH=arm64", "HOME="+tmpdir)
|
||||||
|
t.Logf("%v", cmd)
|
||||||
|
t.Logf("%v", cmd.Env)
|
||||||
|
b, e := cmd.CombinedOutput()
|
||||||
|
if e != nil {
|
||||||
|
t.Error(e)
|
||||||
|
}
|
||||||
|
s := string(b) // Looking for "GOFMAHASH triggered main.main:24"
|
||||||
|
if !strings.Contains(s, "fmahash triggered main.main:24") {
|
||||||
|
t.Errorf("Expected to see 'fmahash triggered main.main:24' in \n-----\n%s-----", s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -801,3 +801,17 @@ func (f *Func) spSb() (sp, sb *Value) {
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// useFMA allows targeted debugging w/ GOFMAHASH
|
||||||
|
// If you have an architecture-dependent FP glitch, this will help you find it.
|
||||||
|
func (f *Func) useFMA(v *Value) bool {
|
||||||
|
if !f.Config.UseFMA {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if base.FmaHash == nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
name := f.fe.MyImportPath() + "." + f.Name
|
||||||
|
return base.FmaHash.DebugHashMatchParam(name, uint64(v.Pos.Line()))
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -4342,6 +4342,7 @@ func rewriteValueARM64_OpARM64FADDD(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
// match: (FADDD a (FMULD x y))
|
// match: (FADDD a (FMULD x y))
|
||||||
|
// cond: a.Block.Func.useFMA(v)
|
||||||
// result: (FMADDD a x y)
|
// result: (FMADDD a x y)
|
||||||
for {
|
for {
|
||||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
|
@ -4351,6 +4352,9 @@ func rewriteValueARM64_OpARM64FADDD(v *Value) bool {
|
||||||
}
|
}
|
||||||
y := v_1.Args[1]
|
y := v_1.Args[1]
|
||||||
x := v_1.Args[0]
|
x := v_1.Args[0]
|
||||||
|
if !(a.Block.Func.useFMA(v)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
v.reset(OpARM64FMADDD)
|
v.reset(OpARM64FMADDD)
|
||||||
v.AddArg3(a, x, y)
|
v.AddArg3(a, x, y)
|
||||||
return true
|
return true
|
||||||
|
|
@ -4358,6 +4362,7 @@ func rewriteValueARM64_OpARM64FADDD(v *Value) bool {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
// match: (FADDD a (FNMULD x y))
|
// match: (FADDD a (FNMULD x y))
|
||||||
|
// cond: a.Block.Func.useFMA(v)
|
||||||
// result: (FMSUBD a x y)
|
// result: (FMSUBD a x y)
|
||||||
for {
|
for {
|
||||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
|
@ -4367,6 +4372,9 @@ func rewriteValueARM64_OpARM64FADDD(v *Value) bool {
|
||||||
}
|
}
|
||||||
y := v_1.Args[1]
|
y := v_1.Args[1]
|
||||||
x := v_1.Args[0]
|
x := v_1.Args[0]
|
||||||
|
if !(a.Block.Func.useFMA(v)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
v.reset(OpARM64FMSUBD)
|
v.reset(OpARM64FMSUBD)
|
||||||
v.AddArg3(a, x, y)
|
v.AddArg3(a, x, y)
|
||||||
return true
|
return true
|
||||||
|
|
@ -4379,6 +4387,7 @@ func rewriteValueARM64_OpARM64FADDS(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
// match: (FADDS a (FMULS x y))
|
// match: (FADDS a (FMULS x y))
|
||||||
|
// cond: a.Block.Func.useFMA(v)
|
||||||
// result: (FMADDS a x y)
|
// result: (FMADDS a x y)
|
||||||
for {
|
for {
|
||||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
|
@ -4388,6 +4397,9 @@ func rewriteValueARM64_OpARM64FADDS(v *Value) bool {
|
||||||
}
|
}
|
||||||
y := v_1.Args[1]
|
y := v_1.Args[1]
|
||||||
x := v_1.Args[0]
|
x := v_1.Args[0]
|
||||||
|
if !(a.Block.Func.useFMA(v)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
v.reset(OpARM64FMADDS)
|
v.reset(OpARM64FMADDS)
|
||||||
v.AddArg3(a, x, y)
|
v.AddArg3(a, x, y)
|
||||||
return true
|
return true
|
||||||
|
|
@ -4395,6 +4407,7 @@ func rewriteValueARM64_OpARM64FADDS(v *Value) bool {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
// match: (FADDS a (FNMULS x y))
|
// match: (FADDS a (FNMULS x y))
|
||||||
|
// cond: a.Block.Func.useFMA(v)
|
||||||
// result: (FMSUBS a x y)
|
// result: (FMSUBS a x y)
|
||||||
for {
|
for {
|
||||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
|
@ -4404,6 +4417,9 @@ func rewriteValueARM64_OpARM64FADDS(v *Value) bool {
|
||||||
}
|
}
|
||||||
y := v_1.Args[1]
|
y := v_1.Args[1]
|
||||||
x := v_1.Args[0]
|
x := v_1.Args[0]
|
||||||
|
if !(a.Block.Func.useFMA(v)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
v.reset(OpARM64FMSUBS)
|
v.reset(OpARM64FMSUBS)
|
||||||
v.AddArg3(a, x, y)
|
v.AddArg3(a, x, y)
|
||||||
return true
|
return true
|
||||||
|
|
@ -5458,6 +5474,7 @@ func rewriteValueARM64_OpARM64FSUBD(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
// match: (FSUBD a (FMULD x y))
|
// match: (FSUBD a (FMULD x y))
|
||||||
|
// cond: a.Block.Func.useFMA(v)
|
||||||
// result: (FMSUBD a x y)
|
// result: (FMSUBD a x y)
|
||||||
for {
|
for {
|
||||||
a := v_0
|
a := v_0
|
||||||
|
|
@ -5466,11 +5483,15 @@ func rewriteValueARM64_OpARM64FSUBD(v *Value) bool {
|
||||||
}
|
}
|
||||||
y := v_1.Args[1]
|
y := v_1.Args[1]
|
||||||
x := v_1.Args[0]
|
x := v_1.Args[0]
|
||||||
|
if !(a.Block.Func.useFMA(v)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
v.reset(OpARM64FMSUBD)
|
v.reset(OpARM64FMSUBD)
|
||||||
v.AddArg3(a, x, y)
|
v.AddArg3(a, x, y)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// match: (FSUBD (FMULD x y) a)
|
// match: (FSUBD (FMULD x y) a)
|
||||||
|
// cond: a.Block.Func.useFMA(v)
|
||||||
// result: (FNMSUBD a x y)
|
// result: (FNMSUBD a x y)
|
||||||
for {
|
for {
|
||||||
if v_0.Op != OpARM64FMULD {
|
if v_0.Op != OpARM64FMULD {
|
||||||
|
|
@ -5479,11 +5500,15 @@ func rewriteValueARM64_OpARM64FSUBD(v *Value) bool {
|
||||||
y := v_0.Args[1]
|
y := v_0.Args[1]
|
||||||
x := v_0.Args[0]
|
x := v_0.Args[0]
|
||||||
a := v_1
|
a := v_1
|
||||||
|
if !(a.Block.Func.useFMA(v)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
v.reset(OpARM64FNMSUBD)
|
v.reset(OpARM64FNMSUBD)
|
||||||
v.AddArg3(a, x, y)
|
v.AddArg3(a, x, y)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// match: (FSUBD a (FNMULD x y))
|
// match: (FSUBD a (FNMULD x y))
|
||||||
|
// cond: a.Block.Func.useFMA(v)
|
||||||
// result: (FMADDD a x y)
|
// result: (FMADDD a x y)
|
||||||
for {
|
for {
|
||||||
a := v_0
|
a := v_0
|
||||||
|
|
@ -5492,11 +5517,15 @@ func rewriteValueARM64_OpARM64FSUBD(v *Value) bool {
|
||||||
}
|
}
|
||||||
y := v_1.Args[1]
|
y := v_1.Args[1]
|
||||||
x := v_1.Args[0]
|
x := v_1.Args[0]
|
||||||
|
if !(a.Block.Func.useFMA(v)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
v.reset(OpARM64FMADDD)
|
v.reset(OpARM64FMADDD)
|
||||||
v.AddArg3(a, x, y)
|
v.AddArg3(a, x, y)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// match: (FSUBD (FNMULD x y) a)
|
// match: (FSUBD (FNMULD x y) a)
|
||||||
|
// cond: a.Block.Func.useFMA(v)
|
||||||
// result: (FNMADDD a x y)
|
// result: (FNMADDD a x y)
|
||||||
for {
|
for {
|
||||||
if v_0.Op != OpARM64FNMULD {
|
if v_0.Op != OpARM64FNMULD {
|
||||||
|
|
@ -5505,6 +5534,9 @@ func rewriteValueARM64_OpARM64FSUBD(v *Value) bool {
|
||||||
y := v_0.Args[1]
|
y := v_0.Args[1]
|
||||||
x := v_0.Args[0]
|
x := v_0.Args[0]
|
||||||
a := v_1
|
a := v_1
|
||||||
|
if !(a.Block.Func.useFMA(v)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
v.reset(OpARM64FNMADDD)
|
v.reset(OpARM64FNMADDD)
|
||||||
v.AddArg3(a, x, y)
|
v.AddArg3(a, x, y)
|
||||||
return true
|
return true
|
||||||
|
|
@ -5515,6 +5547,7 @@ func rewriteValueARM64_OpARM64FSUBS(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
// match: (FSUBS a (FMULS x y))
|
// match: (FSUBS a (FMULS x y))
|
||||||
|
// cond: a.Block.Func.useFMA(v)
|
||||||
// result: (FMSUBS a x y)
|
// result: (FMSUBS a x y)
|
||||||
for {
|
for {
|
||||||
a := v_0
|
a := v_0
|
||||||
|
|
@ -5523,11 +5556,15 @@ func rewriteValueARM64_OpARM64FSUBS(v *Value) bool {
|
||||||
}
|
}
|
||||||
y := v_1.Args[1]
|
y := v_1.Args[1]
|
||||||
x := v_1.Args[0]
|
x := v_1.Args[0]
|
||||||
|
if !(a.Block.Func.useFMA(v)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
v.reset(OpARM64FMSUBS)
|
v.reset(OpARM64FMSUBS)
|
||||||
v.AddArg3(a, x, y)
|
v.AddArg3(a, x, y)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// match: (FSUBS (FMULS x y) a)
|
// match: (FSUBS (FMULS x y) a)
|
||||||
|
// cond: a.Block.Func.useFMA(v)
|
||||||
// result: (FNMSUBS a x y)
|
// result: (FNMSUBS a x y)
|
||||||
for {
|
for {
|
||||||
if v_0.Op != OpARM64FMULS {
|
if v_0.Op != OpARM64FMULS {
|
||||||
|
|
@ -5536,11 +5573,15 @@ func rewriteValueARM64_OpARM64FSUBS(v *Value) bool {
|
||||||
y := v_0.Args[1]
|
y := v_0.Args[1]
|
||||||
x := v_0.Args[0]
|
x := v_0.Args[0]
|
||||||
a := v_1
|
a := v_1
|
||||||
|
if !(a.Block.Func.useFMA(v)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
v.reset(OpARM64FNMSUBS)
|
v.reset(OpARM64FNMSUBS)
|
||||||
v.AddArg3(a, x, y)
|
v.AddArg3(a, x, y)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// match: (FSUBS a (FNMULS x y))
|
// match: (FSUBS a (FNMULS x y))
|
||||||
|
// cond: a.Block.Func.useFMA(v)
|
||||||
// result: (FMADDS a x y)
|
// result: (FMADDS a x y)
|
||||||
for {
|
for {
|
||||||
a := v_0
|
a := v_0
|
||||||
|
|
@ -5549,11 +5590,15 @@ func rewriteValueARM64_OpARM64FSUBS(v *Value) bool {
|
||||||
}
|
}
|
||||||
y := v_1.Args[1]
|
y := v_1.Args[1]
|
||||||
x := v_1.Args[0]
|
x := v_1.Args[0]
|
||||||
|
if !(a.Block.Func.useFMA(v)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
v.reset(OpARM64FMADDS)
|
v.reset(OpARM64FMADDS)
|
||||||
v.AddArg3(a, x, y)
|
v.AddArg3(a, x, y)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// match: (FSUBS (FNMULS x y) a)
|
// match: (FSUBS (FNMULS x y) a)
|
||||||
|
// cond: a.Block.Func.useFMA(v)
|
||||||
// result: (FNMADDS a x y)
|
// result: (FNMADDS a x y)
|
||||||
for {
|
for {
|
||||||
if v_0.Op != OpARM64FNMULS {
|
if v_0.Op != OpARM64FNMULS {
|
||||||
|
|
@ -5562,6 +5607,9 @@ func rewriteValueARM64_OpARM64FSUBS(v *Value) bool {
|
||||||
y := v_0.Args[1]
|
y := v_0.Args[1]
|
||||||
x := v_0.Args[0]
|
x := v_0.Args[0]
|
||||||
a := v_1
|
a := v_1
|
||||||
|
if !(a.Block.Func.useFMA(v)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
v.reset(OpARM64FNMADDS)
|
v.reset(OpARM64FNMADDS)
|
||||||
v.AddArg3(a, x, y)
|
v.AddArg3(a, x, y)
|
||||||
return true
|
return true
|
||||||
|
|
|
||||||
|
|
@ -4655,18 +4655,27 @@ func rewriteValuePPC64_OpPPC64FADD(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
// match: (FADD (FMUL x y) z)
|
// match: (FADD (FMUL x y) z)
|
||||||
|
// cond: x.Block.Func.useFMA(v)
|
||||||
// result: (FMADD x y z)
|
// result: (FMADD x y z)
|
||||||
for {
|
for {
|
||||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
if v_0.Op != OpPPC64FMUL {
|
if v_0.Op != OpPPC64FMUL {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
y := v_0.Args[1]
|
_ = v_0.Args[1]
|
||||||
x := v_0.Args[0]
|
v_0_0 := v_0.Args[0]
|
||||||
z := v_1
|
v_0_1 := v_0.Args[1]
|
||||||
v.reset(OpPPC64FMADD)
|
for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
|
||||||
v.AddArg3(x, y, z)
|
x := v_0_0
|
||||||
return true
|
y := v_0_1
|
||||||
|
z := v_1
|
||||||
|
if !(x.Block.Func.useFMA(v)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpPPC64FMADD)
|
||||||
|
v.AddArg3(x, y, z)
|
||||||
|
return true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
@ -4676,18 +4685,27 @@ func rewriteValuePPC64_OpPPC64FADDS(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
// match: (FADDS (FMULS x y) z)
|
// match: (FADDS (FMULS x y) z)
|
||||||
|
// cond: x.Block.Func.useFMA(v)
|
||||||
// result: (FMADDS x y z)
|
// result: (FMADDS x y z)
|
||||||
for {
|
for {
|
||||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
if v_0.Op != OpPPC64FMULS {
|
if v_0.Op != OpPPC64FMULS {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
y := v_0.Args[1]
|
_ = v_0.Args[1]
|
||||||
x := v_0.Args[0]
|
v_0_0 := v_0.Args[0]
|
||||||
z := v_1
|
v_0_1 := v_0.Args[1]
|
||||||
v.reset(OpPPC64FMADDS)
|
for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
|
||||||
v.AddArg3(x, y, z)
|
x := v_0_0
|
||||||
return true
|
y := v_0_1
|
||||||
|
z := v_1
|
||||||
|
if !(x.Block.Func.useFMA(v)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpPPC64FMADDS)
|
||||||
|
v.AddArg3(x, y, z)
|
||||||
|
return true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
@ -5078,17 +5096,27 @@ func rewriteValuePPC64_OpPPC64FSUB(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
// match: (FSUB (FMUL x y) z)
|
// match: (FSUB (FMUL x y) z)
|
||||||
|
// cond: x.Block.Func.useFMA(v)
|
||||||
// result: (FMSUB x y z)
|
// result: (FMSUB x y z)
|
||||||
for {
|
for {
|
||||||
if v_0.Op != OpPPC64FMUL {
|
if v_0.Op != OpPPC64FMUL {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
y := v_0.Args[1]
|
_ = v_0.Args[1]
|
||||||
x := v_0.Args[0]
|
v_0_0 := v_0.Args[0]
|
||||||
z := v_1
|
v_0_1 := v_0.Args[1]
|
||||||
v.reset(OpPPC64FMSUB)
|
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
|
||||||
v.AddArg3(x, y, z)
|
x := v_0_0
|
||||||
return true
|
y := v_0_1
|
||||||
|
z := v_1
|
||||||
|
if !(x.Block.Func.useFMA(v)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpPPC64FMSUB)
|
||||||
|
v.AddArg3(x, y, z)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
@ -5096,17 +5124,27 @@ func rewriteValuePPC64_OpPPC64FSUBS(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
// match: (FSUBS (FMULS x y) z)
|
// match: (FSUBS (FMULS x y) z)
|
||||||
|
// cond: x.Block.Func.useFMA(v)
|
||||||
// result: (FMSUBS x y z)
|
// result: (FMSUBS x y z)
|
||||||
for {
|
for {
|
||||||
if v_0.Op != OpPPC64FMULS {
|
if v_0.Op != OpPPC64FMULS {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
y := v_0.Args[1]
|
_ = v_0.Args[1]
|
||||||
x := v_0.Args[0]
|
v_0_0 := v_0.Args[0]
|
||||||
z := v_1
|
v_0_1 := v_0.Args[1]
|
||||||
v.reset(OpPPC64FMSUBS)
|
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
|
||||||
v.AddArg3(x, y, z)
|
x := v_0_0
|
||||||
return true
|
y := v_0_1
|
||||||
|
z := v_1
|
||||||
|
if !(x.Block.Func.useFMA(v)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpPPC64FMSUBS)
|
||||||
|
v.AddArg3(x, y, z)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -15335,6 +15335,7 @@ func rewriteValueS390X_OpSelect0(v *Value) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// match: (Select0 (FADD (FMUL y z) x))
|
// match: (Select0 (FADD (FMUL y z) x))
|
||||||
|
// cond: x.Block.Func.useFMA(v)
|
||||||
// result: (FMADD x y z)
|
// result: (FMADD x y z)
|
||||||
for {
|
for {
|
||||||
if v_0.Op != OpS390XFADD {
|
if v_0.Op != OpS390XFADD {
|
||||||
|
|
@ -15350,6 +15351,9 @@ func rewriteValueS390X_OpSelect0(v *Value) bool {
|
||||||
z := v_0_0.Args[1]
|
z := v_0_0.Args[1]
|
||||||
y := v_0_0.Args[0]
|
y := v_0_0.Args[0]
|
||||||
x := v_0_1
|
x := v_0_1
|
||||||
|
if !(x.Block.Func.useFMA(v)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
v.reset(OpS390XFMADD)
|
v.reset(OpS390XFMADD)
|
||||||
v.AddArg3(x, y, z)
|
v.AddArg3(x, y, z)
|
||||||
return true
|
return true
|
||||||
|
|
@ -15357,6 +15361,7 @@ func rewriteValueS390X_OpSelect0(v *Value) bool {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
// match: (Select0 (FSUB (FMUL y z) x))
|
// match: (Select0 (FSUB (FMUL y z) x))
|
||||||
|
// cond: x.Block.Func.useFMA(v)
|
||||||
// result: (FMSUB x y z)
|
// result: (FMSUB x y z)
|
||||||
for {
|
for {
|
||||||
if v_0.Op != OpS390XFSUB {
|
if v_0.Op != OpS390XFSUB {
|
||||||
|
|
@ -15369,11 +15374,15 @@ func rewriteValueS390X_OpSelect0(v *Value) bool {
|
||||||
}
|
}
|
||||||
z := v_0_0.Args[1]
|
z := v_0_0.Args[1]
|
||||||
y := v_0_0.Args[0]
|
y := v_0_0.Args[0]
|
||||||
|
if !(x.Block.Func.useFMA(v)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
v.reset(OpS390XFMSUB)
|
v.reset(OpS390XFMSUB)
|
||||||
v.AddArg3(x, y, z)
|
v.AddArg3(x, y, z)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// match: (Select0 (FADDS (FMULS y z) x))
|
// match: (Select0 (FADDS (FMULS y z) x))
|
||||||
|
// cond: x.Block.Func.useFMA(v)
|
||||||
// result: (FMADDS x y z)
|
// result: (FMADDS x y z)
|
||||||
for {
|
for {
|
||||||
if v_0.Op != OpS390XFADDS {
|
if v_0.Op != OpS390XFADDS {
|
||||||
|
|
@ -15389,6 +15398,9 @@ func rewriteValueS390X_OpSelect0(v *Value) bool {
|
||||||
z := v_0_0.Args[1]
|
z := v_0_0.Args[1]
|
||||||
y := v_0_0.Args[0]
|
y := v_0_0.Args[0]
|
||||||
x := v_0_1
|
x := v_0_1
|
||||||
|
if !(x.Block.Func.useFMA(v)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
v.reset(OpS390XFMADDS)
|
v.reset(OpS390XFMADDS)
|
||||||
v.AddArg3(x, y, z)
|
v.AddArg3(x, y, z)
|
||||||
return true
|
return true
|
||||||
|
|
@ -15396,6 +15408,7 @@ func rewriteValueS390X_OpSelect0(v *Value) bool {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
// match: (Select0 (FSUBS (FMULS y z) x))
|
// match: (Select0 (FSUBS (FMULS y z) x))
|
||||||
|
// cond: x.Block.Func.useFMA(v)
|
||||||
// result: (FMSUBS x y z)
|
// result: (FMSUBS x y z)
|
||||||
for {
|
for {
|
||||||
if v_0.Op != OpS390XFSUBS {
|
if v_0.Op != OpS390XFSUBS {
|
||||||
|
|
@ -15408,6 +15421,9 @@ func rewriteValueS390X_OpSelect0(v *Value) bool {
|
||||||
}
|
}
|
||||||
z := v_0_0.Args[1]
|
z := v_0_0.Args[1]
|
||||||
y := v_0_0.Args[0]
|
y := v_0_0.Args[0]
|
||||||
|
if !(x.Block.Func.useFMA(v)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
v.reset(OpS390XFMSUBS)
|
v.reset(OpS390XFMSUBS)
|
||||||
v.AddArg3(x, y, z)
|
v.AddArg3(x, y, z)
|
||||||
return true
|
return true
|
||||||
|
|
|
||||||
31
src/cmd/compile/internal/ssa/testdata/fma.go
vendored
Normal file
31
src/cmd/compile/internal/ssa/testdata/fma.go
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
// Copyright 2022 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:noinline
|
||||||
|
func f(x float64) float64 {
|
||||||
|
return x
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
w, x, y := 1.0, 1.0, 1.0
|
||||||
|
x = f(x + x/(1<<52))
|
||||||
|
w = f(w / (1 << 27))
|
||||||
|
y = f(y + y/(1<<52))
|
||||||
|
w0 := f(2 * w * (1 - w))
|
||||||
|
w1 := f(w * (1 + w))
|
||||||
|
x = x + w0*w1 // GOFMAHASH=101111101101111001110110
|
||||||
|
y = y + f(w0*w1)
|
||||||
|
fmt.Println(x, y, x-y)
|
||||||
|
|
||||||
|
if x != y {
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue