cmd/compile: emit classify instructions for infinity tests on riscv64

The 'classify' instruction on RISC-V sets a bit in a mask to indicate
the class a floating point value belongs to (e.g. whether the value is
an infinity, a normal number, a subnormal number and so on). There are
other places this instruction is useful but for now I've just used it
for infinity tests.

The gains are relatively small (~1-2 instructions per IsInf call) but
using FCLASSD does potentially unlock further optimizations. It also
reduces the number of loads from memory and the number of moves
between general purpose and floating point register files.

goos: linux
goarch: riscv64
pkg: math
cpu: Spacemit(R) X60
                    │        sec/op        │   sec/op     vs base                │
Acos                           159.9n ± 0%   173.7n ± 0%   +8.66% (p=0.000 n=10)
Acosh                          249.8n ± 0%   254.4n ± 0%   +1.86% (p=0.000 n=10)
Asin                           159.9n ± 0%   173.7n ± 0%   +8.66% (p=0.000 n=10)
Asinh                          292.2n ± 0%   283.0n ± 0%   -3.15% (p=0.000 n=10)
Atan                           119.1n ± 0%   119.0n ± 0%   -0.08% (p=0.036 n=10)
Atanh                          265.1n ± 0%   271.6n ± 0%   +2.43% (p=0.000 n=10)
Atan2                          194.9n ± 0%   186.7n ± 0%   -4.23% (p=0.000 n=10)
Cbrt                           216.3n ± 0%   203.1n ± 0%   -6.10% (p=0.000 n=10)
Ceil                           31.82n ± 0%   31.81n ± 0%        ~ (p=0.063 n=10)
Copysign                       4.897n ± 0%   4.893n ± 3%   -0.08% (p=0.038 n=10)
Cos                            123.9n ± 0%   107.7n ± 1%  -13.03% (p=0.000 n=10)
Cosh                           293.0n ± 0%   264.6n ± 0%   -9.68% (p=0.000 n=10)
Erf                            150.0n ± 0%   133.8n ± 0%  -10.80% (p=0.000 n=10)
Erfc                           151.8n ± 0%   137.9n ± 0%   -9.16% (p=0.000 n=10)
Erfinv                         173.8n ± 0%   173.8n ± 0%        ~ (p=0.820 n=10)
Erfcinv                        173.8n ± 0%   173.8n ± 0%        ~ (p=1.000 n=10)
Exp                            247.7n ± 0%   220.4n ± 0%  -11.04% (p=0.000 n=10)
ExpGo                          261.4n ± 0%   232.5n ± 0%  -11.04% (p=0.000 n=10)
Expm1                          176.2n ± 0%   164.9n ± 0%   -6.41% (p=0.000 n=10)
Exp2                           220.4n ± 0%   190.2n ± 0%  -13.70% (p=0.000 n=10)
Exp2Go                         232.5n ± 0%   204.0n ± 0%  -12.22% (p=0.000 n=10)
Abs                            4.897n ± 0%   4.897n ± 0%        ~ (p=0.726 n=10)
Dim                            16.32n ± 0%   16.31n ± 0%        ~ (p=0.770 n=10)
Floor                          31.84n ± 0%   31.83n ± 0%        ~ (p=0.677 n=10)
Max                            26.11n ± 0%   26.13n ± 0%        ~ (p=0.290 n=10)
Min                            26.10n ± 0%   26.11n ± 0%        ~ (p=0.424 n=10)
Mod                            416.2n ± 0%   337.8n ± 0%  -18.83% (p=0.000 n=10)
Frexp                          63.65n ± 0%   50.60n ± 0%  -20.50% (p=0.000 n=10)
Gamma                          218.8n ± 0%   206.4n ± 0%   -5.62% (p=0.000 n=10)
Hypot                          92.20n ± 0%   94.69n ± 0%   +2.70% (p=0.000 n=10)
HypotGo                        107.7n ± 0%   109.3n ± 0%   +1.49% (p=0.000 n=10)
Ilogb                          59.54n ± 0%   44.04n ± 0%  -26.04% (p=0.000 n=10)
J0                             708.9n ± 0%   674.5n ± 0%   -4.86% (p=0.000 n=10)
J1                             707.6n ± 0%   676.1n ± 0%   -4.44% (p=0.000 n=10)
Jn                             1.513µ ± 0%   1.427µ ± 0%   -5.68% (p=0.000 n=10)
Ldexp                          70.20n ± 0%   57.09n ± 0%  -18.68% (p=0.000 n=10)
Lgamma                         201.5n ± 0%   185.3n ± 1%   -8.01% (p=0.000 n=10)
Log                            201.5n ± 0%   182.7n ± 0%   -9.35% (p=0.000 n=10)
Logb                           59.54n ± 0%   46.53n ± 0%  -21.86% (p=0.000 n=10)
Log1p                          178.8n ± 0%   173.9n ± 6%   -2.74% (p=0.021 n=10)
Log10                          201.4n ± 0%   184.3n ± 0%   -8.49% (p=0.000 n=10)
Log2                           79.17n ± 0%   66.07n ± 0%  -16.54% (p=0.000 n=10)
Modf                           34.27n ± 0%   34.25n ± 0%        ~ (p=0.559 n=10)
Nextafter32                    49.34n ± 0%   49.37n ± 0%   +0.05% (p=0.040 n=10)
Nextafter64                    43.66n ± 0%   43.66n ± 0%        ~ (p=0.869 n=10)
PowInt                         309.1n ± 0%   267.4n ± 0%  -13.49% (p=0.000 n=10)
PowFrac                        769.6n ± 0%   677.3n ± 0%  -11.98% (p=0.000 n=10)
Pow10Pos                       13.88n ± 0%   13.88n ± 0%        ~ (p=0.811 n=10)
Pow10Neg                       19.58n ± 0%   19.57n ± 0%        ~ (p=0.993 n=10)
Round                          23.65n ± 0%   23.66n ± 0%        ~ (p=0.354 n=10)
RoundToEven                    27.75n ± 0%   27.75n ± 0%        ~ (p=0.971 n=10)
Remainder                      380.0n ± 0%   309.9n ± 0%  -18.45% (p=0.000 n=10)
Signbit                        13.06n ± 0%   13.06n ± 0%        ~ (p=1.000 n=10)
Sin                            133.8n ± 0%   120.8n ± 0%   -9.75% (p=0.000 n=10)
Sincos                         160.7n ± 0%   147.7n ± 0%   -8.12% (p=0.000 n=10)
Sinh                           305.9n ± 0%   277.9n ± 0%   -9.17% (p=0.000 n=10)
SqrtIndirect                   3.265n ± 0%   3.264n ± 0%        ~ (p=0.546 n=10)
SqrtLatency                    19.58n ± 0%   19.58n ± 0%        ~ (p=0.973 n=10)
SqrtIndirectLatency            19.59n ± 0%   19.58n ± 0%        ~ (p=0.370 n=10)
SqrtGoLatency                  205.7n ± 0%   202.7n ± 0%   -1.46% (p=0.000 n=10)
SqrtPrime                      4.953µ ± 0%   4.954µ ± 0%        ~ (p=0.477 n=10)
Tan                            163.2n ± 0%   150.2n ± 0%   -7.99% (p=0.000 n=10)
Tanh                           312.4n ± 0%   284.2n ± 0%   -9.01% (p=0.000 n=10)
Trunc                          31.83n ± 0%   31.83n ± 0%        ~ (p=0.663 n=10)
Y0                             701.0n ± 0%   669.2n ± 0%   -4.54% (p=0.000 n=10)
Y1                             704.5n ± 0%   672.4n ± 0%   -4.55% (p=0.000 n=10)
Yn                             1.490µ ± 0%   1.422µ ± 0%   -4.60% (p=0.000 n=10)
Float64bits                    5.713n ± 0%   5.710n ± 0%        ~ (p=0.926 n=10)
Float64frombits                4.896n ± 0%   4.896n ± 0%        ~ (p=0.663 n=10)
Float32bits                    12.25n ± 0%   12.25n ± 0%        ~ (p=0.571 n=10)
Float32frombits                4.898n ± 0%   4.896n ± 0%        ~ (p=0.754 n=10)
FMA                            4.895n ± 0%   4.895n ± 0%        ~ (p=0.745 n=10)
geomean                        94.40n        89.43n        -5.27%

Change-Id: I4fe0f2e9f609e38d79463f9ba2519a3f9427432e
Reviewed-on: https://go-review.googlesource.com/c/go/+/348389
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
Michael Munday 2021-09-08 13:48:48 +00:00 committed by Meng Zhuo
parent ca66f907dd
commit 320df537cc
7 changed files with 426 additions and 0 deletions

View file

@ -420,6 +420,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVXS, ssa.OpRISCV64FMVDX, ssa.OpRISCV64FMVXD, ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVXS, ssa.OpRISCV64FMVDX, ssa.OpRISCV64FMVXD,
ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS, ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS,
ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD, ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD,
ssa.OpRISCV64FCLASSS, ssa.OpRISCV64FCLASSD,
ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW, ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW,
ssa.OpRISCV64REV8, ssa.OpRISCV64CPOP, ssa.OpRISCV64CPOPW: ssa.OpRISCV64REV8, ssa.OpRISCV64CPOP, ssa.OpRISCV64CPOPW:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())

View file

@ -862,6 +862,18 @@
(F(MADD|NMADD|MSUB|NMSUB)D neg:(FNEGD x) y z) && neg.Uses == 1 => (F(NMSUB|MSUB|NMADD|MADD)D x y z) (F(MADD|NMADD|MSUB|NMSUB)D neg:(FNEGD x) y z) && neg.Uses == 1 => (F(NMSUB|MSUB|NMADD|MADD)D x y z)
(F(MADD|NMADD|MSUB|NMSUB)D x y neg:(FNEGD z)) && neg.Uses == 1 => (F(MSUB|NMSUB|MADD|NMADD)D x y z) (F(MADD|NMADD|MSUB|NMSUB)D x y neg:(FNEGD z)) && neg.Uses == 1 => (F(MSUB|NMSUB|MADD|NMADD)D x y z)
// Test for -∞ (bit 0) using 64 bit classify instruction.
(FLTD x (FMVDX (MOVDconst [int64(math.Float64bits(-math.MaxFloat64))]))) => (ANDI [1] (FCLASSD x))
(FLED (FMVDX (MOVDconst [int64(math.Float64bits(-math.MaxFloat64))])) x) => (SNEZ (ANDI <typ.Int64> [0xff &^ 1] (FCLASSD x)))
(FEQD x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(-1)))]))) => (ANDI [1] (FCLASSD x))
(FNED x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(-1)))]))) => (SEQZ (ANDI <typ.Int64> [1] (FCLASSD x)))
// Test for +∞ (bit 7) using 64 bit classify instruction.
(FLTD (FMVDX (MOVDconst [int64(math.Float64bits(math.MaxFloat64))])) x) => (SNEZ (ANDI <typ.Int64> [1<<7] (FCLASSD x)))
(FLED x (FMVDX (MOVDconst [int64(math.Float64bits(math.MaxFloat64))]))) => (SNEZ (ANDI <typ.Int64> [0xff &^ (1<<7)] (FCLASSD x)))
(FEQD x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(1)))]))) => (SNEZ (ANDI <typ.Int64> [1<<7] (FCLASSD x)))
(FNED x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(1)))]))) => (SEQZ (ANDI <typ.Int64> [1<<7] (FCLASSD x)))
// //
// Optimisations for rva22u64 and above. // Optimisations for rva22u64 and above.
// //

View file

@ -497,6 +497,27 @@ func init() {
{name: "FLED", argLength: 2, reg: fp2gp, asm: "FLED"}, // arg0 <= arg1 {name: "FLED", argLength: 2, reg: fp2gp, asm: "FLED"}, // arg0 <= arg1
{name: "LoweredFMIND", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMIND", commutative: true, typ: "Float64"}, // min(arg0, arg1) {name: "LoweredFMIND", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMIND", commutative: true, typ: "Float64"}, // min(arg0, arg1)
{name: "LoweredFMAXD", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXD", commutative: true, typ: "Float64"}, // max(arg0, arg1) {name: "LoweredFMAXD", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXD", commutative: true, typ: "Float64"}, // max(arg0, arg1)
// Floating point classify (in the F and D extensions).
//
// The FCLASS instructions will always set exactly one bit in the output
// register, all other bits will be cleared.
//
// Bit | Class
// ====+=============================
// 0 | -∞
// 1 | a negative normal number
// 2 | a negative subnormal number
// 3 | -0
// 4 | +0
// 5 | a positive subnormal number
// 6 | a positive normal number
// 7 | +∞
// 8 | qNaN
// 9 | sNaN
// ====+=============================
{name: "FCLASSS", argLength: 1, reg: fpgp, asm: "FCLASSS", typ: "Int64"}, // classify float32
{name: "FCLASSD", argLength: 1, reg: fpgp, asm: "FCLASSD", typ: "Int64"}, // classify float64
} }
RISCV64blocks := []blockData{ RISCV64blocks := []blockData{

View file

@ -2644,6 +2644,8 @@ const (
OpRISCV64FLED OpRISCV64FLED
OpRISCV64LoweredFMIND OpRISCV64LoweredFMIND
OpRISCV64LoweredFMAXD OpRISCV64LoweredFMAXD
OpRISCV64FCLASSS
OpRISCV64FCLASSD
OpS390XFADDS OpS390XFADDS
OpS390XFADD OpS390XFADD
@ -35611,6 +35613,32 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "FCLASSS",
argLen: 1,
asm: riscv.AFCLASSS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
},
},
{
name: "FCLASSD",
argLen: 1,
asm: riscv.AFCLASSD,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
},
},
{ {
name: "FADDS", name: "FADDS",

View file

@ -513,6 +513,12 @@ func rewriteValueRISCV64(v *Value) bool {
return rewriteValueRISCV64_OpRISCV64FADDD(v) return rewriteValueRISCV64_OpRISCV64FADDD(v)
case OpRISCV64FADDS: case OpRISCV64FADDS:
return rewriteValueRISCV64_OpRISCV64FADDS(v) return rewriteValueRISCV64_OpRISCV64FADDS(v)
case OpRISCV64FEQD:
return rewriteValueRISCV64_OpRISCV64FEQD(v)
case OpRISCV64FLED:
return rewriteValueRISCV64_OpRISCV64FLED(v)
case OpRISCV64FLTD:
return rewriteValueRISCV64_OpRISCV64FLTD(v)
case OpRISCV64FMADDD: case OpRISCV64FMADDD:
return rewriteValueRISCV64_OpRISCV64FMADDD(v) return rewriteValueRISCV64_OpRISCV64FMADDD(v)
case OpRISCV64FMADDS: case OpRISCV64FMADDS:
@ -529,6 +535,8 @@ func rewriteValueRISCV64(v *Value) bool {
return rewriteValueRISCV64_OpRISCV64FMSUBD(v) return rewriteValueRISCV64_OpRISCV64FMSUBD(v)
case OpRISCV64FMSUBS: case OpRISCV64FMSUBS:
return rewriteValueRISCV64_OpRISCV64FMSUBS(v) return rewriteValueRISCV64_OpRISCV64FMSUBS(v)
case OpRISCV64FNED:
return rewriteValueRISCV64_OpRISCV64FNED(v)
case OpRISCV64FNMADDD: case OpRISCV64FNMADDD:
return rewriteValueRISCV64_OpRISCV64FNMADDD(v) return rewriteValueRISCV64_OpRISCV64FNMADDD(v)
case OpRISCV64FNMADDS: case OpRISCV64FNMADDS:
@ -3762,6 +3770,149 @@ func rewriteValueRISCV64_OpRISCV64FADDS(v *Value) bool {
} }
return false return false
} }
// rewriteValueRISCV64_OpRISCV64FEQD tries to replace a float64 equality
// comparison against an infinity constant with a bit test on the result of
// FCLASSD. It returns true if v was rewritten in place and false if neither
// rule matched.
func rewriteValueRISCV64_OpRISCV64FEQD(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (FEQD x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(-1)))])))
// result: (ANDI [1] (FCLASSD x))
for {
// The inner loop runs twice, swapping v_0 and v_1 after the first pass,
// so the constant is recognised in either argument position.
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpRISCV64FMVDX {
continue
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1_0.AuxInt) != int64(math.Float64bits(math.Inf(-1))) {
continue
}
// Masking with 1 keeps only bit 0, so the ANDI result is already 0 or 1
// and no SNEZ is emitted for this rule.
v.reset(OpRISCV64ANDI)
v.AuxInt = int64ToAuxInt(1)
v0 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v0.AddArg(x)
v.AddArg(v0)
return true
}
break
}
// match: (FEQD x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(1)))])))
// result: (SNEZ (ANDI <typ.Int64> [1<<7] (FCLASSD x)))
for {
// As above, both argument orders are tried.
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpRISCV64FMVDX {
continue
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1_0.AuxInt) != int64(math.Float64bits(math.Inf(1))) {
continue
}
// Masking with 1<<7 leaves 0 or 128; SNEZ turns that into 0 or 1.
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpRISCV64ANDI, typ.Int64)
v0.AuxInt = int64ToAuxInt(1 << 7)
v1 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
break
}
return false
}
// rewriteValueRISCV64_OpRISCV64FLED tries to replace a float64 "less than or
// equal" comparison against ±math.MaxFloat64 with a bit test on the result of
// FCLASSD. It returns true if v was rewritten in place and false if neither
// rule matched.
//
// Each rule matches only the argument order written in its match comment;
// unlike the FEQD/FNED rewrites there is no operand-swapping loop here.
func rewriteValueRISCV64_OpRISCV64FLED(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (FLED (FMVDX (MOVDconst [int64(math.Float64bits(-math.MaxFloat64))])) x)
// result: (SNEZ (ANDI <typ.Int64> [0xff &^ 1] (FCLASSD x)))
for {
if v_0.Op != OpRISCV64FMVDX {
break
}
v_0_0 := v_0.Args[0]
if v_0_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_0_0.AuxInt) != int64(math.Float64bits(-math.MaxFloat64)) {
break
}
x := v_1
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpRISCV64ANDI, typ.Int64)
// 0xff &^ 1 selects bits 1 through 7: every bit of the low byte except bit 0.
v0.AuxInt = int64ToAuxInt(0xff &^ 1)
v1 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
// match: (FLED x (FMVDX (MOVDconst [int64(math.Float64bits(math.MaxFloat64))])))
// result: (SNEZ (ANDI <typ.Int64> [0xff &^ (1<<7)] (FCLASSD x)))
for {
x := v_0
if v_1.Op != OpRISCV64FMVDX {
break
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1_0.AuxInt) != int64(math.Float64bits(math.MaxFloat64)) {
break
}
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpRISCV64ANDI, typ.Int64)
// 0xff &^ (1<<7) selects bits 0 through 6: every bit of the low byte except bit 7.
v0.AuxInt = int64ToAuxInt(0xff &^ (1 << 7))
v1 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
return false
}
// rewriteValueRISCV64_OpRISCV64FLTD tries to replace a float64 "less than"
// comparison against ±math.MaxFloat64 with a bit test on the result of
// FCLASSD. It returns true if v was rewritten in place and false if neither
// rule matched.
//
// Each rule matches only the argument order written in its match comment.
func rewriteValueRISCV64_OpRISCV64FLTD(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (FLTD x (FMVDX (MOVDconst [int64(math.Float64bits(-math.MaxFloat64))])))
// result: (ANDI [1] (FCLASSD x))
for {
x := v_0
if v_1.Op != OpRISCV64FMVDX {
break
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1_0.AuxInt) != int64(math.Float64bits(-math.MaxFloat64)) {
break
}
// Masking with 1 keeps only bit 0, so the ANDI result is already 0 or 1
// and no SNEZ is emitted for this rule.
v.reset(OpRISCV64ANDI)
v.AuxInt = int64ToAuxInt(1)
v0 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (FLTD (FMVDX (MOVDconst [int64(math.Float64bits(math.MaxFloat64))])) x)
// result: (SNEZ (ANDI <typ.Int64> [1<<7] (FCLASSD x)))
for {
if v_0.Op != OpRISCV64FMVDX {
break
}
v_0_0 := v_0.Args[0]
if v_0_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_0_0.AuxInt) != int64(math.Float64bits(math.MaxFloat64)) {
break
}
x := v_1
// Masking with 1<<7 leaves 0 or 128; SNEZ turns that into 0 or 1.
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpRISCV64ANDI, typ.Int64)
v0.AuxInt = int64ToAuxInt(1 << 7)
v1 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64FMADDD(v *Value) bool { func rewriteValueRISCV64_OpRISCV64FMADDD(v *Value) bool {
v_2 := v.Args[2] v_2 := v.Args[2]
v_1 := v.Args[1] v_1 := v.Args[1]
@ -4186,6 +4337,59 @@ func rewriteValueRISCV64_OpRISCV64FMSUBS(v *Value) bool {
} }
return false return false
} }
// rewriteValueRISCV64_OpRISCV64FNED tries to replace a float64 inequality
// comparison against an infinity constant with a bit test on the result of
// FCLASSD. It returns true if v was rewritten in place and false if neither
// rule matched.
func rewriteValueRISCV64_OpRISCV64FNED(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (FNED x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(-1)))])))
// result: (SEQZ (ANDI <typ.Int64> [1] (FCLASSD x)))
for {
// The inner loop runs twice, swapping v_0 and v_1 after the first pass,
// so the constant is recognised in either argument position.
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpRISCV64FMVDX {
continue
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1_0.AuxInt) != int64(math.Float64bits(math.Inf(-1))) {
continue
}
// SEQZ yields 1 exactly when the masked bit 0 is clear.
v.reset(OpRISCV64SEQZ)
v0 := b.NewValue0(v.Pos, OpRISCV64ANDI, typ.Int64)
v0.AuxInt = int64ToAuxInt(1)
v1 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
break
}
// match: (FNED x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(1)))])))
// result: (SEQZ (ANDI <typ.Int64> [1<<7] (FCLASSD x)))
for {
// As above, both argument orders are tried.
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpRISCV64FMVDX {
continue
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1_0.AuxInt) != int64(math.Float64bits(math.Inf(1))) {
continue
}
// SEQZ yields 1 exactly when the masked bit 7 is clear.
v.reset(OpRISCV64SEQZ)
v0 := b.NewValue0(v.Pos, OpRISCV64ANDI, typ.Int64)
v0.AuxInt = int64ToAuxInt(1 << 7)
v1 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
break
}
return false
}
func rewriteValueRISCV64_OpRISCV64FNMADDD(v *Value) bool { func rewriteValueRISCV64_OpRISCV64FNMADDD(v *Value) bool {
v_2 := v.Args[2] v_2 := v.Args[2]
v_1 := v.Args[1] v_1 := v.Args[1]

View file

@ -523,6 +523,106 @@ func TestFloatSignalingNaNConversionConst(t *testing.T) {
} }
} }
// isPosInf reports whether x is +∞, using math.IsInf with sign 1.
//
//go:noinline
func isPosInf(x float64) bool {
return math.IsInf(x, 1)
}
// isPosInfEq reports whether x is +∞, using a direct equality comparison
// with math.Inf(1).
//
//go:noinline
func isPosInfEq(x float64) bool {
return x == math.Inf(1)
}
// isPosInfCmp reports whether x is +∞, expressed as x being strictly greater
// than the largest finite float64.
//
//go:noinline
func isPosInfCmp(x float64) bool {
return x > math.MaxFloat64
}
// isNotPosInf reports whether x is anything other than +∞ (NaN included),
// by negating math.IsInf with sign 1.
//
//go:noinline
func isNotPosInf(x float64) bool {
return !math.IsInf(x, 1)
}
// isNotPosInfEq reports whether x differs from +∞, using a direct inequality
// comparison with math.Inf(1). NaN compares unequal, so the result is true
// for NaN.
//
//go:noinline
func isNotPosInfEq(x float64) bool {
return x != math.Inf(1)
}
// isNotPosInfCmp reports whether x is at most the largest finite float64.
// Because this is an ordered comparison the result is false for NaN as well
// as for +∞.
//
//go:noinline
func isNotPosInfCmp(x float64) bool {
return x <= math.MaxFloat64
}
// isNegInf reports whether x is -∞, using math.IsInf with sign -1.
//
//go:noinline
func isNegInf(x float64) bool {
return math.IsInf(x, -1)
}
// isNegInfEq reports whether x is -∞, using a direct equality comparison
// with math.Inf(-1).
//
//go:noinline
func isNegInfEq(x float64) bool {
return x == math.Inf(-1)
}
// isNegInfCmp reports whether x is -∞, expressed as x being strictly less
// than the most negative finite float64.
//
//go:noinline
func isNegInfCmp(x float64) bool {
return x < -math.MaxFloat64
}
// isNotNegInf reports whether x is anything other than -∞ (NaN included),
// by negating math.IsInf with sign -1.
//
//go:noinline
func isNotNegInf(x float64) bool {
return !math.IsInf(x, -1)
}
// isNotNegInfEq reports whether x differs from -∞, using a direct inequality
// comparison with math.Inf(-1). NaN compares unequal, so the result is true
// for NaN.
//
//go:noinline
func isNotNegInfEq(x float64) bool {
return x != math.Inf(-1)
}
// isNotNegInfCmp reports whether x is at least the most negative finite
// float64. Because this is an ordered comparison the result is false for NaN
// as well as for -∞.
//
//go:noinline
func isNotNegInfCmp(x float64) bool {
return x >= -math.MaxFloat64
}
// TestInf checks every infinity predicate helper (the math.IsInf, equality
// and ordered-comparison forms for both +∞ and -∞, together with their
// negations) against a table of inputs.
//
// Besides ±∞, ±math.MaxFloat64 and NaN, the table contains ordinary normal
// numbers, subnormals and both zeros, so the predicates are also exercised
// with finite values that are not adjacent to the infinity boundary.
func TestInf(t *testing.T) {
	tests := []struct {
		value    float64
		isPosInf bool
		isNegInf bool
		isNaN    bool
	}{
		// Infinities, their nearest finite neighbours, and NaN.
		{value: math.Inf(1), isPosInf: true},
		{value: math.MaxFloat64},
		{value: math.Inf(-1), isNegInf: true},
		{value: -math.MaxFloat64},
		{value: math.NaN(), isNaN: true},
		// Ordinary normal numbers.
		{value: 1},
		{value: -1},
		// Subnormal numbers.
		{value: math.SmallestNonzeroFloat64},
		{value: -math.SmallestNonzeroFloat64},
		// Positive and negative zero.
		{value: 0},
		{value: math.Copysign(0, -1)},
	}

	// check runs one predicate on one value and reports a mismatch.
	check := func(name string, f func(x float64) bool, value float64, want bool) {
		got := f(value)
		if got != want {
			t.Errorf("%v(%g): want %v, got %v", name, value, want, got)
		}
	}

	for _, test := range tests {
		check("isPosInf", isPosInf, test.value, test.isPosInf)
		check("isPosInfEq", isPosInfEq, test.value, test.isPosInf)
		check("isPosInfCmp", isPosInfCmp, test.value, test.isPosInf)

		check("isNotPosInf", isNotPosInf, test.value, !test.isPosInf)
		check("isNotPosInfEq", isNotPosInfEq, test.value, !test.isPosInf)
		// Ordered comparisons are false for NaN, unlike the != and !IsInf forms.
		check("isNotPosInfCmp", isNotPosInfCmp, test.value, !test.isPosInf && !test.isNaN)

		check("isNegInf", isNegInf, test.value, test.isNegInf)
		check("isNegInfEq", isNegInfEq, test.value, test.isNegInf)
		check("isNegInfCmp", isNegInfCmp, test.value, test.isNegInf)

		check("isNotNegInf", isNotNegInf, test.value, !test.isNegInf)
		check("isNotNegInfEq", isNotNegInfEq, test.value, !test.isNegInf)
		// Ordered comparisons are false for NaN, unlike the != and !IsInf forms.
		check("isNotNegInfCmp", isNotNegInfCmp, test.value, !test.isNegInf && !test.isNaN)
	}
}
var sinkFloat float64 var sinkFloat float64
func BenchmarkMul2(b *testing.B) { func BenchmarkMul2(b *testing.B) {

View file

@ -154,6 +154,66 @@ func fnma(x, y, z float64) float64 {
return math.FMA(x, -y, -z) return math.FMA(x, -y, -z)
} }
// isPosInf reports whether x is +∞, using math.IsInf with sign 1.
// The riscv64 annotation on the return statement names FCLASSD.
func isPosInf(x float64) bool {
// riscv64:"FCLASSD"
return math.IsInf(x, 1)
}
// isPosInfEq reports whether x is +∞, using a direct equality comparison
// with math.Inf(1).
// The riscv64 annotation on the return statement names FCLASSD.
func isPosInfEq(x float64) bool {
// riscv64:"FCLASSD"
return x == math.Inf(1)
}
// isPosInfCmp reports whether x is +∞, expressed as x being strictly greater
// than the largest finite float64.
// The riscv64 annotation on the return statement names FCLASSD.
func isPosInfCmp(x float64) bool {
// riscv64:"FCLASSD"
return x > math.MaxFloat64
}
// isNotPosInf reports whether x is anything other than +∞ (NaN included),
// by negating math.IsInf with sign 1.
// The riscv64 annotation on the return statement names FCLASSD.
func isNotPosInf(x float64) bool {
// riscv64:"FCLASSD"
return !math.IsInf(x, 1)
}
// isNotPosInfEq reports whether x differs from +∞, using a direct inequality
// comparison with math.Inf(1). The result is true for NaN.
// The riscv64 annotation on the return statement names FCLASSD.
func isNotPosInfEq(x float64) bool {
// riscv64:"FCLASSD"
return x != math.Inf(1)
}
// isNotPosInfCmp reports whether x is at most the largest finite float64.
// As an ordered comparison it is false for NaN as well as for +∞.
// The riscv64 annotation on the return statement names FCLASSD.
func isNotPosInfCmp(x float64) bool {
// riscv64:"FCLASSD"
return x <= math.MaxFloat64
}
// isNegInf reports whether x is -∞, using math.IsInf with sign -1.
// The riscv64 annotation on the return statement names FCLASSD.
func isNegInf(x float64) bool {
// riscv64:"FCLASSD"
return math.IsInf(x, -1)
}
// isNegInfEq reports whether x is -∞, using a direct equality comparison
// with math.Inf(-1).
// The riscv64 annotation on the return statement names FCLASSD.
func isNegInfEq(x float64) bool {
// riscv64:"FCLASSD"
return x == math.Inf(-1)
}
// isNegInfCmp reports whether x is -∞, expressed as x being strictly less
// than the most negative finite float64.
// The riscv64 annotation on the return statement names FCLASSD.
func isNegInfCmp(x float64) bool {
// riscv64:"FCLASSD"
return x < -math.MaxFloat64
}
// isNotNegInf reports whether x is anything other than -∞ (NaN included),
// by negating math.IsInf with sign -1.
// The riscv64 annotation on the return statement names FCLASSD.
func isNotNegInf(x float64) bool {
// riscv64:"FCLASSD"
return !math.IsInf(x, -1)
}
// isNotNegInfEq reports whether x differs from -∞, using a direct inequality
// comparison with math.Inf(-1). The result is true for NaN.
// The riscv64 annotation on the return statement names FCLASSD.
func isNotNegInfEq(x float64) bool {
// riscv64:"FCLASSD"
return x != math.Inf(-1)
}
// isNotNegInfCmp reports whether x is at least the most negative finite
// float64. As an ordered comparison it is false for NaN as well as for -∞.
// The riscv64 annotation on the return statement names FCLASSD.
func isNotNegInfCmp(x float64) bool {
// riscv64:"FCLASSD"
return x >= -math.MaxFloat64
}
func fromFloat64(f64 float64) uint64 { func fromFloat64(f64 float64) uint64 {
// amd64:"MOVQ\tX.*, [^X].*" // amd64:"MOVQ\tX.*, [^X].*"
// arm64:"FMOVD\tF.*, R.*" // arm64:"FMOVD\tF.*, R.*"