From 97fd6bdeccf8c59f07dadbff8f614ea4169f01b1 Mon Sep 17 00:00:00 2001
From: Michael Munday
Date: Mon, 18 Aug 2025 22:51:36 +0100
Subject: [PATCH] cmd/compile: fuse NaN checks with other comparisons
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

NaN checks can often be merged into other comparisons by inverting
them. For example, `math.IsNaN(x) || x > 0` is equivalent to
`!(x <= 0)`. (A worked example of this equivalence appears after the
patch below.)

goos: linux
goarch: amd64
pkg: math
cpu: 12th Gen Intel(R) Core(TM) i7-12700T
             │    sec/op    │    sec/op     vs base                 │
Acos           4.315n ± 0%    4.314n ± 0%        ~ (p=0.642 n=10)
Acosh          8.398n ± 0%    7.779n ± 0%   -7.37% (p=0.000 n=10)
Asin           4.203n ± 0%    4.211n ± 0%   +0.20% (p=0.001 n=10)
Asinh         10.150n ± 0%    9.562n ± 0%   -5.79% (p=0.000 n=10)
Atan           2.363n ± 0%    2.363n ± 0%        ~ (p=0.801 n=10)
Atanh          8.192n ± 2%    7.685n ± 0%   -6.20% (p=0.000 n=10)
Atan2          4.013n ± 0%    4.010n ± 0%        ~ (p=0.073 n=10)
Cbrt           4.858n ± 0%    4.755n ± 0%   -2.12% (p=0.000 n=10)
Cos            4.596n ± 0%    4.357n ± 0%   -5.20% (p=0.000 n=10)
Cosh           5.071n ± 0%    5.071n ± 0%        ~ (p=0.585 n=10)
Erf            2.802n ± 1%    2.788n ± 0%   -0.54% (p=0.002 n=10)
Erfc           3.087n ± 1%    3.071n ± 0%        ~ (p=0.320 n=10)
Erfinv         3.981n ± 0%    3.965n ± 0%   -0.41% (p=0.000 n=10)
Erfcinv        3.985n ± 0%    3.977n ± 0%   -0.20% (p=0.000 n=10)
ExpGo          8.721n ± 2%    8.252n ± 0%   -5.38% (p=0.000 n=10)
Expm1          4.378n ± 0%    4.228n ± 0%   -3.43% (p=0.000 n=10)
Exp2           8.313n ± 0%    7.855n ± 0%   -5.52% (p=0.000 n=10)
Exp2Go         8.498n ± 2%    7.921n ± 0%   -6.79% (p=0.000 n=10)
Mod            15.16n ± 4%    12.20n ± 1%  -19.58% (p=0.000 n=10)
Frexp          1.780n ± 2%    1.496n ± 0%  -15.96% (p=0.000 n=10)
Gamma          4.378n ± 1%    4.013n ± 0%   -8.35% (p=0.000 n=10)
HypotGo        2.655n ± 5%    2.427n ± 1%   -8.57% (p=0.000 n=10)
Ilogb          1.912n ± 5%    1.749n ± 0%   -8.53% (p=0.000 n=10)
J0             22.43n ± 9%    20.46n ± 0%   -8.76% (p=0.000 n=10)
J1             21.03n ± 4%    19.96n ± 0%   -5.09% (p=0.000 n=10)
Jn             45.40n ± 1%    42.59n ± 0%   -6.20% (p=0.000 n=10)
Ldexp          2.312n ± 1%    1.944n ± 0%  -15.94% (p=0.000 n=10)
Lgamma         4.617n ± 1%    4.584n ± 0%   -0.73% (p=0.000 n=10)
Log            4.226n ± 0%    4.213n ± 0%   -0.31% (p=0.001 n=10)
Logb           1.771n ± 0%    1.775n ± 0%        ~ (p=0.097 n=10)
Log1p          5.102n ± 2%    5.001n ± 0%   -1.97% (p=0.000 n=10)
Log10          4.407n ± 0%    4.408n ± 0%        ~ (p=1.000 n=10)
Log2           2.416n ± 1%    2.138n ± 0%  -11.51% (p=0.000 n=10)
Modf           1.669n ± 2%    1.611n ± 0%   -3.50% (p=0.000 n=10)
Nextafter32    2.186n ± 0%    2.185n ± 0%        ~ (p=0.051 n=10)
Nextafter64    2.182n ± 0%    2.184n ± 0%   +0.09% (p=0.016 n=10)
PowInt         11.39n ± 6%    10.68n ± 2%   -6.24% (p=0.000 n=10)
PowFrac        26.60n ± 2%    26.12n ± 0%   -1.80% (p=0.000 n=10)
Pow10Pos      0.5067n ± 4%   0.5003n ± 1%   -1.27% (p=0.001 n=10)
Pow10Neg      0.8552n ± 0%   0.8552n ± 0%        ~ (p=0.928 n=10)
Round          1.181n ± 0%    1.182n ± 0%   +0.08% (p=0.001 n=10)
RoundToEven    1.709n ± 0%    1.710n ± 0%        ~ (p=0.053 n=10)
Remainder      12.54n ± 5%    11.99n ± 2%   -4.46% (p=0.000 n=10)
Sin            3.933n ± 5%    3.926n ± 0%   -0.17% (p=0.000 n=10)
Sincos         5.672n ± 0%    5.522n ± 0%   -2.65% (p=0.000 n=10)
Sinh           5.447n ± 1%    5.444n ± 0%   -0.06% (p=0.029 n=10)
Tan            4.061n ± 0%    4.058n ± 0%   -0.07% (p=0.005 n=10)
Tanh           5.599n ± 0%    5.595n ± 0%   -0.06% (p=0.042 n=10)
Y0             20.75n ± 5%    19.73n ± 1%   -4.92% (p=0.000 n=10)
Y1             20.87n ± 2%    19.78n ± 1%   -5.20% (p=0.000 n=10)
Yn             44.50n ± 2%    42.04n ± 2%   -5.53% (p=0.000 n=10)
geomean        4.989n         4.791n        -3.96%

goos: linux
goarch: riscv64
pkg: math
cpu: Spacemit(R) X60
                     │    sec/op    │    sec/op     vs base                 │
Acos                   159.9n ± 0%    159.9n ± 0%        ~ (p=0.269 n=10)
Acosh                  244.7n ± 0%    235.0n ± 0%   -3.98% (p=0.000 n=10)
Asin                   159.9n ± 0%    159.9n ± 0%        ~ (p=0.154 n=10)
Asinh                  270.8n ± 0%    261.1n ± 0%   -3.60% (p=0.000 n=10)
Atan                   119.1n ± 0%    119.1n ± 0%        ~ (p=0.347 n=10)
Atanh                  260.2n ± 0%    261.8n ± 4%        ~ (p=0.459 n=10)
Atan2                  186.8n ± 0%    186.8n ± 0%        ~ (p=0.487 n=10)
Cbrt                   203.5n ± 0%    198.2n ± 0%   -2.60% (p=0.000 n=10)
Ceil                   31.82n ± 0%    31.81n ± 0%        ~ (p=0.714 n=10)
Copysign               4.894n ± 0%    4.893n ± 0%        ~ (p=0.161 n=10)
Cos                    107.6n ± 0%    103.6n ± 0%   -3.76% (p=0.000 n=10)
Cosh                   259.0n ± 0%    252.8n ± 0%   -2.39% (p=0.000 n=10)
Erf                    133.7n ± 0%    133.7n ± 0%        ~ (p=0.720 n=10)
Erfc                   137.9n ± 0%    137.8n ± 0%   -0.04% (p=0.033 n=10)
Erfinv                 173.7n ± 0%    168.8n ± 0%   -2.82% (p=0.000 n=10)
Erfcinv                173.7n ± 0%    168.8n ± 0%   -2.82% (p=0.000 n=10)
Exp                    215.3n ± 0%    208.1n ± 0%   -3.34% (p=0.000 n=10)
ExpGo                  226.7n ± 0%    220.6n ± 0%   -2.69% (p=0.000 n=10)
Expm1                  164.8n ± 0%    159.0n ± 0%   -3.52% (p=0.000 n=10)
Exp2                   185.0n ± 0%    182.7n ± 0%   -1.22% (p=0.000 n=10)
Exp2Go                 198.9n ± 0%    196.5n ± 0%   -1.21% (p=0.000 n=10)
Abs                    4.894n ± 0%    4.893n ± 0%        ~ (p=0.262 n=10)
Dim                    16.31n ± 0%    16.31n ± 0%        ~ (p=1.000 n=10)
Floor                  31.81n ± 0%    31.81n ± 0%        ~ (p=0.067 n=10)
Max                    26.11n ± 0%    26.10n ± 0%        ~ (p=0.080 n=10)
Min                    26.10n ± 0%    26.10n ± 0%        ~ (p=0.095 n=10)
Mod                    337.7n ± 0%    291.9n ± 0%  -13.56% (p=0.000 n=10)
Frexp                  50.57n ± 0%    42.41n ± 0%  -16.13% (p=0.000 n=10)
Gamma                  206.3n ± 0%    198.1n ± 0%   -4.00% (p=0.000 n=10)
Hypot                  94.62n ± 0%    94.61n ± 0%        ~ (p=0.437 n=10)
HypotGo                109.3n ± 0%    109.3n ± 0%        ~ (p=1.000 n=10)
Ilogb                  44.05n ± 0%    44.04n ± 0%   -0.02% (p=0.025 n=10)
J0                     663.1n ± 0%    663.9n ± 0%   +0.13% (p=0.002 n=10)
J1                     663.9n ± 0%    666.4n ± 0%   +0.38% (p=0.000 n=10)
Jn                     1.404µ ± 0%    1.407µ ± 0%   +0.21% (p=0.000 n=10)
Ldexp                  57.10n ± 0%    48.93n ± 0%  -14.30% (p=0.000 n=10)
Lgamma                 185.1n ± 0%    187.6n ± 0%   +1.32% (p=0.000 n=10)
Log                    182.7n ± 0%    170.1n ± 0%   -6.87% (p=0.000 n=10)
Logb                   46.49n ± 0%    46.49n ± 0%        ~ (p=0.675 n=10)
Log1p                  184.3n ± 0%    179.4n ± 0%   -2.63% (p=0.000 n=10)
Log10                  184.3n ± 0%    171.2n ± 0%   -7.08% (p=0.000 n=10)
Log2                   66.05n ± 0%    57.90n ± 0%  -12.34% (p=0.000 n=10)
Modf                   34.25n ± 0%    34.24n ± 0%        ~ (p=0.163 n=10)
Nextafter32            49.33n ± 1%    48.93n ± 0%   -0.81% (p=0.002 n=10)
Nextafter64            43.64n ± 0%    43.23n ± 0%   -0.93% (p=0.000 n=10)
PowInt                 267.6n ± 0%    251.2n ± 0%   -6.11% (p=0.000 n=10)
PowFrac                672.9n ± 0%    637.9n ± 0%   -5.19% (p=0.000 n=10)
Pow10Pos               13.87n ± 0%    13.87n ± 0%        ~ (p=1.000 n=10)
Pow10Neg              19.58n ± 62%   19.59n ± 62%        ~ (p=0.355 n=10)
Round                  23.65n ± 0%    23.65n ± 0%        ~ (p=1.000 n=10)
RoundToEven            27.73n ± 0%    27.73n ± 0%        ~ (p=0.635 n=10)
Remainder              309.9n ± 0%    280.5n ± 0%   -9.49% (p=0.000 n=10)
Signbit                13.05n ± 0%    13.05n ± 0%        ~ (p=1.000 n=10) ¹
Sin                    120.7n ± 0%    120.7n ± 0%        ~ (p=1.000 n=10) ¹
Sincos                 148.4n ± 0%    143.5n ± 0%   -3.30% (p=0.000 n=10)
Sinh                   275.6n ± 0%    267.5n ± 0%   -2.94% (p=0.000 n=10)
SqrtIndirect           3.262n ± 0%    3.262n ± 0%        ~ (p=0.263 n=10)
SqrtLatency            19.57n ± 0%    19.57n ± 0%        ~ (p=0.582 n=10)
SqrtIndirectLatency    19.57n ± 0%    19.57n ± 0%        ~ (p=1.000 n=10)
SqrtGoLatency          203.2n ± 0%    197.6n ± 0%   -2.78% (p=0.000 n=10)
SqrtPrime              4.952µ ± 0%    4.952µ ± 0%   -0.01% (p=0.025 n=10)
Tan                    153.3n ± 0%    153.3n ± 0%        ~ (p=1.000 n=10)
Tanh                   280.5n ± 0%    272.4n ± 0%   -2.91% (p=0.000 n=10)
Trunc                  31.81n ± 0%    31.81n ± 0%        ~ (p=1.000 n=10)
Y0                     680.1n ± 0%    664.8n ± 0%   -2.25% (p=0.000 n=10)
Y1                     684.2n ± 0%    669.6n ± 0%   -2.14% (p=0.000 n=10)
Yn                     1.444µ ± 0%    1.410µ ± 0%   -2.35% (p=0.000 n=10)
Float64bits            5.709n ± 0%    5.708n ± 0%        ~ (p=0.573 n=10)
Float64frombits        4.893n ± 0%    4.893n ± 0%        ~ (p=0.734 n=10)
Float32bits            12.23n ± 0%    12.23n ± 0%        ~ (p=0.628 n=10)
Float32frombits        4.893n ± 0%    4.893n ± 0%        ~ (p=0.971 n=10)
FMA                    4.893n ± 0%    4.893n ± 0%        ~ (p=0.736 n=10)
geomean                88.96n         87.05n        -2.15%
¹ all samples are equal

Change-Id: I8db8ac7b7b3430b946b89e88dd6c1546804125c3
Reviewed-on: https://go-review.googlesource.com/c/go/+/697360
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Keith Randall
Reviewed-by: Cherry Mui
Auto-Submit: Michael Munday
---
 .../compile/internal/ssa/_gen/generic.rules   |  16 +
 src/cmd/compile/internal/ssa/fuse.go          |  10 +-
 .../compile/internal/ssa/fuse_comparisons.go  | 116 +++-
 .../compile/internal/ssa/rewritegeneric.go    | 553 ++++++++++++++++++
 src/cmd/compile/internal/test/float_test.go   | 104 ++++
 test/codegen/fuse.go                          |  80 +++
 6 files changed, 855 insertions(+), 24 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/_gen/generic.rules b/src/cmd/compile/internal/ssa/_gen/generic.rules
index 6fdea7cc7a..048d9958dc 100644
--- a/src/cmd/compile/internal/ssa/_gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/_gen/generic.rules
@@ -347,6 +347,22 @@
 (OrB ((Less|Leq)16U (Const16 [c]) x) (Leq16U x (Const16 [d]))) && uint16(c) >= uint16(d+1) && uint16(d+1) > uint16(d) => ((Less|Leq)16U (Const16 [c-d-1]) (Sub16 x (Const16 [d+1])))
 (OrB ((Less|Leq)8U (Const8 [c]) x) (Leq8U x (Const8 [d]))) && uint8(c) >= uint8(d+1) && uint8(d+1) > uint8(d) => ((Less|Leq)8U (Const8 [c-d-1]) (Sub8 x (Const8 [d+1])))
 
+// NaN check: ( x != x || x (>|>=|<|<=) c ) -> ( !(c (>=|>|<=|<) x) )
+(OrB (Neq64F x x) ((Less|Leq)64F x y:(Const64F [c]))) => (Not ((Leq|Less)64F y x))
+(OrB (Neq64F x x) ((Less|Leq)64F y:(Const64F [c]) x)) => (Not ((Leq|Less)64F x y))
+(OrB (Neq32F x x) ((Less|Leq)32F x y:(Const32F [c]))) => (Not ((Leq|Less)32F y x))
+(OrB (Neq32F x x) ((Less|Leq)32F y:(Const32F [c]) x)) => (Not ((Leq|Less)32F x y))
+
+// NaN check: ( x != x || Abs(x) (>|>=|<|<=) c ) -> ( !(c (>=|>|<=|<) Abs(x)) )
+(OrB (Neq64F x x) ((Less|Leq)64F abs:(Abs x) y:(Const64F [c]))) => (Not ((Leq|Less)64F y abs))
+(OrB (Neq64F x x) ((Less|Leq)64F y:(Const64F [c]) abs:(Abs x))) => (Not ((Leq|Less)64F abs y))
+
+// NaN check: ( x != x || -x (>|>=|<|<=) c ) -> ( !(c (>=|>|<=|<) -x) )
+(OrB (Neq64F x x) ((Less|Leq)64F neg:(Neg64F x) y:(Const64F [c]))) => (Not ((Leq|Less)64F y neg))
+(OrB (Neq64F x x) ((Less|Leq)64F y:(Const64F [c]) neg:(Neg64F x))) => (Not ((Leq|Less)64F neg y))
+(OrB (Neq32F x x) ((Less|Leq)32F neg:(Neg32F x) y:(Const32F [c]))) => (Not ((Leq|Less)32F y neg))
+(OrB (Neq32F x x) ((Less|Leq)32F y:(Const32F [c]) neg:(Neg32F x))) => (Not ((Leq|Less)32F neg y))
+
 // Canonicalize x-const to x+(-const)
 (Sub64 x (Const64 [c])) && x.Op != OpConst64 => (Add64 (Const64 [-c]) x)
 (Sub32 x (Const32 [c])) && x.Op != OpConst32 => (Add32 (Const32 [-c]) x)
diff --git a/src/cmd/compile/internal/ssa/fuse.go b/src/cmd/compile/internal/ssa/fuse.go
index 68defde7b4..0cee91b532 100644
--- a/src/cmd/compile/internal/ssa/fuse.go
+++ b/src/cmd/compile/internal/ssa/fuse.go
@@ -9,8 +9,8 @@ import (
 	"fmt"
 )
 
-// fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange).
-func fuseEarly(f *Func) { fuse(f, fuseTypePlain|fuseTypeIntInRange) }
+// fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck).
+func fuseEarly(f *Func) { fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck) }
 
 // fuseLate runs fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect).
 func fuseLate(f *Func) { fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect) }
@@ -21,6 +21,7 @@ const (
 	fuseTypePlain fuseType = 1 << iota
 	fuseTypeIf
 	fuseTypeIntInRange
+	fuseTypeNanCheck
 	fuseTypeBranchRedirect
 	fuseTypeShortCircuit
 )
@@ -38,7 +39,10 @@ func fuse(f *Func, typ fuseType) {
 			changed = fuseBlockIf(b) || changed
 		}
 		if typ&fuseTypeIntInRange != 0 {
-			changed = fuseIntegerComparisons(b) || changed
+			changed = fuseIntInRange(b) || changed
+		}
+		if typ&fuseTypeNanCheck != 0 {
+			changed = fuseNanCheck(b) || changed
 		}
 		if typ&fuseTypePlain != 0 {
 			changed = fuseBlockPlain(b) || changed
diff --git a/src/cmd/compile/internal/ssa/fuse_comparisons.go b/src/cmd/compile/internal/ssa/fuse_comparisons.go
index f5fb84b0d7..b6eb8fcb90 100644
--- a/src/cmd/compile/internal/ssa/fuse_comparisons.go
+++ b/src/cmd/compile/internal/ssa/fuse_comparisons.go
@@ -4,21 +4,36 @@
 
 package ssa
 
-// fuseIntegerComparisons optimizes inequalities such as '1 <= x && x < 5',
-// which can be optimized to 'unsigned(x-1) < 4'.
+// fuseIntInRange transforms integer range checks to remove the short-circuit operator. For example,
+// it would convert `if 1 <= x && x < 5 { ... }` into `if (1 <= x) & (x < 5) { ... }`. Rewrite rules
+// can then optimize these into unsigned range checks, `if unsigned(x-1) < 4 { ... }` in this case.
+func fuseIntInRange(b *Block) bool {
+	return fuseComparisons(b, canOptIntInRange)
+}
+
+// fuseNanCheck replaces the short-circuit operators between NaN checks and comparisons with
+// constants. For example, it would transform `if x != x || x > 1.0 { ... }` into
+// `if (x != x) | (x > 1.0) { ... }`. Rewrite rules can then merge the NaN check with the comparison,
+// in this case generating `if !(x <= 1.0) { ... }`.
+func fuseNanCheck(b *Block) bool {
+	return fuseComparisons(b, canOptNanCheck)
+}
+
+// fuseComparisons looks for control graphs that match this pattern:
 //
-// Look for branch structure like:
-//
-//	p
+//	p  - predecessor
 	//	|\
-//	| b
+//	| b  - block
 	//	|/ \
-//	s0 s1
+//	s0 s1 - successors
 //
-// In our example, p has control '1 <= x', b has control 'x < 5',
-// and s0 and s1 are the if and else results of the comparison.
+// This pattern is typical for if statements such as `if x || y { ... }` and `if x && y { ... }`.
 //
-// This will be optimized into:
+// If canOptControls returns true when passed the control values for p and b then fuseComparisons
+// will try to convert p into a plain block with only one successor (b) and modify b's control
+// value to include p's control value (effectively causing b to be speculatively executed).
+//
+// This transformation results in a control graph that will now look like this:
 //
 //	p
 //	 \
 //	b
@@ -26,9 +41,12 @@ package ssa
 //	/ \
 //	s0 s1
 //
-// where b has the combined control value 'unsigned(x-1) < 4'.
 // Later passes will then fuse p and b.
-func fuseIntegerComparisons(b *Block) bool {
+//
+// In other words `if x || y { ... }` will become `if x | y { ... }` and `if x && y { ... }` will
+// become `if x & y { ... }`. This is a useful transformation because we can then use rewrite
+// rules to optimize `x | y` and `x & y`.
+func fuseComparisons(b *Block, canOptControls func(a, b *Value, op Op) bool) bool {
 	if len(b.Preds) != 1 {
 		return false
 	}
@@ -45,14 +63,6 @@ func fuseComparisons(b *Block, canOptControls func(a, b *Value, op Op) bool) bool {
 		return false
 	}
 
-	// Check if the control values combine to make an integer inequality that
-	// can be further optimized later.
-	bc := b.Controls[0]
-	pc := p.Controls[0]
-	if !areMergeableInequalities(bc, pc) {
-		return false
-	}
-
 	// If the first (true) successors match then we have a disjunction (||).
 	// If the second (false) successors match then we have a conjunction (&&).
 	for i, op := range [2]Op{OpOrB, OpAndB} {
@@ -60,6 +70,13 @@
 			continue
 		}
 
+		// Check if the control values can be usefully combined.
+		bc := b.Controls[0]
+		pc := p.Controls[0]
+		if !canOptControls(bc, pc, op) {
+			return false
+		}
+
 		// TODO(mundaym): should we also check the cost of executing b?
 		// Currently we might speculatively execute b even if b contains
 		// a lot of instructions. We could just check that len(b.Values)
@@ -125,7 +142,7 @@ func isUnsignedInequality(v *Value) bool {
 	return false
 }
 
-func areMergeableInequalities(x, y *Value) bool {
+func canOptIntInRange(x, y *Value, op Op) bool {
 	// We need both inequalities to be either in the signed or unsigned domain.
 	// TODO(mundaym): it would also be good to merge when we have an Eq op that
 	// could be transformed into a Less/Leq. For example in the unsigned
@@ -155,3 +172,60 @@ func areMergeableInequalities(x, y *Value) bool {
 	}
 	return false
 }
+
+// canOptNanCheck reports whether one of the arguments is a NaN check and the
+// other is a comparison with a constant that can be combined with it.
+//
+// Examples (c must be a constant):
+//
+//	v != v || v < c  => !(c <= v)
+//	v != v || v <= c => !(c < v)
+//	v != v || c < v  => !(v <= c)
+//	v != v || c <= v => !(v < c)
+func canOptNanCheck(x, y *Value, op Op) bool {
+	if op != OpOrB {
+		return false
+	}
+
+	for i := 0; i <= 1; i, x, y = i+1, y, x {
+		if len(x.Args) != 2 || x.Args[0] != x.Args[1] {
+			continue
+		}
+		v := x.Args[0]
+		switch x.Op {
+		case OpNeq64F:
+			if y.Op != OpLess64F && y.Op != OpLeq64F {
+				return false
+			}
+			for j := 0; j <= 1; j++ {
+				a, b := y.Args[j], y.Args[j^1]
+				if a.Op != OpConst64F {
+					continue
+				}
+				// Sign bit operations do not affect NaN check results. This special case
+				// allows us to optimize statements like `if v != v || Abs(v) > c { ... }`.
+				if (b.Op == OpAbs || b.Op == OpNeg64F) && b.Args[0] == v {
+					return true
+				}
+				return b == v
+			}
+		case OpNeq32F:
+			if y.Op != OpLess32F && y.Op != OpLeq32F {
+				return false
+			}
+			for j := 0; j <= 1; j++ {
+				a, b := y.Args[j], y.Args[j^1]
+				if a.Op != OpConst32F {
+					continue
+				}
+				// Sign bit operations do not affect NaN check results. This special case
+				// allows us to optimize statements like `if v != v || -v > c { ... }`.
+ if b.Op == OpNeg32F && b.Args[0] == v { + return true + } + return b == v + } + } + } + return false +} diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index 5720063f34..37ba324d86 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -23957,6 +23957,7 @@ func rewriteValuegeneric_OpOrB(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + typ := &b.Func.Config.Types // match: (OrB (Less64 (Const64 [c]) x) (Less64 x (Const64 [d]))) // cond: c >= d // result: (Less64U (Const64 [c-d]) (Sub64 x (Const64 [d]))) @@ -25269,6 +25270,558 @@ func rewriteValuegeneric_OpOrB(v *Value) bool { } break } + // match: (OrB (Neq64F x x) (Less64F x y:(Const64F [c]))) + // result: (Not (Leq64F y x)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLess64F { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + y := v_1.Args[1] + if y.Op != OpConst64F { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLeq64F, typ.Bool) + v0.AddArg2(y, x) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq64F x x) (Leq64F x y:(Const64F [c]))) + // result: (Not (Less64F y x)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLeq64F { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + y := v_1.Args[1] + if y.Op != OpConst64F { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLess64F, typ.Bool) + v0.AddArg2(y, x) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq64F x x) (Less64F y:(Const64F [c]) x)) + // result: (Not (Leq64F x y)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLess64F { + continue + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y.Op != OpConst64F { + continue + } + if x != v_1.Args[1] { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLeq64F, typ.Bool) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq64F x x) (Leq64F y:(Const64F [c]) x)) + // result: (Not (Less64F x y)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLeq64F { + continue + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y.Op != OpConst64F { + continue + } + if x != v_1.Args[1] { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLess64F, typ.Bool) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq32F x x) (Less32F x y:(Const32F [c]))) + // result: (Not (Leq32F y x)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq32F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLess32F { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + y := v_1.Args[1] + if y.Op != OpConst32F { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLeq32F, typ.Bool) + v0.AddArg2(y, x) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq32F x x) (Leq32F x y:(Const32F [c]))) + // result: (Not (Less32F y x)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq32F { 
+ continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLeq32F { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + y := v_1.Args[1] + if y.Op != OpConst32F { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLess32F, typ.Bool) + v0.AddArg2(y, x) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq32F x x) (Less32F y:(Const32F [c]) x)) + // result: (Not (Leq32F x y)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq32F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLess32F { + continue + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y.Op != OpConst32F { + continue + } + if x != v_1.Args[1] { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLeq32F, typ.Bool) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq32F x x) (Leq32F y:(Const32F [c]) x)) + // result: (Not (Less32F x y)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq32F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLeq32F { + continue + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y.Op != OpConst32F { + continue + } + if x != v_1.Args[1] { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLess32F, typ.Bool) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq64F x x) (Less64F abs:(Abs x) y:(Const64F [c]))) + // result: (Not (Leq64F y abs)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLess64F { + continue + } + _ = v_1.Args[1] + abs := v_1.Args[0] + if abs.Op != OpAbs || x != abs.Args[0] { + continue + } + y := v_1.Args[1] + if y.Op != OpConst64F { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLeq64F, typ.Bool) + v0.AddArg2(y, abs) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq64F x x) (Leq64F abs:(Abs x) y:(Const64F [c]))) + // result: (Not (Less64F y abs)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLeq64F { + continue + } + _ = v_1.Args[1] + abs := v_1.Args[0] + if abs.Op != OpAbs || x != abs.Args[0] { + continue + } + y := v_1.Args[1] + if y.Op != OpConst64F { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLess64F, typ.Bool) + v0.AddArg2(y, abs) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq64F x x) (Less64F y:(Const64F [c]) abs:(Abs x))) + // result: (Not (Leq64F abs y)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLess64F { + continue + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y.Op != OpConst64F { + continue + } + abs := v_1.Args[1] + if abs.Op != OpAbs || x != abs.Args[0] { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLeq64F, typ.Bool) + v0.AddArg2(abs, y) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq64F x x) (Leq64F y:(Const64F [c]) abs:(Abs x))) + // result: (Not (Less64F abs y)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLeq64F { + continue + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y.Op != OpConst64F { + continue + } + abs := v_1.Args[1] + if abs.Op != OpAbs || 
x != abs.Args[0] { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLess64F, typ.Bool) + v0.AddArg2(abs, y) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq64F x x) (Less64F neg:(Neg64F x) y:(Const64F [c]))) + // result: (Not (Leq64F y neg)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLess64F { + continue + } + _ = v_1.Args[1] + neg := v_1.Args[0] + if neg.Op != OpNeg64F || x != neg.Args[0] { + continue + } + y := v_1.Args[1] + if y.Op != OpConst64F { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLeq64F, typ.Bool) + v0.AddArg2(y, neg) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq64F x x) (Leq64F neg:(Neg64F x) y:(Const64F [c]))) + // result: (Not (Less64F y neg)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLeq64F { + continue + } + _ = v_1.Args[1] + neg := v_1.Args[0] + if neg.Op != OpNeg64F || x != neg.Args[0] { + continue + } + y := v_1.Args[1] + if y.Op != OpConst64F { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLess64F, typ.Bool) + v0.AddArg2(y, neg) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq64F x x) (Less64F y:(Const64F [c]) neg:(Neg64F x))) + // result: (Not (Leq64F neg y)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLess64F { + continue + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y.Op != OpConst64F { + continue + } + neg := v_1.Args[1] + if neg.Op != OpNeg64F || x != neg.Args[0] { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLeq64F, typ.Bool) + v0.AddArg2(neg, y) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq64F x x) (Leq64F y:(Const64F [c]) neg:(Neg64F x))) + // result: (Not (Less64F neg y)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq64F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLeq64F { + continue + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y.Op != OpConst64F { + continue + } + neg := v_1.Args[1] + if neg.Op != OpNeg64F || x != neg.Args[0] { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLess64F, typ.Bool) + v0.AddArg2(neg, y) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq32F x x) (Less32F neg:(Neg32F x) y:(Const32F [c]))) + // result: (Not (Leq32F y neg)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq32F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLess32F { + continue + } + _ = v_1.Args[1] + neg := v_1.Args[0] + if neg.Op != OpNeg32F || x != neg.Args[0] { + continue + } + y := v_1.Args[1] + if y.Op != OpConst32F { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLeq32F, typ.Bool) + v0.AddArg2(y, neg) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq32F x x) (Leq32F neg:(Neg32F x) y:(Const32F [c]))) + // result: (Not (Less32F y neg)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq32F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLeq32F { + continue + } + _ = v_1.Args[1] + neg := v_1.Args[0] + if neg.Op != OpNeg32F || x != neg.Args[0] { + continue + } + y := v_1.Args[1] + if y.Op != OpConst32F { + continue + 
} + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLess32F, typ.Bool) + v0.AddArg2(y, neg) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq32F x x) (Less32F y:(Const32F [c]) neg:(Neg32F x))) + // result: (Not (Leq32F neg y)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq32F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLess32F { + continue + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y.Op != OpConst32F { + continue + } + neg := v_1.Args[1] + if neg.Op != OpNeg32F || x != neg.Args[0] { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLeq32F, typ.Bool) + v0.AddArg2(neg, y) + v.AddArg(v0) + return true + } + break + } + // match: (OrB (Neq32F x x) (Leq32F y:(Const32F [c]) neg:(Neg32F x))) + // result: (Not (Less32F neg y)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpNeq32F { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpLeq32F { + continue + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y.Op != OpConst32F { + continue + } + neg := v_1.Args[1] + if neg.Op != OpNeg32F || x != neg.Args[0] { + continue + } + v.reset(OpNot) + v0 := b.NewValue0(v.Pos, OpLess32F, typ.Bool) + v0.AddArg2(neg, y) + v.AddArg(v0) + return true + } + break + } return false } func rewriteValuegeneric_OpPhi(v *Value) bool { diff --git a/src/cmd/compile/internal/test/float_test.go b/src/cmd/compile/internal/test/float_test.go index 9e61148c52..7a5e27870f 100644 --- a/src/cmd/compile/internal/test/float_test.go +++ b/src/cmd/compile/internal/test/float_test.go @@ -623,6 +623,110 @@ func TestInf(t *testing.T) { } } +//go:noinline +func isNaNOrGtZero64(x float64) bool { + return math.IsNaN(x) || x > 0 +} + +//go:noinline +func isNaNOrGteZero64(x float64) bool { + return x >= 0 || math.IsNaN(x) +} + +//go:noinline +func isNaNOrLtZero64(x float64) bool { + return x < 0 || math.IsNaN(x) +} + +//go:noinline +func isNaNOrLteZero64(x float64) bool { + return math.IsNaN(x) || x <= 0 +} + +func TestFusedNaNChecks64(t *testing.T) { + tests := []struct { + value float64 + isZero bool + isGreaterThanZero bool + isLessThanZero bool + isNaN bool + }{ + {value: 0.0, isZero: true}, + {value: math.Copysign(0, -1), isZero: true}, + {value: 1.0, isGreaterThanZero: true}, + {value: -1.0, isLessThanZero: true}, + {value: math.Inf(1), isGreaterThanZero: true}, + {value: math.Inf(-1), isLessThanZero: true}, + {value: math.NaN(), isNaN: true}, + } + + check := func(name string, f func(x float64) bool, value float64, want bool) { + got := f(value) + if got != want { + t.Errorf("%v(%g): want %v, got %v", name, value, want, got) + } + } + + for _, test := range tests { + check("isNaNOrGtZero64", isNaNOrGtZero64, test.value, test.isNaN || test.isGreaterThanZero) + check("isNaNOrGteZero64", isNaNOrGteZero64, test.value, test.isNaN || test.isGreaterThanZero || test.isZero) + check("isNaNOrLtZero64", isNaNOrLtZero64, test.value, test.isNaN || test.isLessThanZero) + check("isNaNOrLteZero64", isNaNOrLteZero64, test.value, test.isNaN || test.isLessThanZero || test.isZero) + } +} + +//go:noinline +func isNaNOrGtZero32(x float32) bool { + return x > 0 || x != x +} + +//go:noinline +func isNaNOrGteZero32(x float32) bool { + return x != x || x >= 0 +} + +//go:noinline +func isNaNOrLtZero32(x float32) bool { + return x != x || x < 0 +} + +//go:noinline +func isNaNOrLteZero32(x float32) bool { + return x <= 0 || x != x +} + +func TestFusedNaNChecks32(t *testing.T) { + tests := []struct { + value float32 + 
+		isZero            bool
+		isGreaterThanZero bool
+		isLessThanZero    bool
+		isNaN             bool
+	}{
+		{value: 0.0, isZero: true},
+		{value: float32(math.Copysign(0, -1)), isZero: true},
+		{value: 1.0, isGreaterThanZero: true},
+		{value: -1.0, isLessThanZero: true},
+		{value: float32(math.Inf(1)), isGreaterThanZero: true},
+		{value: float32(math.Inf(-1)), isLessThanZero: true},
+		{value: float32(math.NaN()), isNaN: true},
+	}
+
+	check := func(name string, f func(x float32) bool, value float32, want bool) {
+		got := f(value)
+		if got != want {
+			t.Errorf("%v(%g): want %v, got %v", name, value, want, got)
+		}
+	}
+
+	for _, test := range tests {
+		check("isNaNOrGtZero32", isNaNOrGtZero32, test.value, test.isNaN || test.isGreaterThanZero)
+		check("isNaNOrGteZero32", isNaNOrGteZero32, test.value, test.isNaN || test.isGreaterThanZero || test.isZero)
+		check("isNaNOrLtZero32", isNaNOrLtZero32, test.value, test.isNaN || test.isLessThanZero)
+		check("isNaNOrLteZero32", isNaNOrLteZero32, test.value, test.isNaN || test.isLessThanZero || test.isZero)
+	}
+}
+
 var sinkFloat float64
 
 func BenchmarkMul2(b *testing.B) {
diff --git a/test/codegen/fuse.go b/test/codegen/fuse.go
index 8d6ea3c5c7..561bac7224 100644
--- a/test/codegen/fuse.go
+++ b/test/codegen/fuse.go
@@ -6,6 +6,8 @@
 
 package codegen
 
+import "math"
+
 // Notes:
 // - these examples use channels to provide a source of
 //   unknown values that cannot be optimized away
@@ -196,6 +198,84 @@ func ui4d(c <-chan uint8) {
 	}
 }
 
+// ------------------------------------ //
+// merge NaN checks                     //
+// ------------------------------------ //
+
+func f64NaNOrPosInf(c <-chan float64) {
+	// This test assumes IsInf(x, 1) is implemented as x > MaxFloat rather than x == Inf(1).
+
+	// amd64:"JCS",-"JNE",-"JPS",-"JPC"
+	// riscv64:"FCLASSD",-"FLED",-"FLTD",-"FNED",-"FEQD"
+	for x := <-c; math.IsNaN(x) || math.IsInf(x, 1); x = <-c {
+	}
+}
+
+func f64NaNOrNegInf(c <-chan float64) {
+	// This test assumes IsInf(x, -1) is implemented as x < -MaxFloat rather than x == Inf(-1).
+
+	// amd64:"JCS",-"JNE",-"JPS",-"JPC"
+	// riscv64:"FCLASSD",-"FLED",-"FLTD",-"FNED",-"FEQD"
+	for x := <-c; math.IsNaN(x) || math.IsInf(x, -1); x = <-c {
+	}
+}
+
+func f64NaNOrLtOne(c <-chan float64) {
+	// amd64:"JCS",-"JNE",-"JPS",-"JPC"
+	// riscv64:"FLED",-"FLTD",-"FNED",-"FEQD"
+	for x := <-c; math.IsNaN(x) || x < 1; x = <-c {
+	}
+}
+
+func f64NaNOrLteOne(c <-chan float64) {
+	// amd64:"JLS",-"JNE",-"JPS",-"JPC"
+	// riscv64:"FLTD",-"FLED",-"FNED",-"FEQD"
+	for x := <-c; x <= 1 || math.IsNaN(x); x = <-c {
+	}
+}
+
+func f64NaNOrGtOne(c <-chan float64) {
+	// amd64:"JCS",-"JNE",-"JPS",-"JPC"
+	// riscv64:"FLED",-"FLTD",-"FNED",-"FEQD"
+	for x := <-c; math.IsNaN(x) || x > 1; x = <-c {
+	}
+}
+
+func f64NaNOrGteOne(c <-chan float64) {
+	// amd64:"JLS",-"JNE",-"JPS",-"JPC"
+	// riscv64:"FLTD",-"FLED",-"FNED",-"FEQD"
+	for x := <-c; x >= 1 || math.IsNaN(x); x = <-c {
+	}
+}
+
+func f32NaNOrLtOne(c <-chan float32) {
+	// amd64:"JCS",-"JNE",-"JPS",-"JPC"
+	// riscv64:"FLES",-"FLTS",-"FNES",-"FEQS"
+	for x := <-c; x < 1 || x != x; x = <-c {
+	}
+}
+
+func f32NaNOrLteOne(c <-chan float32) {
+	// amd64:"JLS",-"JNE",-"JPS",-"JPC"
+	// riscv64:"FLTS",-"FLES",-"FNES",-"FEQS"
+	for x := <-c; x != x || x <= 1; x = <-c {
+	}
+}
+
+func f32NaNOrGtOne(c <-chan float32) {
+	// amd64:"JCS",-"JNE",-"JPS",-"JPC"
+	// riscv64:"FLES",-"FLTS",-"FNES",-"FEQS"
+	for x := <-c; x > 1 || x != x; x = <-c {
+	}
+}
+
+func f32NaNOrGteOne(c <-chan float32) {
+	// amd64:"JLS",-"JNE",-"JPS",-"JPC"
+	// riscv64:"FLTS",-"FLES",-"FNES",-"FEQS"
+	for x := <-c; x != x || x >= 1; x = <-c {
+	}
+}
+
 // ------------------------------------ //
 // regressions                          //
 // ------------------------------------ //
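
Worked example (editor's note, not part of the commit): the following minimal,
standalone program sketches the source-level equivalences the new rewrite rules
rely on, including the Abs/negation special case. The helper names (naive,
fused, naiveAbs, fusedAbs) are ours; whether the compiler actually fuses these
branches is what the codegen tests above verify, while this program only checks
the semantic equivalence that makes the rewrite legal.

package main

import (
	"fmt"
	"math"
)

// naive is the form a programmer writes; fused is the branch the compiler can
// emit instead: the NaN check folds into an inverted comparison because every
// ordered comparison involving NaN is false.
func naive(x float64) bool { return math.IsNaN(x) || x > 0 }
func fused(x float64) bool { return !(x <= 0) }

// The sign-bit special case: Abs(v) (and -v) is NaN exactly when v is NaN,
// so the NaN check on v fuses through the sign-bit operation as well.
func naiveAbs(x float64) bool { return math.IsNaN(x) || math.Abs(x) > 1 }
func fusedAbs(x float64) bool { return !(math.Abs(x) <= 1) }

func main() {
	// Probe the interesting boundary values: both zeros, both infinities,
	// values on either side of the constants, and NaN itself.
	inputs := []float64{0, math.Copysign(0, -1), 1, -1, 2, -2,
		math.Inf(1), math.Inf(-1), math.NaN()}
	for _, x := range inputs {
		if naive(x) != fused(x) || naiveAbs(x) != fusedAbs(x) {
			fmt.Printf("mismatch at %g\n", x)
			return
		}
	}
	fmt.Println("all equivalences hold")
}

Running this prints "all equivalences hold"; changing fused to drop the Not
(for example, returning x <= 0) makes the NaN input report a mismatch, which is
why the rewrite must invert the comparison rather than merely flip its operands.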