mirror of
https://github.com/golang/go.git
synced 2025-10-19 11:03:18 +00:00
cmd/compile: fuse NaN checks with other comparisons
NaN checks can often be merged into other comparisons by inverting them. For example, `math.IsNaN(x) || x > 0` is equivalent to `!(x <= 0)`. goos: linux goarch: amd64 pkg: math cpu: 12th Gen Intel(R) Core(TM) i7-12700T │ sec/op │ sec/op vs base │ Acos 4.315n ± 0% 4.314n ± 0% ~ (p=0.642 n=10) Acosh 8.398n ± 0% 7.779n ± 0% -7.37% (p=0.000 n=10) Asin 4.203n ± 0% 4.211n ± 0% +0.20% (p=0.001 n=10) Asinh 10.150n ± 0% 9.562n ± 0% -5.79% (p=0.000 n=10) Atan 2.363n ± 0% 2.363n ± 0% ~ (p=0.801 n=10) Atanh 8.192n ± 2% 7.685n ± 0% -6.20% (p=0.000 n=10) Atan2 4.013n ± 0% 4.010n ± 0% ~ (p=0.073 n=10) Cbrt 4.858n ± 0% 4.755n ± 0% -2.12% (p=0.000 n=10) Cos 4.596n ± 0% 4.357n ± 0% -5.20% (p=0.000 n=10) Cosh 5.071n ± 0% 5.071n ± 0% ~ (p=0.585 n=10) Erf 2.802n ± 1% 2.788n ± 0% -0.54% (p=0.002 n=10) Erfc 3.087n ± 1% 3.071n ± 0% ~ (p=0.320 n=10) Erfinv 3.981n ± 0% 3.965n ± 0% -0.41% (p=0.000 n=10) Erfcinv 3.985n ± 0% 3.977n ± 0% -0.20% (p=0.000 n=10) ExpGo 8.721n ± 2% 8.252n ± 0% -5.38% (p=0.000 n=10) Expm1 4.378n ± 0% 4.228n ± 0% -3.43% (p=0.000 n=10) Exp2 8.313n ± 0% 7.855n ± 0% -5.52% (p=0.000 n=10) Exp2Go 8.498n ± 2% 7.921n ± 0% -6.79% (p=0.000 n=10) Mod 15.16n ± 4% 12.20n ± 1% -19.58% (p=0.000 n=10) Frexp 1.780n ± 2% 1.496n ± 0% -15.96% (p=0.000 n=10) Gamma 4.378n ± 1% 4.013n ± 0% -8.35% (p=0.000 n=10) HypotGo 2.655n ± 5% 2.427n ± 1% -8.57% (p=0.000 n=10) Ilogb 1.912n ± 5% 1.749n ± 0% -8.53% (p=0.000 n=10) J0 22.43n ± 9% 20.46n ± 0% -8.76% (p=0.000 n=10) J1 21.03n ± 4% 19.96n ± 0% -5.09% (p=0.000 n=10) Jn 45.40n ± 1% 42.59n ± 0% -6.20% (p=0.000 n=10) Ldexp 2.312n ± 1% 1.944n ± 0% -15.94% (p=0.000 n=10) Lgamma 4.617n ± 1% 4.584n ± 0% -0.73% (p=0.000 n=10) Log 4.226n ± 0% 4.213n ± 0% -0.31% (p=0.001 n=10) Logb 1.771n ± 0% 1.775n ± 0% ~ (p=0.097 n=10) Log1p 5.102n ± 2% 5.001n ± 0% -1.97% (p=0.000 n=10) Log10 4.407n ± 0% 4.408n ± 0% ~ (p=1.000 n=10) Log2 2.416n ± 1% 2.138n ± 0% -11.51% (p=0.000 n=10) Modf 1.669n ± 2% 1.611n ± 0% -3.50% (p=0.000 n=10) Nextafter32 2.186n ± 0% 2.185n ± 
0% ~ (p=0.051 n=10) Nextafter64 2.182n ± 0% 2.184n ± 0% +0.09% (p=0.016 n=10) PowInt 11.39n ± 6% 10.68n ± 2% -6.24% (p=0.000 n=10) PowFrac 26.60n ± 2% 26.12n ± 0% -1.80% (p=0.000 n=10) Pow10Pos 0.5067n ± 4% 0.5003n ± 1% -1.27% (p=0.001 n=10) Pow10Neg 0.8552n ± 0% 0.8552n ± 0% ~ (p=0.928 n=10) Round 1.181n ± 0% 1.182n ± 0% +0.08% (p=0.001 n=10) RoundToEven 1.709n ± 0% 1.710n ± 0% ~ (p=0.053 n=10) Remainder 12.54n ± 5% 11.99n ± 2% -4.46% (p=0.000 n=10) Sin 3.933n ± 5% 3.926n ± 0% -0.17% (p=0.000 n=10) Sincos 5.672n ± 0% 5.522n ± 0% -2.65% (p=0.000 n=10) Sinh 5.447n ± 1% 5.444n ± 0% -0.06% (p=0.029 n=10) Tan 4.061n ± 0% 4.058n ± 0% -0.07% (p=0.005 n=10) Tanh 5.599n ± 0% 5.595n ± 0% -0.06% (p=0.042 n=10) Y0 20.75n ± 5% 19.73n ± 1% -4.92% (p=0.000 n=10) Y1 20.87n ± 2% 19.78n ± 1% -5.20% (p=0.000 n=10) Yn 44.50n ± 2% 42.04n ± 2% -5.53% (p=0.000 n=10) geomean 4.989n 4.791n -3.96% goos: linux goarch: riscv64 pkg: math cpu: Spacemit(R) X60 │ sec/op │ sec/op vs base │ Acos 159.9n ± 0% 159.9n ± 0% ~ (p=0.269 n=10) Acosh 244.7n ± 0% 235.0n ± 0% -3.98% (p=0.000 n=10) Asin 159.9n ± 0% 159.9n ± 0% ~ (p=0.154 n=10) Asinh 270.8n ± 0% 261.1n ± 0% -3.60% (p=0.000 n=10) Atan 119.1n ± 0% 119.1n ± 0% ~ (p=0.347 n=10) Atanh 260.2n ± 0% 261.8n ± 4% ~ (p=0.459 n=10) Atan2 186.8n ± 0% 186.8n ± 0% ~ (p=0.487 n=10) Cbrt 203.5n ± 0% 198.2n ± 0% -2.60% (p=0.000 n=10) Ceil 31.82n ± 0% 31.81n ± 0% ~ (p=0.714 n=10) Copysign 4.894n ± 0% 4.893n ± 0% ~ (p=0.161 n=10) Cos 107.6n ± 0% 103.6n ± 0% -3.76% (p=0.000 n=10) Cosh 259.0n ± 0% 252.8n ± 0% -2.39% (p=0.000 n=10) Erf 133.7n ± 0% 133.7n ± 0% ~ (p=0.720 n=10) Erfc 137.9n ± 0% 137.8n ± 0% -0.04% (p=0.033 n=10) Erfinv 173.7n ± 0% 168.8n ± 0% -2.82% (p=0.000 n=10) Erfcinv 173.7n ± 0% 168.8n ± 0% -2.82% (p=0.000 n=10) Exp 215.3n ± 0% 208.1n ± 0% -3.34% (p=0.000 n=10) ExpGo 226.7n ± 0% 220.6n ± 0% -2.69% (p=0.000 n=10) Expm1 164.8n ± 0% 159.0n ± 0% -3.52% (p=0.000 n=10) Exp2 185.0n ± 0% 182.7n ± 0% -1.22% (p=0.000 n=10) Exp2Go 198.9n ± 0% 196.5n ± 0% 
-1.21% (p=0.000 n=10) Abs 4.894n ± 0% 4.893n ± 0% ~ (p=0.262 n=10) Dim 16.31n ± 0% 16.31n ± 0% ~ (p=1.000 n=10) Floor 31.81n ± 0% 31.81n ± 0% ~ (p=0.067 n=10) Max 26.11n ± 0% 26.10n ± 0% ~ (p=0.080 n=10) Min 26.10n ± 0% 26.10n ± 0% ~ (p=0.095 n=10) Mod 337.7n ± 0% 291.9n ± 0% -13.56% (p=0.000 n=10) Frexp 50.57n ± 0% 42.41n ± 0% -16.13% (p=0.000 n=10) Gamma 206.3n ± 0% 198.1n ± 0% -4.00% (p=0.000 n=10) Hypot 94.62n ± 0% 94.61n ± 0% ~ (p=0.437 n=10) HypotGo 109.3n ± 0% 109.3n ± 0% ~ (p=1.000 n=10) Ilogb 44.05n ± 0% 44.04n ± 0% -0.02% (p=0.025 n=10) J0 663.1n ± 0% 663.9n ± 0% +0.13% (p=0.002 n=10) J1 663.9n ± 0% 666.4n ± 0% +0.38% (p=0.000 n=10) Jn 1.404µ ± 0% 1.407µ ± 0% +0.21% (p=0.000 n=10) Ldexp 57.10n ± 0% 48.93n ± 0% -14.30% (p=0.000 n=10) Lgamma 185.1n ± 0% 187.6n ± 0% +1.32% (p=0.000 n=10) Log 182.7n ± 0% 170.1n ± 0% -6.87% (p=0.000 n=10) Logb 46.49n ± 0% 46.49n ± 0% ~ (p=0.675 n=10) Log1p 184.3n ± 0% 179.4n ± 0% -2.63% (p=0.000 n=10) Log10 184.3n ± 0% 171.2n ± 0% -7.08% (p=0.000 n=10) Log2 66.05n ± 0% 57.90n ± 0% -12.34% (p=0.000 n=10) Modf 34.25n ± 0% 34.24n ± 0% ~ (p=0.163 n=10) Nextafter32 49.33n ± 1% 48.93n ± 0% -0.81% (p=0.002 n=10) Nextafter64 43.64n ± 0% 43.23n ± 0% -0.93% (p=0.000 n=10) PowInt 267.6n ± 0% 251.2n ± 0% -6.11% (p=0.000 n=10) PowFrac 672.9n ± 0% 637.9n ± 0% -5.19% (p=0.000 n=10) Pow10Pos 13.87n ± 0% 13.87n ± 0% ~ (p=1.000 n=10) Pow10Neg 19.58n ± 62% 19.59n ± 62% ~ (p=0.355 n=10) Round 23.65n ± 0% 23.65n ± 0% ~ (p=1.000 n=10) RoundToEven 27.73n ± 0% 27.73n ± 0% ~ (p=0.635 n=10) Remainder 309.9n ± 0% 280.5n ± 0% -9.49% (p=0.000 n=10) Signbit 13.05n ± 0% 13.05n ± 0% ~ (p=1.000 n=10) ¹ Sin 120.7n ± 0% 120.7n ± 0% ~ (p=1.000 n=10) ¹ Sincos 148.4n ± 0% 143.5n ± 0% -3.30% (p=0.000 n=10) Sinh 275.6n ± 0% 267.5n ± 0% -2.94% (p=0.000 n=10) SqrtIndirect 3.262n ± 0% 3.262n ± 0% ~ (p=0.263 n=10) SqrtLatency 19.57n ± 0% 19.57n ± 0% ~ (p=0.582 n=10) SqrtIndirectLatency 19.57n ± 0% 19.57n ± 0% ~ (p=1.000 n=10) SqrtGoLatency 203.2n ± 0% 197.6n ± 0% -2.78% 
(p=0.000 n=10) SqrtPrime 4.952µ ± 0% 4.952µ ± 0% -0.01% (p=0.025 n=10) Tan 153.3n ± 0% 153.3n ± 0% ~ (p=1.000 n=10) Tanh 280.5n ± 0% 272.4n ± 0% -2.91% (p=0.000 n=10) Trunc 31.81n ± 0% 31.81n ± 0% ~ (p=1.000 n=10) Y0 680.1n ± 0% 664.8n ± 0% -2.25% (p=0.000 n=10) Y1 684.2n ± 0% 669.6n ± 0% -2.14% (p=0.000 n=10) Yn 1.444µ ± 0% 1.410µ ± 0% -2.35% (p=0.000 n=10) Float64bits 5.709n ± 0% 5.708n ± 0% ~ (p=0.573 n=10) Float64frombits 4.893n ± 0% 4.893n ± 0% ~ (p=0.734 n=10) Float32bits 12.23n ± 0% 12.23n ± 0% ~ (p=0.628 n=10) Float32frombits 4.893n ± 0% 4.893n ± 0% ~ (p=0.971 n=10) FMA 4.893n ± 0% 4.893n ± 0% ~ (p=0.736 n=10) geomean 88.96n 87.05n -2.15% ¹ all samples are equal Change-Id: I8db8ac7b7b3430b946b89e88dd6c1546804125c3 Reviewed-on: https://go-review.googlesource.com/c/go/+/697360 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Keith Randall <khr@google.com> Auto-Submit: Michael Munday <mikemndy@gmail.com>
This commit is contained in:
parent
78b43037dc
commit
97fd6bdecc
6 changed files with 855 additions and 24 deletions
|
@ -347,6 +347,22 @@
|
||||||
(OrB ((Less|Leq)16U (Const16 [c]) x) (Leq16U x (Const16 [d]))) && uint16(c) >= uint16(d+1) && uint16(d+1) > uint16(d) => ((Less|Leq)16U (Const16 <x.Type> [c-d-1]) (Sub16 <x.Type> x (Const16 <x.Type> [d+1])))
|
(OrB ((Less|Leq)16U (Const16 [c]) x) (Leq16U x (Const16 [d]))) && uint16(c) >= uint16(d+1) && uint16(d+1) > uint16(d) => ((Less|Leq)16U (Const16 <x.Type> [c-d-1]) (Sub16 <x.Type> x (Const16 <x.Type> [d+1])))
|
||||||
(OrB ((Less|Leq)8U (Const8 [c]) x) (Leq8U x (Const8 [d]))) && uint8(c) >= uint8(d+1) && uint8(d+1) > uint8(d) => ((Less|Leq)8U (Const8 <x.Type> [c-d-1]) (Sub8 <x.Type> x (Const8 <x.Type> [d+1])))
|
(OrB ((Less|Leq)8U (Const8 [c]) x) (Leq8U x (Const8 [d]))) && uint8(c) >= uint8(d+1) && uint8(d+1) > uint8(d) => ((Less|Leq)8U (Const8 <x.Type> [c-d-1]) (Sub8 <x.Type> x (Const8 <x.Type> [d+1])))
|
||||||
|
|
||||||
|
// NaN check: ( x != x || x (>|>=|<|<=) c ) -> ( !(c (>=|>|<=|<) x) )
|
||||||
|
(OrB (Neq64F x x) ((Less|Leq)64F x y:(Const64F [c]))) => (Not ((Leq|Less)64F y x))
|
||||||
|
(OrB (Neq64F x x) ((Less|Leq)64F y:(Const64F [c]) x)) => (Not ((Leq|Less)64F x y))
|
||||||
|
(OrB (Neq32F x x) ((Less|Leq)32F x y:(Const32F [c]))) => (Not ((Leq|Less)32F y x))
|
||||||
|
(OrB (Neq32F x x) ((Less|Leq)32F y:(Const32F [c]) x)) => (Not ((Leq|Less)32F x y))
|
||||||
|
|
||||||
|
// NaN check: ( x != x || Abs(x) (>|>=|<|<=) c ) -> ( !(c (>=|>|<=|<) Abs(x)) )
|
||||||
|
(OrB (Neq64F x x) ((Less|Leq)64F abs:(Abs x) y:(Const64F [c]))) => (Not ((Leq|Less)64F y abs))
|
||||||
|
(OrB (Neq64F x x) ((Less|Leq)64F y:(Const64F [c]) abs:(Abs x))) => (Not ((Leq|Less)64F abs y))
|
||||||
|
|
||||||
|
// NaN check: ( x != x || -x (>|>=|<|<=) c ) -> ( !(c (>=|>|<=|<) -x) )
|
||||||
|
(OrB (Neq64F x x) ((Less|Leq)64F neg:(Neg64F x) y:(Const64F [c]))) => (Not ((Leq|Less)64F y neg))
|
||||||
|
(OrB (Neq64F x x) ((Less|Leq)64F y:(Const64F [c]) neg:(Neg64F x))) => (Not ((Leq|Less)64F neg y))
|
||||||
|
(OrB (Neq32F x x) ((Less|Leq)32F neg:(Neg32F x) y:(Const32F [c]))) => (Not ((Leq|Less)32F y neg))
|
||||||
|
(OrB (Neq32F x x) ((Less|Leq)32F y:(Const32F [c]) neg:(Neg32F x))) => (Not ((Leq|Less)32F neg y))
|
||||||
|
|
||||||
// Canonicalize x-const to x+(-const)
|
// Canonicalize x-const to x+(-const)
|
||||||
(Sub64 x (Const64 <t> [c])) && x.Op != OpConst64 => (Add64 (Const64 <t> [-c]) x)
|
(Sub64 x (Const64 <t> [c])) && x.Op != OpConst64 => (Add64 (Const64 <t> [-c]) x)
|
||||||
(Sub32 x (Const32 <t> [c])) && x.Op != OpConst32 => (Add32 (Const32 <t> [-c]) x)
|
(Sub32 x (Const32 <t> [c])) && x.Op != OpConst32 => (Add32 (Const32 <t> [-c]) x)
|
||||||
|
|
|
@ -9,8 +9,8 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
)
|
)
|
||||||
|
|
||||||
// fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange).
|
// fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck).
|
||||||
func fuseEarly(f *Func) { fuse(f, fuseTypePlain|fuseTypeIntInRange) }
|
func fuseEarly(f *Func) { fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck) }
|
||||||
|
|
||||||
// fuseLate runs fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect).
|
// fuseLate runs fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect).
|
||||||
func fuseLate(f *Func) { fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect) }
|
func fuseLate(f *Func) { fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect) }
|
||||||
|
@ -21,6 +21,7 @@ const (
|
||||||
fuseTypePlain fuseType = 1 << iota
|
fuseTypePlain fuseType = 1 << iota
|
||||||
fuseTypeIf
|
fuseTypeIf
|
||||||
fuseTypeIntInRange
|
fuseTypeIntInRange
|
||||||
|
fuseTypeNanCheck
|
||||||
fuseTypeBranchRedirect
|
fuseTypeBranchRedirect
|
||||||
fuseTypeShortCircuit
|
fuseTypeShortCircuit
|
||||||
)
|
)
|
||||||
|
@ -38,7 +39,10 @@ func fuse(f *Func, typ fuseType) {
|
||||||
changed = fuseBlockIf(b) || changed
|
changed = fuseBlockIf(b) || changed
|
||||||
}
|
}
|
||||||
if typ&fuseTypeIntInRange != 0 {
|
if typ&fuseTypeIntInRange != 0 {
|
||||||
changed = fuseIntegerComparisons(b) || changed
|
changed = fuseIntInRange(b) || changed
|
||||||
|
}
|
||||||
|
if typ&fuseTypeNanCheck != 0 {
|
||||||
|
changed = fuseNanCheck(b) || changed
|
||||||
}
|
}
|
||||||
if typ&fuseTypePlain != 0 {
|
if typ&fuseTypePlain != 0 {
|
||||||
changed = fuseBlockPlain(b) || changed
|
changed = fuseBlockPlain(b) || changed
|
||||||
|
|
|
@ -4,21 +4,36 @@
|
||||||
|
|
||||||
package ssa
|
package ssa
|
||||||
|
|
||||||
// fuseIntegerComparisons optimizes inequalities such as '1 <= x && x < 5',
|
// fuseIntInRange transforms integer range checks to remove the short-circuit operator. For example,
|
||||||
// which can be optimized to 'unsigned(x-1) < 4'.
|
// it would convert `if 1 <= x && x < 5 { ... }` into `if (1 <= x) & (x < 5) { ... }`. Rewrite rules
|
||||||
|
// can then optimize these into unsigned range checks, `if unsigned(x-1) < 4 { ... }` in this case.
|
||||||
|
func fuseIntInRange(b *Block) bool {
|
||||||
|
return fuseComparisons(b, canOptIntInRange)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fuseNanCheck replaces the short-circuit operators between NaN checks and comparisons with
|
||||||
|
// constants. For example, it would transform `if x != x || x > 1.0 { ... }` into
|
||||||
|
// `if (x != x) | (x > 1.0) { ... }`. Rewrite rules can then merge the NaN check with the comparison,
|
||||||
|
// in this case generating `if !(x <= 1.0) { ... }`.
|
||||||
|
func fuseNanCheck(b *Block) bool {
|
||||||
|
return fuseComparisons(b, canOptNanCheck)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fuseComparisons looks for control graphs that match this pattern:
|
||||||
//
|
//
|
||||||
// Look for branch structure like:
|
// p - predecessor
|
||||||
//
|
|
||||||
// p
|
|
||||||
// |\
|
// |\
|
||||||
// | b
|
// | b - block
|
||||||
// |/ \
|
// |/ \
|
||||||
// s0 s1
|
// s0 s1 - successors
|
||||||
//
|
//
|
||||||
// In our example, p has control '1 <= x', b has control 'x < 5',
|
// This pattern is typical for if statements such as `if x || y { ... }` and `if x && y { ... }`.
|
||||||
// and s0 and s1 are the if and else results of the comparison.
|
|
||||||
//
|
//
|
||||||
// This will be optimized into:
|
// If canOptControls returns true when passed the control values for p and b then fuseComparisons
|
||||||
|
// will try to convert p into a plain block with only one successor (b) and modify b's control
|
||||||
|
// value to include p's control value (effectively causing b to be speculatively executed).
|
||||||
|
//
|
||||||
|
// This transformation results in a control graph that will now look like this:
|
||||||
//
|
//
|
||||||
// p
|
// p
|
||||||
// \
|
// \
|
||||||
|
@ -26,9 +41,12 @@ package ssa
|
||||||
// / \
|
// / \
|
||||||
// s0 s1
|
// s0 s1
|
||||||
//
|
//
|
||||||
// where b has the combined control value 'unsigned(x-1) < 4'.
|
|
||||||
// Later passes will then fuse p and b.
|
// Later passes will then fuse p and b.
|
||||||
func fuseIntegerComparisons(b *Block) bool {
|
//
|
||||||
|
// In other words `if x || y { ... }` will become `if x | y { ... }` and `if x && y { ... }` will
|
||||||
|
// become `if x & y { ... }`. This is a useful transformation because we can then use rewrite
|
||||||
|
// rules to optimize `x | y` and `x & y`.
|
||||||
|
func fuseComparisons(b *Block, canOptControls func(a, b *Value, op Op) bool) bool {
|
||||||
if len(b.Preds) != 1 {
|
if len(b.Preds) != 1 {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
@ -45,14 +63,6 @@ func fuseIntegerComparisons(b *Block) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if the control values combine to make an integer inequality that
|
|
||||||
// can be further optimized later.
|
|
||||||
bc := b.Controls[0]
|
|
||||||
pc := p.Controls[0]
|
|
||||||
if !areMergeableInequalities(bc, pc) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the first (true) successors match then we have a disjunction (||).
|
// If the first (true) successors match then we have a disjunction (||).
|
||||||
// If the second (false) successors match then we have a conjunction (&&).
|
// If the second (false) successors match then we have a conjunction (&&).
|
||||||
for i, op := range [2]Op{OpOrB, OpAndB} {
|
for i, op := range [2]Op{OpOrB, OpAndB} {
|
||||||
|
@ -60,6 +70,13 @@ func fuseIntegerComparisons(b *Block) bool {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if the control values can be usefully combined.
|
||||||
|
bc := b.Controls[0]
|
||||||
|
pc := p.Controls[0]
|
||||||
|
if !canOptControls(bc, pc, op) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(mundaym): should we also check the cost of executing b?
|
// TODO(mundaym): should we also check the cost of executing b?
|
||||||
// Currently we might speculatively execute b even if b contains
|
// Currently we might speculatively execute b even if b contains
|
||||||
// a lot of instructions. We could just check that len(b.Values)
|
// a lot of instructions. We could just check that len(b.Values)
|
||||||
|
@ -125,7 +142,7 @@ func isUnsignedInequality(v *Value) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func areMergeableInequalities(x, y *Value) bool {
|
func canOptIntInRange(x, y *Value, op Op) bool {
|
||||||
// We need both inequalities to be either in the signed or unsigned domain.
|
// We need both inequalities to be either in the signed or unsigned domain.
|
||||||
// TODO(mundaym): it would also be good to merge when we have an Eq op that
|
// TODO(mundaym): it would also be good to merge when we have an Eq op that
|
||||||
// could be transformed into a Less/Leq. For example in the unsigned
|
// could be transformed into a Less/Leq. For example in the unsigned
|
||||||
|
@ -155,3 +172,60 @@ func areMergeableInequalities(x, y *Value) bool {
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// canOptNanCheck reports whether one of the arguments is a NaN check and the other
|
||||||
|
// is a comparison with a constant that can be combined together.
|
||||||
|
//
|
||||||
|
// Examples (c must be a constant):
|
||||||
|
//
|
||||||
|
// v != v || v < c => !(c <= v)
|
||||||
|
// v != v || v <= c => !(c < v)
|
||||||
|
// v != v || c < v => !(v <= c)
|
||||||
|
// v != v || c <= v => !(v < c)
|
||||||
|
func canOptNanCheck(x, y *Value, op Op) bool {
|
||||||
|
if op != OpOrB {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i <= 1; i, x, y = i+1, y, x {
|
||||||
|
if len(x.Args) != 2 || x.Args[0] != x.Args[1] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v := x.Args[0]
|
||||||
|
switch x.Op {
|
||||||
|
case OpNeq64F:
|
||||||
|
if y.Op != OpLess64F && y.Op != OpLeq64F {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for j := 0; j <= 1; j++ {
|
||||||
|
a, b := y.Args[j], y.Args[j^1]
|
||||||
|
if a.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Sign bit operations do not affect NaN check results. This special case allows us
|
||||||
|
// to optimize statements like `if v != v || Abs(v) > c { ... }`.
|
||||||
|
if (b.Op == OpAbs || b.Op == OpNeg64F) && b.Args[0] == v {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return b == v
|
||||||
|
}
|
||||||
|
case OpNeq32F:
|
||||||
|
if y.Op != OpLess32F && y.Op != OpLeq32F {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for j := 0; j <= 1; j++ {
|
||||||
|
a, b := y.Args[j], y.Args[j^1]
|
||||||
|
if a.Op != OpConst32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Sign bit operations do not affect NaN check results. This special case allows us
|
||||||
|
// to optimize statements like `if v != v || -v > c { ... }`.
|
||||||
|
if b.Op == OpNeg32F && b.Args[0] == v {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return b == v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
|
@ -23957,6 +23957,7 @@ func rewriteValuegeneric_OpOrB(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
b := v.Block
|
b := v.Block
|
||||||
|
typ := &b.Func.Config.Types
|
||||||
// match: (OrB (Less64 (Const64 [c]) x) (Less64 x (Const64 [d])))
|
// match: (OrB (Less64 (Const64 [c]) x) (Less64 x (Const64 [d])))
|
||||||
// cond: c >= d
|
// cond: c >= d
|
||||||
// result: (Less64U (Const64 <x.Type> [c-d]) (Sub64 <x.Type> x (Const64 <x.Type> [d])))
|
// result: (Less64U (Const64 <x.Type> [c-d]) (Sub64 <x.Type> x (Const64 <x.Type> [d])))
|
||||||
|
@ -25269,6 +25270,558 @@ func rewriteValuegeneric_OpOrB(v *Value) bool {
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
// match: (OrB (Neq64F x x) (Less64F x y:(Const64F [c])))
|
||||||
|
// result: (Not (Leq64F y x))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLess64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
if x != v_1.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
y := v_1.Args[1]
|
||||||
|
if y.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLeq64F, typ.Bool)
|
||||||
|
v0.AddArg2(y, x)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq64F x x) (Leq64F x y:(Const64F [c])))
|
||||||
|
// result: (Not (Less64F y x))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
if x != v_1.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
y := v_1.Args[1]
|
||||||
|
if y.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLess64F, typ.Bool)
|
||||||
|
v0.AddArg2(y, x)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq64F x x) (Less64F y:(Const64F [c]) x))
|
||||||
|
// result: (Not (Leq64F x y))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLess64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
y := v_1.Args[0]
|
||||||
|
if y.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if x != v_1.Args[1] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLeq64F, typ.Bool)
|
||||||
|
v0.AddArg2(x, y)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq64F x x) (Leq64F y:(Const64F [c]) x))
|
||||||
|
// result: (Not (Less64F x y))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
y := v_1.Args[0]
|
||||||
|
if y.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if x != v_1.Args[1] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLess64F, typ.Bool)
|
||||||
|
v0.AddArg2(x, y)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq32F x x) (Less32F x y:(Const32F [c])))
|
||||||
|
// result: (Not (Leq32F y x))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLess32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
if x != v_1.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
y := v_1.Args[1]
|
||||||
|
if y.Op != OpConst32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLeq32F, typ.Bool)
|
||||||
|
v0.AddArg2(y, x)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq32F x x) (Leq32F x y:(Const32F [c])))
|
||||||
|
// result: (Not (Less32F y x))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLeq32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
if x != v_1.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
y := v_1.Args[1]
|
||||||
|
if y.Op != OpConst32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLess32F, typ.Bool)
|
||||||
|
v0.AddArg2(y, x)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq32F x x) (Less32F y:(Const32F [c]) x))
|
||||||
|
// result: (Not (Leq32F x y))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLess32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
y := v_1.Args[0]
|
||||||
|
if y.Op != OpConst32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if x != v_1.Args[1] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLeq32F, typ.Bool)
|
||||||
|
v0.AddArg2(x, y)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq32F x x) (Leq32F y:(Const32F [c]) x))
|
||||||
|
// result: (Not (Less32F x y))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLeq32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
y := v_1.Args[0]
|
||||||
|
if y.Op != OpConst32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if x != v_1.Args[1] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLess32F, typ.Bool)
|
||||||
|
v0.AddArg2(x, y)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq64F x x) (Less64F abs:(Abs x) y:(Const64F [c])))
|
||||||
|
// result: (Not (Leq64F y abs))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLess64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
abs := v_1.Args[0]
|
||||||
|
if abs.Op != OpAbs || x != abs.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
y := v_1.Args[1]
|
||||||
|
if y.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLeq64F, typ.Bool)
|
||||||
|
v0.AddArg2(y, abs)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq64F x x) (Leq64F abs:(Abs x) y:(Const64F [c])))
|
||||||
|
// result: (Not (Less64F y abs))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
abs := v_1.Args[0]
|
||||||
|
if abs.Op != OpAbs || x != abs.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
y := v_1.Args[1]
|
||||||
|
if y.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLess64F, typ.Bool)
|
||||||
|
v0.AddArg2(y, abs)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq64F x x) (Less64F y:(Const64F [c]) abs:(Abs x)))
|
||||||
|
// result: (Not (Leq64F abs y))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLess64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
y := v_1.Args[0]
|
||||||
|
if y.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
abs := v_1.Args[1]
|
||||||
|
if abs.Op != OpAbs || x != abs.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLeq64F, typ.Bool)
|
||||||
|
v0.AddArg2(abs, y)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq64F x x) (Leq64F y:(Const64F [c]) abs:(Abs x)))
|
||||||
|
// result: (Not (Less64F abs y))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
y := v_1.Args[0]
|
||||||
|
if y.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
abs := v_1.Args[1]
|
||||||
|
if abs.Op != OpAbs || x != abs.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLess64F, typ.Bool)
|
||||||
|
v0.AddArg2(abs, y)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq64F x x) (Less64F neg:(Neg64F x) y:(Const64F [c])))
|
||||||
|
// result: (Not (Leq64F y neg))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLess64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
neg := v_1.Args[0]
|
||||||
|
if neg.Op != OpNeg64F || x != neg.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
y := v_1.Args[1]
|
||||||
|
if y.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLeq64F, typ.Bool)
|
||||||
|
v0.AddArg2(y, neg)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq64F x x) (Leq64F neg:(Neg64F x) y:(Const64F [c])))
|
||||||
|
// result: (Not (Less64F y neg))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
neg := v_1.Args[0]
|
||||||
|
if neg.Op != OpNeg64F || x != neg.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
y := v_1.Args[1]
|
||||||
|
if y.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLess64F, typ.Bool)
|
||||||
|
v0.AddArg2(y, neg)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq64F x x) (Less64F y:(Const64F [c]) neg:(Neg64F x)))
|
||||||
|
// result: (Not (Leq64F neg y))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLess64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
y := v_1.Args[0]
|
||||||
|
if y.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
neg := v_1.Args[1]
|
||||||
|
if neg.Op != OpNeg64F || x != neg.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLeq64F, typ.Bool)
|
||||||
|
v0.AddArg2(neg, y)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq64F x x) (Leq64F y:(Const64F [c]) neg:(Neg64F x)))
|
||||||
|
// result: (Not (Less64F neg y))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLeq64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
y := v_1.Args[0]
|
||||||
|
if y.Op != OpConst64F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
neg := v_1.Args[1]
|
||||||
|
if neg.Op != OpNeg64F || x != neg.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLess64F, typ.Bool)
|
||||||
|
v0.AddArg2(neg, y)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq32F x x) (Less32F neg:(Neg32F x) y:(Const32F [c])))
|
||||||
|
// result: (Not (Leq32F y neg))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLess32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
neg := v_1.Args[0]
|
||||||
|
if neg.Op != OpNeg32F || x != neg.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
y := v_1.Args[1]
|
||||||
|
if y.Op != OpConst32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLeq32F, typ.Bool)
|
||||||
|
v0.AddArg2(y, neg)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq32F x x) (Leq32F neg:(Neg32F x) y:(Const32F [c])))
|
||||||
|
// result: (Not (Less32F y neg))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLeq32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
neg := v_1.Args[0]
|
||||||
|
if neg.Op != OpNeg32F || x != neg.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
y := v_1.Args[1]
|
||||||
|
if y.Op != OpConst32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLess32F, typ.Bool)
|
||||||
|
v0.AddArg2(y, neg)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq32F x x) (Less32F y:(Const32F [c]) neg:(Neg32F x)))
|
||||||
|
// result: (Not (Leq32F neg y))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLess32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
y := v_1.Args[0]
|
||||||
|
if y.Op != OpConst32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
neg := v_1.Args[1]
|
||||||
|
if neg.Op != OpNeg32F || x != neg.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLeq32F, typ.Bool)
|
||||||
|
v0.AddArg2(neg, y)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// match: (OrB (Neq32F x x) (Leq32F y:(Const32F [c]) neg:(Neg32F x)))
|
||||||
|
// result: (Not (Less32F neg y))
|
||||||
|
for {
|
||||||
|
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||||
|
if v_0.Op != OpNeq32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x := v_0.Args[1]
|
||||||
|
if x != v_0.Args[0] || v_1.Op != OpLeq32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_ = v_1.Args[1]
|
||||||
|
y := v_1.Args[0]
|
||||||
|
if y.Op != OpConst32F {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
neg := v_1.Args[1]
|
||||||
|
if neg.Op != OpNeg32F || x != neg.Args[0] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
v.reset(OpNot)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpLess32F, typ.Bool)
|
||||||
|
v0.AddArg2(neg, y)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
func rewriteValuegeneric_OpPhi(v *Value) bool {
|
func rewriteValuegeneric_OpPhi(v *Value) bool {
|
||||||
|
|
|
@ -623,6 +623,110 @@ func TestInf(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isNaNOrGtZero64 reports whether x is NaN or strictly greater than zero.
// The NaN check is written first and the function is kept out of line so
// that this exact "NaN || compare" shape is compiled as a unit.
//
//go:noinline
func isNaNOrGtZero64(x float64) bool {
	return math.IsNaN(x) || x > 0
}

// isNaNOrGteZero64 reports whether x is NaN or greater than or equal to zero.
// The comparison is deliberately written before the NaN check so that both
// operand orders of the || are covered across the helpers.
//
//go:noinline
func isNaNOrGteZero64(x float64) bool {
	return x >= 0 || math.IsNaN(x)
}

// isNaNOrLtZero64 reports whether x is NaN or strictly less than zero
// (comparison first, NaN check second).
//
//go:noinline
func isNaNOrLtZero64(x float64) bool {
	return x < 0 || math.IsNaN(x)
}

// isNaNOrLteZero64 reports whether x is NaN or less than or equal to zero
// (NaN check first, comparison second).
//
//go:noinline
func isNaNOrLteZero64(x float64) bool {
	return math.IsNaN(x) || x <= 0
}

// TestFusedNaNChecks64 verifies that the float64 "NaN or comparison against
// zero" helpers return the mathematically expected answer for zeros of both
// signs, finite values, infinities, and NaN.
func TestFusedNaNChecks64(t *testing.T) {
	// Each entry classifies value; exactly one of the flags is set.
	tests := []struct {
		value             float64
		isZero            bool
		isGreaterThanZero bool
		isLessThanZero    bool
		isNaN             bool
	}{
		{value: 0.0, isZero: true},
		// Negative zero must still compare equal to zero.
		{value: math.Copysign(0, -1), isZero: true},
		{value: 1.0, isGreaterThanZero: true},
		{value: -1.0, isLessThanZero: true},
		{value: math.Inf(1), isGreaterThanZero: true},
		{value: math.Inf(-1), isLessThanZero: true},
		{value: math.NaN(), isNaN: true},
	}

	// check runs f on value and reports a mismatch against want.
	check := func(name string, f func(x float64) bool, value float64, want bool) {
		// Attribute failures to the calling check(...) line, not to this closure.
		t.Helper()
		got := f(value)
		if got != want {
			t.Errorf("%v(%g): want %v, got %v", name, value, want, got)
		}
	}

	for _, test := range tests {
		check("isNaNOrGtZero64", isNaNOrGtZero64, test.value, test.isNaN || test.isGreaterThanZero)
		check("isNaNOrGteZero64", isNaNOrGteZero64, test.value, test.isNaN || test.isGreaterThanZero || test.isZero)
		check("isNaNOrLtZero64", isNaNOrLtZero64, test.value, test.isNaN || test.isLessThanZero)
		check("isNaNOrLteZero64", isNaNOrLteZero64, test.value, test.isNaN || test.isLessThanZero || test.isZero)
	}
}
|
||||||
|
|
||||||
|
// isNaNOrGtZero32 reports whether x is NaN or strictly greater than zero.
// NaN is detected with x != x, and the comparison is written first; the
// function is kept out of line so this exact shape is compiled as a unit.
//
//go:noinline
func isNaNOrGtZero32(x float32) bool {
	return x > 0 || x != x
}

// isNaNOrGteZero32 reports whether x is NaN or greater than or equal to zero
// (NaN check first, comparison second).
//
//go:noinline
func isNaNOrGteZero32(x float32) bool {
	return x != x || x >= 0
}

// isNaNOrLtZero32 reports whether x is NaN or strictly less than zero
// (NaN check first, comparison second).
//
//go:noinline
func isNaNOrLtZero32(x float32) bool {
	return x != x || x < 0
}

// isNaNOrLteZero32 reports whether x is NaN or less than or equal to zero
// (comparison first, NaN check second).
//
//go:noinline
func isNaNOrLteZero32(x float32) bool {
	return x <= 0 || x != x
}

// TestFusedNaNChecks32 verifies that the float32 "NaN or comparison against
// zero" helpers return the mathematically expected answer for zeros of both
// signs, finite values, infinities, and NaN.
func TestFusedNaNChecks32(t *testing.T) {
	// Each entry classifies value; exactly one of the flags is set.
	tests := []struct {
		value             float32
		isZero            bool
		isGreaterThanZero bool
		isLessThanZero    bool
		isNaN             bool
	}{
		{value: 0.0, isZero: true},
		// Negative zero must still compare equal to zero.
		{value: float32(math.Copysign(0, -1)), isZero: true},
		{value: 1.0, isGreaterThanZero: true},
		{value: -1.0, isLessThanZero: true},
		{value: float32(math.Inf(1)), isGreaterThanZero: true},
		{value: float32(math.Inf(-1)), isLessThanZero: true},
		{value: float32(math.NaN()), isNaN: true},
	}

	// check runs f on value and reports a mismatch against want.
	check := func(name string, f func(x float32) bool, value float32, want bool) {
		// Attribute failures to the calling check(...) line, not to this closure.
		t.Helper()
		got := f(value)
		if got != want {
			t.Errorf("%v(%g): want %v, got %v", name, value, want, got)
		}
	}

	for _, test := range tests {
		check("isNaNOrGtZero32", isNaNOrGtZero32, test.value, test.isNaN || test.isGreaterThanZero)
		check("isNaNOrGteZero32", isNaNOrGteZero32, test.value, test.isNaN || test.isGreaterThanZero || test.isZero)
		check("isNaNOrLtZero32", isNaNOrLtZero32, test.value, test.isNaN || test.isLessThanZero)
		check("isNaNOrLteZero32", isNaNOrLteZero32, test.value, test.isNaN || test.isLessThanZero || test.isZero)
	}
}
|
||||||
|
|
||||||
var sinkFloat float64
|
var sinkFloat float64
|
||||||
|
|
||||||
func BenchmarkMul2(b *testing.B) {
|
func BenchmarkMul2(b *testing.B) {
|
||||||
|
|
|
@ -6,6 +6,8 @@
|
||||||
|
|
||||||
package codegen
|
package codegen
|
||||||
|
|
||||||
|
import "math"
|
||||||
|
|
||||||
// Notes:
|
// Notes:
|
||||||
// - these examples use channels to provide a source of
|
// - these examples use channels to provide a source of
|
||||||
// unknown values that cannot be optimized away
|
// unknown values that cannot be optimized away
|
||||||
|
@ -196,6 +198,84 @@ func ui4d(c <-chan uint8) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ------------------------------------ //
|
||||||
|
// merge NaN checks //
|
||||||
|
// ------------------------------------ //
|
||||||
|
|
||||||
|
// f64NaNOrPosInf keeps receiving float64 values from c for as long as the
// received value is NaN or positive infinity; the loop body is empty.
// The amd64:/riscv64: comments above the loop are assembly annotations for
// the loop condition — NOTE(review): their exact semantics (quoted = expected,
// leading '-' = must be absent) are defined by the codegen test harness, not
// by this file; keep them directly above the for statement.
func f64NaNOrPosInf(c <-chan float64) {
|
||||||
|
// This test assumes IsInf(x, 1) is implemented as x > MaxFloat rather than x == Inf(1).
|
||||||
|
|
||||||
|
// amd64:"JCS",-"JNE",-"JPS",-"JPC"
|
||||||
|
// riscv64:"FCLASSD",-"FLED",-"FLTD",-"FNED",-"FEQD"
|
||||||
|
for x := <-c; math.IsNaN(x) || math.IsInf(x, 1); x = <-c {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// f64NaNOrNegInf keeps receiving float64 values from c for as long as the
// received value is NaN or negative infinity; the loop body is empty.
// The amd64:/riscv64: comments above the loop are assembly annotations for
// the loop condition — NOTE(review): their semantics are defined by the
// codegen test harness, not by this file; keep them directly above the loop.
func f64NaNOrNegInf(c <-chan float64) {
|
||||||
|
// This test assumes IsInf(x, -1) is implemented as x < -MaxFloat rather than x == Inf(-1).
|
||||||
|
|
||||||
|
// amd64:"JCS",-"JNE",-"JPS",-"JPC"
|
||||||
|
// riscv64:"FCLASSD",-"FLED",-"FLTD",-"FNED",-"FEQD"
|
||||||
|
for x := <-c; math.IsNaN(x) || math.IsInf(x, -1); x = <-c {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// f64NaNOrLtOne keeps receiving float64 values from c for as long as the
// received value is NaN or less than 1 (NaN check written first); the loop
// body is empty. The amd64:/riscv64: comments are assembly annotations for
// the loop condition — NOTE(review): semantics defined by the codegen test
// harness; keep them directly above the loop.
func f64NaNOrLtOne(c <-chan float64) {
|
||||||
|
// amd64:"JCS",-"JNE",-"JPS",-"JPC"
|
||||||
|
// riscv64:"FLED",-"FLTD",-"FNED",-"FEQD"
|
||||||
|
for x := <-c; math.IsNaN(x) || x < 1; x = <-c {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// f64NaNOrLteOne keeps receiving float64 values from c for as long as the
// received value is less than or equal to 1 or is NaN (comparison written
// first); the loop body is empty. The amd64:/riscv64: comments are assembly
// annotations for the loop condition — NOTE(review): semantics defined by the
// codegen test harness; keep them directly above the loop.
func f64NaNOrLteOne(c <-chan float64) {
|
||||||
|
// amd64:"JLS",-"JNE",-"JPS",-"JPC"
|
||||||
|
// riscv64:"FLTD",-"FLED",-"FNED",-"FEQD"
|
||||||
|
for x := <-c; x <= 1 || math.IsNaN(x); x = <-c {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// f64NaNOrGtOne keeps receiving float64 values from c for as long as the
// received value is NaN or greater than 1 (NaN check written first); the loop
// body is empty. The amd64:/riscv64: comments are assembly annotations for
// the loop condition — NOTE(review): semantics defined by the codegen test
// harness; keep them directly above the loop.
func f64NaNOrGtOne(c <-chan float64) {
|
||||||
|
// amd64:"JCS",-"JNE",-"JPS",-"JPC"
|
||||||
|
// riscv64:"FLED",-"FLTD",-"FNED",-"FEQD"
|
||||||
|
for x := <-c; math.IsNaN(x) || x > 1; x = <-c {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// f64NaNOrGteOne keeps receiving float64 values from c for as long as the
// received value is greater than or equal to 1 or is NaN (comparison written
// first); the loop body is empty. The amd64:/riscv64: comments are assembly
// annotations for the loop condition — NOTE(review): semantics defined by the
// codegen test harness; keep them directly above the loop.
func f64NaNOrGteOne(c <-chan float64) {
|
||||||
|
// amd64:"JLS",-"JNE",-"JPS",-"JPC"
|
||||||
|
// riscv64:"FLTD",-"FLED",-"FNED",-"FEQD"
|
||||||
|
for x := <-c; x >= 1 || math.IsNaN(x); x = <-c {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// f32NaNOrLtOne keeps receiving float32 values from c for as long as the
// received value is less than 1 or is NaN (NaN is detected with x != x,
// comparison written first); the loop body is empty. The amd64:/riscv64:
// comments are assembly annotations for the loop condition — NOTE(review):
// semantics defined by the codegen test harness; keep them directly above
// the loop.
func f32NaNOrLtOne(c <-chan float32) {
|
||||||
|
// amd64:"JCS",-"JNE",-"JPS",-"JPC"
|
||||||
|
// riscv64:"FLES",-"FLTS",-"FNES",-"FEQS"
|
||||||
|
for x := <-c; x < 1 || x != x; x = <-c {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// f32NaNOrLteOne keeps receiving float32 values from c for as long as the
// received value is NaN (detected with x != x, written first) or is less
// than or equal to 1; the loop body is empty. The amd64:/riscv64: comments
// are assembly annotations for the loop condition — NOTE(review): semantics
// defined by the codegen test harness; keep them directly above the loop.
func f32NaNOrLteOne(c <-chan float32) {
|
||||||
|
// amd64:"JLS",-"JNE",-"JPS",-"JPC"
|
||||||
|
// riscv64:"FLTS",-"FLES",-"FNES",-"FEQS"
|
||||||
|
for x := <-c; x != x || x <= 1; x = <-c {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// f32NaNOrGtOne keeps receiving float32 values from c for as long as the
// received value is greater than 1 or is NaN (NaN is detected with x != x,
// comparison written first); the loop body is empty. The amd64:/riscv64:
// comments are assembly annotations for the loop condition — NOTE(review):
// semantics defined by the codegen test harness; keep them directly above
// the loop.
func f32NaNOrGtOne(c <-chan float32) {
|
||||||
|
// amd64:"JCS",-"JNE",-"JPS",-"JPC"
|
||||||
|
// riscv64:"FLES",-"FLTS",-"FNES",-"FEQS"
|
||||||
|
for x := <-c; x > 1 || x != x; x = <-c {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// f32NaNOrGteOne keeps receiving float32 values from c for as long as the
// received value is NaN (detected with x != x, written first) or is greater
// than or equal to 1; the loop body is empty. The amd64:/riscv64: comments
// are assembly annotations for the loop condition — NOTE(review): semantics
// defined by the codegen test harness; keep them directly above the loop.
func f32NaNOrGteOne(c <-chan float32) {
|
||||||
|
// amd64:"JLS",-"JNE",-"JPS",-"JPC"
|
||||||
|
// riscv64:"FLTS",-"FLES",-"FNES",-"FEQS"
|
||||||
|
for x := <-c; x != x || x >= 1; x = <-c {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ------------------------------------ //
|
// ------------------------------------ //
|
||||||
// regressions //
|
// regressions //
|
||||||
// ------------------------------------ //
|
// ------------------------------------ //
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue