From 4e05a070c4f88ebf43f0873e144c62d02d56060b Mon Sep 17 00:00:00 2001
From: Michael Munday <mndygolang+git@gmail.com>
Date: Mon, 11 Aug 2025 23:26:59 +0100
Subject: [PATCH] math: implement IsInf using Abs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Abs is an intrinsic (or a relatively cheap operation) on most
architectures. Using it in IsInf typically saves a branch when
`sign` is 0 (note the `sign` variable is typically a constant).

This change doesn't make a huge difference on amd64 (these
benchmarks are fairly noisy too) but removing the branch will
allow rewrite rules to detect and optimize infinity checks on
other architectures. For example, riscv64 can check for
infinities with the FCLASSD instruction and s390x can use the
TCDB instruction.

goos: linux
goarch: amd64
pkg: math
cpu: 12th Gen Intel(R) Core(TM) i7-12700T
                    │          sec/op          │    sec/op      vs base                │
Acos                              4.317n ±  1%    4.321n ±  0%        ~ (p=0.466 n=10)
Acosh                             8.857n ±  1%    8.411n ±  2%   -5.05% (p=0.001 n=10)
Asin                              4.260n ±  1%    4.204n ±  6%   -1.31% (p=0.021 n=10)
Asinh                             10.63n ±  2%    10.37n ±  0%   -2.49% (p=0.000 n=10)
Atan                              2.493n ±  1%    2.368n ±  0%   -5.01% (p=0.000 n=10)
Atanh                             8.820n ±  4%    8.770n ±  2%        ~ (p=0.579 n=10)
Atan2                             4.212n ±  1%    4.066n ± 11%   -3.45% (p=0.023 n=10)
Cbrt                              4.859n ±  0%    4.845n ±  0%   -0.29% (p=0.000 n=10)
Ceil                             0.3877n ±  3%   0.2514n ±  0%  -35.17% (p=0.000 n=10)
Copysign                         0.3479n ±  2%   0.4179n ±  0%  +20.14% (p=0.000 n=10)
Cos                               4.734n ±  2%    4.486n ±  0%   -5.26% (p=0.000 n=10)
Cosh                              5.244n ±  0%    5.071n ±  0%   -3.29% (p=0.000 n=10)
Erf                               2.975n ±  1%    2.788n ±  0%   -6.29% (p=0.000 n=10)
Erfc                              3.259n ±  1%    3.121n ±  0%   -4.23% (p=0.000 n=10)
Erfinv                            4.015n ±  1%    3.904n ±  0%   -2.76% (p=0.000 n=10)
Erfcinv                           4.166n ±  1%    4.039n ±  0%   -3.04% (p=0.000 n=10)
Exp                               3.567n ±  1%    3.429n ±  0%   -3.87% (p=0.000 n=10)
ExpGo                             9.173n ±  1%    8.368n ±  2%   -8.78% (p=0.000 n=10)
Expm1                             4.466n ±  3%    4.419n ±  0%   -1.05% (p=0.000 n=10)
Exp2                              8.328n ±  0%    8.046n ±  0%   -3.39% (p=0.000 n=10)
Exp2Go                            8.796n ±  5%    8.237n ±  2%   -6.36% (p=0.000 n=10)
Abs                              0.2400n ±  2%   0.2144n ±  0%  -10.71% (p=0.000 n=10)
Dim                              0.4077n ±  3%   0.3795n ±  1%   -6.91% (p=0.000 n=10)
Floor                            0.3616n ±  2%   0.2528n ±  3%  -30.10% (p=0.000 n=10)
Max                               1.401n ±  1%    1.344n ±  1%   -4.14% (p=0.000 n=10)
Min                               1.391n ±  1%    1.345n ±  1%   -3.27% (p=0.000 n=10)
Mod                               15.45n ±  1%    15.62n ±  2%        ~ (p=0.066 n=10)
Frexp                             1.838n ±  2%    1.605n ±  1%  -12.70% (p=0.000 n=10)
Gamma                             4.465n ±  1%    4.458n ±  1%        ~ (p=0.256 n=10)
Hypot                             2.237n ±  1%    2.208n ±  0%   -1.32% (p=0.000 n=10)
HypotGo                           2.610n ±  3%    2.663n ±  5%        ~ (p=0.280 n=10)
Ilogb                             1.793n ±  1%    1.566n ±  1%  -12.66% (p=0.000 n=10)
J0                                22.11n ±  1%    21.45n ±  1%   -2.99% (p=0.000 n=10)
J1                                21.71n ±  1%    21.38n ±  1%   -1.54% (p=0.000 n=10)
Jn                                46.43n ±  1%    45.83n ±  1%   -1.30% (p=0.001 n=10)
Ldexp                             2.360n ±  1%    2.111n ±  1%  -10.51% (p=0.000 n=10)
Lgamma                            4.728n ±  1%    4.850n ±  2%   +2.59% (p=0.000 n=10)
Log                               4.304n ±  2%    4.228n ±  1%   -1.78% (p=0.000 n=10)
Logb                              1.833n ±  2%    1.635n ±  2%  -10.80% (p=0.000 n=10)
Log1p                             5.262n ±  2%    5.173n ±  2%   -1.69% (p=0.001 n=10)
Log10                             4.534n ±  1%    4.474n ±  1%   -1.33% (p=0.024 n=10)
Log2                              2.510n ±  2%    2.246n ±  2%  -10.48% (p=0.000 n=10)
Modf                              1.712n ±  3%    1.700n ±  1%        ~ (p=0.055 n=10)
Nextafter32                       2.190n ±  3%    2.187n ±  0%        ~ (p=0.266 n=10)
Nextafter64                       2.184n ±  0%    2.183n ±  0%   -0.05% (p=0.017 n=10)
PowInt                            11.45n ±  7%    11.32n ±  9%        ~ (p=0.137 n=10)
PowFrac                           27.46n ±  3%    27.04n ±  1%   -1.55% (p=0.001 n=10)
Pow10Pos                         0.5367n ±  3%   0.5466n ±  2%   +1.84% (p=0.009 n=10)
Pow10Neg                         0.8939n ±  1%   0.8720n ±  2%   -2.45% (p=0.000 n=10)
Round                             1.218n ±  1%    1.198n ±  1%   -1.56% (p=0.005 n=10)
RoundToEven                       1.711n ±  0%    1.710n ±  0%        ~ (p=0.464 n=10)
Remainder                         12.87n ± 10%    13.79n ± 14%   +7.11% (p=0.027 n=10)
Signbit                          0.4072n ±  2%   0.3839n ±  2%   -5.71% (p=0.000 n=10)
Sin                               4.102n ±  1%    4.058n ±  3%        ~ (p=0.138 n=10)
Sincos                            5.837n ±  1%    5.715n ±  2%   -2.10% (p=0.000 n=10)
Sinh                              5.622n ±  1%    5.567n ±  2%   -0.96% (p=0.006 n=10)
SqrtIndirect                     0.4284n ±  0%   0.4279n ±  0%        ~ (p=0.084 n=10)
SqrtLatency                       2.779n ±  0%    2.777n ±  0%        ~ (p=0.089 n=10)
SqrtIndirectLatency               2.777n ±  0%    2.778n ±  0%        ~ (p=0.305 n=10)
SqrtGoLatency                     24.00n ±  0%    24.51n ±  0%   +2.12% (p=0.000 n=10)
SqrtPrime                         673.0n ±  0%    673.0n ±  0%        ~ (p=0.574 n=10)
Tan                               4.111n ±  4%    4.123n ±  5%        ~ (p=0.424 n=10)
Tanh                              5.787n ±  1%    5.723n ±  1%   -1.11% (p=0.010 n=10)
Trunc                            0.3441n ±  3%   0.2596n ±  2%  -24.56% (p=0.000 n=10)
Y0                                21.63n ±  2%    21.07n ±  2%   -2.61% (p=0.001 n=10)
Y1                                21.42n ±  1%    20.93n ±  3%   -2.29% (p=0.041 n=10)
Yn                                45.78n ±  1%    45.83n ±  1%        ~ (p=0.671 n=10)
Float64bits                      0.2187n ±  2%   0.2199n ±  2%        ~ (p=0.138 n=10)
Float64frombits                  0.2198n ±  1%   0.2199n ±  1%        ~ (p=0.956 n=10)
Float32bits                      0.2237n ±  2%   0.2213n ±  1%        ~ (p=0.060 n=10)
Float32frombits                  0.2251n ±  1%   0.2219n ±  2%   -1.42% (p=0.000 n=10)
FMA                              0.8557n ±  1%   0.8555n ±  0%        ~ (p=0.286 n=10)
geomean                           3.186n          3.070n         -3.61%

Change-Id: I4814bb1e3d9d20e9d8cd7689e8d5383e36b00331
Reviewed-on: https://go-review.googlesource.com/c/go/+/694955
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Sean Liao <sean@liao.dev>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
---
 src/math/bits.go | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/math/bits.go b/src/math/bits.go
index c5cb93b1594..3716a411f4f 100644
--- a/src/math/bits.go
+++ b/src/math/bits.go
@@ -48,7 +48,12 @@ func IsInf(f float64, sign int) bool {
 	// To avoid the floating-point hardware, could use:
 	//	x := Float64bits(f);
 	//	return sign >= 0 && x == uvinf || sign <= 0 && x == uvneginf;
-	return sign >= 0 && f > MaxFloat64 || sign <= 0 && f < -MaxFloat64
+	if sign == 0 {
+		f = Abs(f)
+	} else if sign < 0 {
+		f = -f
+	}
+	return f > MaxFloat64
 }
 
 // normalize returns a normal number y and exponent exp