From 4e05a070c4f88ebf43f0873e144c62d02d56060b Mon Sep 17 00:00:00 2001
From: Michael Munday
Date: Mon, 11 Aug 2025 23:26:59 +0100
Subject: [PATCH] math: implement IsInf using Abs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Abs is an intrinsic (or a relatively cheap operation) on most
architectures. Using it in IsInf typically saves a branch when `sign`
is 0 (note the `sign` variable is typically a constant).

This change doesn't make a huge difference on amd64 (these benchmarks
are fairly noisy too) but removing the branch will allow rewrite rules
to detect and optimize infinity checks on other architectures. For
example, riscv64 can check for infinities with the FCLASSD instruction
and s390x can use the TCDB instruction.

goos: linux
goarch: amd64
pkg: math
cpu: 12th Gen Intel(R) Core(TM) i7-12700T
│ sec/op │ sec/op vs base │
Acos 4.317n ± 1% 4.321n ± 0% ~ (p=0.466 n=10)
Acosh 8.857n ± 1% 8.411n ± 2% -5.05% (p=0.001 n=10)
Asin 4.260n ± 1% 4.204n ± 6% -1.31% (p=0.021 n=10)
Asinh 10.63n ± 2% 10.37n ± 0% -2.49% (p=0.000 n=10)
Atan 2.493n ± 1% 2.368n ± 0% -5.01% (p=0.000 n=10)
Atanh 8.820n ± 4% 8.770n ± 2% ~ (p=0.579 n=10)
Atan2 4.212n ± 1% 4.066n ± 11% -3.45% (p=0.023 n=10)
Cbrt 4.859n ± 0% 4.845n ± 0% -0.29% (p=0.000 n=10)
Ceil 0.3877n ± 3% 0.2514n ± 0% -35.17% (p=0.000 n=10)
Copysign 0.3479n ± 2% 0.4179n ± 0% +20.14% (p=0.000 n=10)
Cos 4.734n ± 2% 4.486n ± 0% -5.26% (p=0.000 n=10)
Cosh 5.244n ± 0% 5.071n ± 0% -3.29% (p=0.000 n=10)
Erf 2.975n ± 1% 2.788n ± 0% -6.29% (p=0.000 n=10)
Erfc 3.259n ± 1% 3.121n ± 0% -4.23% (p=0.000 n=10)
Erfinv 4.015n ± 1% 3.904n ± 0% -2.76% (p=0.000 n=10)
Erfcinv 4.166n ± 1% 4.039n ± 0% -3.04% (p=0.000 n=10)
Exp 3.567n ± 1% 3.429n ± 0% -3.87% (p=0.000 n=10)
ExpGo 9.173n ± 1% 8.368n ± 2% -8.78% (p=0.000 n=10)
Expm1 4.466n ± 3% 4.419n ± 0% -1.05% (p=0.000 n=10)
Exp2 8.328n ± 0% 8.046n ± 0% -3.39% (p=0.000 n=10)
Exp2Go 8.796n ± 5% 8.237n ± 2% -6.36% (p=0.000 n=10)
Abs 0.2400n ± 2% 0.2144n ± 0% -10.71% (p=0.000 n=10)
Dim 0.4077n ± 3% 0.3795n ± 1% -6.91% (p=0.000 n=10)
Floor 0.3616n ± 2% 0.2528n ± 3% -30.10% (p=0.000 n=10)
Max 1.401n ± 1% 1.344n ± 1% -4.14% (p=0.000 n=10)
Min 1.391n ± 1% 1.345n ± 1% -3.27% (p=0.000 n=10)
Mod 15.45n ± 1% 15.62n ± 2% ~ (p=0.066 n=10)
Frexp 1.838n ± 2% 1.605n ± 1% -12.70% (p=0.000 n=10)
Gamma 4.465n ± 1% 4.458n ± 1% ~ (p=0.256 n=10)
Hypot 2.237n ± 1% 2.208n ± 0% -1.32% (p=0.000 n=10)
HypotGo 2.610n ± 3% 2.663n ± 5% ~ (p=0.280 n=10)
Ilogb 1.793n ± 1% 1.566n ± 1% -12.66% (p=0.000 n=10)
J0 22.11n ± 1% 21.45n ± 1% -2.99% (p=0.000 n=10)
J1 21.71n ± 1% 21.38n ± 1% -1.54% (p=0.000 n=10)
Jn 46.43n ± 1% 45.83n ± 1% -1.30% (p=0.001 n=10)
Ldexp 2.360n ± 1% 2.111n ± 1% -10.51% (p=0.000 n=10)
Lgamma 4.728n ± 1% 4.850n ± 2% +2.59% (p=0.000 n=10)
Log 4.304n ± 2% 4.228n ± 1% -1.78% (p=0.000 n=10)
Logb 1.833n ± 2% 1.635n ± 2% -10.80% (p=0.000 n=10)
Log1p 5.262n ± 2% 5.173n ± 2% -1.69% (p=0.001 n=10)
Log10 4.534n ± 1% 4.474n ± 1% -1.33% (p=0.024 n=10)
Log2 2.510n ± 2% 2.246n ± 2% -10.48% (p=0.000 n=10)
Modf 1.712n ± 3% 1.700n ± 1% ~ (p=0.055 n=10)
Nextafter32 2.190n ± 3% 2.187n ± 0% ~ (p=0.266 n=10)
Nextafter64 2.184n ± 0% 2.183n ± 0% -0.05% (p=0.017 n=10)
PowInt 11.45n ± 7% 11.32n ± 9% ~ (p=0.137 n=10)
PowFrac 27.46n ± 3% 27.04n ± 1% -1.55% (p=0.001 n=10)
Pow10Pos 0.5367n ± 3% 0.5466n ± 2% +1.84% (p=0.009 n=10)
Pow10Neg 0.8939n ± 1% 0.8720n ± 2% -2.45% (p=0.000 n=10)
Round 1.218n ± 1% 1.198n ± 1% -1.56% (p=0.005 n=10)
RoundToEven 1.711n ± 0% 1.710n ± 0% ~ (p=0.464 n=10)
Remainder 12.87n ± 10% 13.79n ± 14% +7.11% (p=0.027 n=10)
Signbit 0.4072n ± 2% 0.3839n ± 2% -5.71% (p=0.000 n=10)
Sin 4.102n ± 1% 4.058n ± 3% ~ (p=0.138 n=10)
Sincos 5.837n ± 1% 5.715n ± 2% -2.10% (p=0.000 n=10)
Sinh 5.622n ± 1% 5.567n ± 2% -0.96% (p=0.006 n=10)
SqrtIndirect 0.4284n ± 0% 0.4279n ± 0% ~ (p=0.084 n=10)
SqrtLatency 2.779n ± 0% 2.777n ± 0% ~ (p=0.089 n=10)
SqrtIndirectLatency 2.777n ± 0% 2.778n ± 0% ~ (p=0.305 n=10)
SqrtGoLatency 24.00n ± 0% 24.51n ± 0% +2.12% (p=0.000 n=10)
SqrtPrime 673.0n ± 0% 673.0n ± 0% ~ (p=0.574 n=10)
Tan 4.111n ± 4% 4.123n ± 5% ~ (p=0.424 n=10)
Tanh 5.787n ± 1% 5.723n ± 1% -1.11% (p=0.010 n=10)
Trunc 0.3441n ± 3% 0.2596n ± 2% -24.56% (p=0.000 n=10)
Y0 21.63n ± 2% 21.07n ± 2% -2.61% (p=0.001 n=10)
Y1 21.42n ± 1% 20.93n ± 3% -2.29% (p=0.041 n=10)
Yn 45.78n ± 1% 45.83n ± 1% ~ (p=0.671 n=10)
Float64bits 0.2187n ± 2% 0.2199n ± 2% ~ (p=0.138 n=10)
Float64frombits 0.2198n ± 1% 0.2199n ± 1% ~ (p=0.956 n=10)
Float32bits 0.2237n ± 2% 0.2213n ± 1% ~ (p=0.060 n=10)
Float32frombits 0.2251n ± 1% 0.2219n ± 2% -1.42% (p=0.000 n=10)
FMA 0.8557n ± 1% 0.8555n ± 0% ~ (p=0.286 n=10)
geomean 3.186n 3.070n -3.61%

Change-Id: I4814bb1e3d9d20e9d8cd7689e8d5383e36b00331
Reviewed-on: https://go-review.googlesource.com/c/go/+/694955
Reviewed-by: Cherry Mui
Reviewed-by: Sean Liao
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Keith Randall
---
 src/math/bits.go | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/math/bits.go b/src/math/bits.go
index c5cb93b1594..3716a411f4f 100644
--- a/src/math/bits.go
+++ b/src/math/bits.go
@@ -48,7 +48,12 @@ func IsInf(f float64, sign int) bool {
 	// To avoid the floating-point hardware, could use:
 	//	x := Float64bits(f);
 	//	return sign >= 0 && x == uvinf || sign <= 0 && x == uvneginf;
-	return sign >= 0 && f > MaxFloat64 || sign <= 0 && f < -MaxFloat64
+	if sign == 0 {
+		f = Abs(f)
+	} else if sign < 0 {
+		f = -f
+	}
+	return f > MaxFloat64
 }
 
 // normalize returns a normal number y and exponent exp