go/test/codegen/floats.go
Michael Munday 34aef89366 cmd/compile: use FCLASSD for subnormal checks on riscv64
Only implemented for 64 bit floating point operations for now.

goos: linux
goarch: riscv64
pkg: math
cpu: Spacemit(R) X60
                    │       sec/op        │   sec/op     vs base                │
Acos                          154.1n ± 0%   154.1n ± 0%        ~ (p=0.303 n=10)
Acosh                         215.8n ± 6%   226.7n ± 0%        ~ (p=0.439 n=10)
Asin                          149.2n ± 1%   149.2n ± 0%        ~ (p=0.700 n=10)
Asinh                         262.1n ± 0%   258.5n ± 0%   -1.37% (p=0.000 n=10)
Atan                          99.48n ± 0%   99.49n ± 0%        ~ (p=0.836 n=10)
Atanh                         244.9n ± 0%   243.8n ± 0%   -0.43% (p=0.002 n=10)
Atan2                         158.2n ± 1%   153.3n ± 0%   -3.10% (p=0.000 n=10)
Cbrt                          186.8n ± 0%   181.1n ± 0%   -3.03% (p=0.000 n=10)
Ceil                          36.71n ± 1%   36.71n ± 0%        ~ (p=0.434 n=10)
Copysign                      6.531n ± 1%   6.526n ± 0%        ~ (p=0.268 n=10)
Cos                           98.19n ± 0%   95.40n ± 0%   -2.84% (p=0.000 n=10)
Cosh                          233.1n ± 0%   222.6n ± 0%   -4.50% (p=0.000 n=10)
Erf                           122.5n ± 0%   114.2n ± 0%   -6.78% (p=0.000 n=10)
Erfc                          126.0n ± 1%   116.6n ± 0%   -7.46% (p=0.000 n=10)
Erfinv                        138.8n ± 0%   138.6n ± 0%        ~ (p=0.082 n=10)
Erfcinv                       140.0n ± 0%   139.7n ± 0%        ~ (p=0.359 n=10)
Exp                           193.3n ± 0%   184.2n ± 0%   -4.68% (p=0.000 n=10)
ExpGo                         204.8n ± 0%   194.5n ± 0%   -5.03% (p=0.000 n=10)
Expm1                         152.5n ± 1%   145.0n ± 0%   -4.92% (p=0.000 n=10)
Exp2                          174.5n ± 0%   164.2n ± 0%   -5.85% (p=0.000 n=10)
Exp2Go                        184.4n ± 1%   175.4n ± 0%   -4.88% (p=0.000 n=10)
Abs                           4.912n ± 0%   4.914n ± 0%        ~ (p=0.283 n=10)
Dim                           15.50n ± 1%   15.52n ± 1%        ~ (p=0.331 n=10)
Floor                         36.89n ± 1%   36.76n ± 1%        ~ (p=0.325 n=10)
Max                           31.05n ± 1%   31.17n ± 1%        ~ (p=0.628 n=10)
Min                           31.01n ± 0%   31.06n ± 0%        ~ (p=0.767 n=10)
Mod                           294.1n ± 0%   245.6n ± 0%  -16.52% (p=0.000 n=10)
Frexp                         44.86n ± 1%   35.20n ± 0%  -21.53% (p=0.000 n=10)
Gamma                         195.8n ± 0%   185.4n ± 1%   -5.29% (p=0.000 n=10)
Hypot                         84.91n ± 0%   84.54n ± 1%   -0.43% (p=0.006 n=10)
HypotGo                       96.70n ± 0%   95.42n ± 1%   -1.32% (p=0.000 n=10)
Ilogb                         45.03n ± 0%   35.07n ± 1%  -22.10% (p=0.000 n=10)
J0                            634.5n ± 0%   627.2n ± 0%   -1.16% (p=0.000 n=10)
J1                            644.5n ± 0%   636.9n ± 0%   -1.18% (p=0.000 n=10)
Jn                            1.357µ ± 0%   1.344µ ± 0%   -0.92% (p=0.000 n=10)
Ldexp                         49.89n ± 0%   39.96n ± 0%  -19.90% (p=0.000 n=10)
Lgamma                        186.6n ± 0%   184.3n ± 0%   -1.21% (p=0.000 n=10)
Log                           150.4n ± 0%   141.1n ± 0%   -6.15% (p=0.000 n=10)
Logb                          46.70n ± 0%   35.89n ± 0%  -23.15% (p=0.000 n=10)
Log1p                         164.1n ± 0%   163.9n ± 0%        ~ (p=0.122 n=10)
Log10                         153.1n ± 0%   143.5n ± 0%   -6.24% (p=0.000 n=10)
Log2                          58.83n ± 0%   49.75n ± 0%  -15.43% (p=0.000 n=10)
Modf                          40.82n ± 1%   40.78n ± 0%        ~ (p=0.239 n=10)
Nextafter32                   49.15n ± 0%   48.93n ± 0%   -0.44% (p=0.011 n=10)
Nextafter64                   43.33n ± 0%   43.23n ± 0%        ~ (p=0.228 n=10)
PowInt                        269.4n ± 0%   243.8n ± 0%   -9.49% (p=0.000 n=10)
PowFrac                       618.0n ± 0%   571.7n ± 0%   -7.48% (p=0.000 n=10)
Pow10Pos                      13.09n ± 0%   13.05n ± 0%   -0.31% (p=0.003 n=10)
Pow10Neg                      30.99n ± 1%   30.99n ± 0%        ~ (p=0.173 n=10)
Round                         23.73n ± 0%   23.65n ± 0%   -0.36% (p=0.011 n=10)
RoundToEven                   27.87n ± 0%   27.73n ± 0%   -0.48% (p=0.003 n=10)
Remainder                     282.1n ± 0%   249.6n ± 0%  -11.52% (p=0.000 n=10)
Signbit                       11.46n ± 0%   11.42n ± 0%   -0.39% (p=0.003 n=10)
Sin                           115.2n ± 0%   113.2n ± 0%   -1.74% (p=0.000 n=10)
Sincos                        140.6n ± 0%   138.6n ± 0%   -1.39% (p=0.000 n=10)
Sinh                          252.0n ± 0%   241.4n ± 0%   -4.21% (p=0.000 n=10)
SqrtIndirect                  4.909n ± 0%   4.893n ± 0%   -0.34% (p=0.021 n=10)
SqrtLatency                   19.57n ± 1%   19.57n ± 0%        ~ (p=0.087 n=10)
SqrtIndirectLatency           19.64n ± 0%   19.57n ± 0%   -0.36% (p=0.025 n=10)
SqrtGoLatency                 198.1n ± 0%   197.4n ± 0%   -0.35% (p=0.014 n=10)
SqrtPrime                     5.733µ ± 0%   5.725µ ± 0%        ~ (p=0.116 n=10)
Tan                           149.1n ± 0%   146.8n ± 0%   -1.54% (p=0.000 n=10)
Tanh                          248.2n ± 1%   238.1n ± 0%   -4.05% (p=0.000 n=10)
Trunc                         36.86n ± 0%   36.70n ± 0%   -0.43% (p=0.029 n=10)
Y0                            638.2n ± 0%   633.6n ± 0%   -0.71% (p=0.000 n=10)
Y1                            641.8n ± 0%   636.1n ± 0%   -0.87% (p=0.000 n=10)
Yn                            1.358µ ± 0%   1.345µ ± 0%   -0.92% (p=0.000 n=10)
Float64bits                   5.721n ± 0%   5.709n ± 0%   -0.22% (p=0.044 n=10)
Float64frombits               4.905n ± 0%   4.893n ± 0%        ~ (p=0.266 n=10)
Float32bits                   12.27n ± 0%   12.23n ± 0%        ~ (p=0.122 n=10)
Float32frombits               4.909n ± 0%   4.893n ± 0%   -0.32% (p=0.024 n=10)
FMA                           6.556n ± 0%   6.526n ± 0%        ~ (p=0.283 n=10)
geomean                       86.82n        83.75n        -3.54%

Change-Id: I522297a79646d76543d516accce291f5a3cea337
Reviewed-on: https://go-review.googlesource.com/c/go/+/717560
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
2025-11-12 10:03:41 -08:00

315 lines
6.6 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
import "math"
// This file contains codegen tests related to arithmetic
// simplifications and optimizations on float types.
// For codegen tests on integer types, see arithmetic.go.
// --------------------- //
// Strength-reduce //
// --------------------- //
// Mul2 returns f multiplied by the constant 2.0.
// On every listed architecture the checks expect a floating-point add
// instruction and forbid a floating-point multiply.
func Mul2(f float64) float64 {
// 386/sse2:"ADDSD" -"MULSD"
// amd64:"ADDSD" -"MULSD"
// arm/7:"ADDD" -"MULD"
// arm64:"FADDD" -"FMULD"
// loong64:"ADDD" -"MULD"
// ppc64x:"FADD" -"FMUL"
// riscv64:"FADDD" -"FMULD"
return f * 2.0
}
// DivPow2 divides its three arguments by the power-of-two constants
// 16.0, 0.125 and 0.5 and returns the quotients in that order.
//
// The divisions by 16.0 and 0.125 are expected to be emitted as a
// multiplication, and the division by 0.5 as an addition. Every check
// also forbids a divide instruction on its line. The ppc64x checks
// forbid FDIV throughout, since that is the divide mnemonic on that
// target.
func DivPow2(f1, f2, f3 float64) (float64, float64, float64) {
	// 386/sse2:"MULSD" -"DIVSD"
	// amd64:"MULSD" -"DIVSD"
	// arm/7:"MULD" -"DIVD"
	// arm64:"FMULD" -"FDIVD"
	// loong64:"MULD" -"DIVD"
	// ppc64x:"FMUL" -"FDIV"
	// riscv64:"FMULD" -"FDIVD"
	x := f1 / 16.0

	// 386/sse2:"MULSD" -"DIVSD"
	// amd64:"MULSD" -"DIVSD"
	// arm/7:"MULD" -"DIVD"
	// arm64:"FMULD" -"FDIVD"
	// loong64:"MULD" -"DIVD"
	// ppc64x:"FMUL" -"FDIV"
	// riscv64:"FMULD" -"FDIVD"
	y := f2 / 0.125

	// 386/sse2:"ADDSD" -"DIVSD" -"MULSD"
	// amd64:"ADDSD" -"DIVSD" -"MULSD"
	// arm/7:"ADDD" -"MULD" -"DIVD"
	// arm64:"FADDD" -"FMULD" -"FDIVD"
	// loong64:"ADDD" -"MULD" -"DIVD"
	// ppc64x:"FADD" -"FMUL" -"FDIV"
	// riscv64:"FADDD" -"FMULD" -"FDIVD"
	z := f3 / 0.5

	return x, y, z
}
// indexLoad returns the element b0[idx] multiplied by b1.
// The arm64 and loong64 checks expect the float32 load to address the
// element as base register plus index register; on arm64 the index is
// additionally shifted left by 2.
func indexLoad(b0 []float32, b1 float32, idx int) float32 {
// arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+<<2\),\sF[0-9]+`
// loong64:`MOVF\s\(R[0-9]+\)\(R[0-9]+\),\sF[0-9]+`
return b0[idx] * b1
}
// indexStore writes b1 to b0[idx].
// The arm64 and loong64 checks expect the float64 store to address the
// element as base register plus index register; on arm64 the index is
// additionally shifted left by 3.
func indexStore(b0 []float64, b1 float64, idx int) {
// arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<3\)`
// loong64:`MOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`
b0[idx] = b1
}
// ----------- //
// Fused //
// ----------- //
// FusedAdd32 returns x*y + z for float32 operands.
// Each listed architecture is expected to emit the named fused
// multiply-add instruction for the expression.
func FusedAdd32(x, y, z float32) float32 {
// s390x:"FMADDS "
// ppc64x:"FMADDS "
// arm64:"FMADDS"
// loong64:"FMADDF "
// riscv64:"FMADDS "
// amd64/v3:"VFMADD231SS "
return x*y + z
}
// FusedSub32_a returns x*y - z for float32 operands.
// The s390x, ppc64x and riscv64 checks expect FMSUBS and the loong64
// check expects FMSUBF.
func FusedSub32_a(x, y, z float32) float32 {
// s390x:"FMSUBS "
// ppc64x:"FMSUBS "
// riscv64:"FMSUBS "
// loong64:"FMSUBF "
return x*y - z
}
// FusedSub32_b returns z - x*y for float32 operands, so the product is
// the value being subtracted. The checks expect FMSUBS on arm64,
// FNMSUBF on loong64 and FNMSUBS on riscv64.
func FusedSub32_b(x, y, z float32) float32 {
// arm64:"FMSUBS"
// loong64:"FNMSUBF "
// riscv64:"FNMSUBS "
return z - x*y
}
// FusedAdd64 returns x*y + z for float64 operands.
// Each listed architecture is expected to emit the named fused
// multiply-add instruction for the expression.
// NOTE(review): the s390x and ppc64x patterns end in a space, presumably
// so that FMADD does not also match FMADDS - confirm.
func FusedAdd64(x, y, z float64) float64 {
// s390x:"FMADD "
// ppc64x:"FMADD "
// arm64:"FMADDD"
// loong64:"FMADDD "
// riscv64:"FMADDD "
// amd64/v3:"VFMADD231SD "
return x*y + z
}
// FusedSub64_a returns x*y - z for float64 operands.
// The s390x and ppc64x checks expect FMSUB; the riscv64 and loong64
// checks expect FMSUBD.
func FusedSub64_a(x, y, z float64) float64 {
// s390x:"FMSUB "
// ppc64x:"FMSUB "
// riscv64:"FMSUBD "
// loong64:"FMSUBD "
return x*y - z
}
// FusedSub64_b returns z - x*y for float64 operands, so the product is
// the value being subtracted. The checks expect FMSUBD on arm64 and
// FNMSUBD on loong64 and riscv64.
func FusedSub64_b(x, y, z float64) float64 {
// arm64:"FMSUBD"
// loong64:"FNMSUBD "
// riscv64:"FNMSUBD "
return z - x*y
}
// Cmp reports whether f is greater than 4 or less than -4.
// The arm64 check expects FCMPD together with one of the conditional
// branches BGT, BLE, BMI or BPL, and forbids CSET GT and CBZ.
// NOTE(review): presumably this guards against the comparison result
// being materialized in a register before branching - confirm.
func Cmp(f float64) bool {
// arm64:"FCMPD" "(BGT|BLE|BMI|BPL)" -"CSET GT" -"CBZ"
return f > 4 || f < -4
}
// CmpZero64 reports whether the float64 f is less than or equal to zero.
// The s390x check expects LTDBR and forbids FCMPU.
func CmpZero64(f float64) bool {
// s390x:"LTDBR" -"FCMPU"
return f <= 0
}
// CmpZero32 reports whether the float32 f is less than or equal to zero.
// The s390x check expects LTEBR and forbids CEBR.
func CmpZero32(f float32) bool {
// s390x:"LTEBR" -"CEBR"
return f <= 0
}
// CmpWithSub reports whether a - b is less than or equal to zero.
// The s390x check forbids LTDBR on the comparison line.
// NOTE(review): presumably the subtraction is expected to leave usable
// condition codes, making a separate test instruction unnecessary - confirm.
func CmpWithSub(a float64, b float64) bool {
f := a - b
// s390x:-"LTDBR"
return f <= 0
}
// CmpWithAdd reports whether a + b is less than or equal to zero.
// The s390x check forbids LTDBR on the comparison line.
// NOTE(review): presumably the addition is expected to leave usable
// condition codes, making a separate test instruction unnecessary - confirm.
func CmpWithAdd(a float64, b float64) bool {
f := a + b
// s390x:-"LTDBR"
return f <= 0
}
// ---------------- //
// Non-floats //
// ---------------- //
// ArrayZero returns a zero-valued [16]byte.
// The amd64 check expects MOVUPS on the line declaring the array.
func ArrayZero() [16]byte {
// amd64:"MOVUPS"
var a [16]byte
return a
}
// ArrayCopy returns a copy of a through the named result b.
// The amd64 check expects MOVUPS on the assignment.
func ArrayCopy(a [16]byte) (b [16]byte) {
// amd64:"MOVUPS"
b = a
return
}
// ---------------- //
// Float Min/Max //
// ---------------- //
// Float64Min returns the built-in min of the float64 values a and b.
// The checks expect the listed minimum instruction on each architecture.
func Float64Min(a, b float64) float64 {
// amd64:"MINSD"
// arm64:"FMIND"
// loong64:"FMIND"
// riscv64:"FMIN"
// ppc64/power9:"XSMINJDP"
// ppc64/power10:"XSMINJDP"
// s390x: "WFMINDB"
return min(a, b)
}
// Float64Max returns the built-in max of the float64 values a and b.
// The checks expect the listed maximum instruction on each architecture,
// except amd64 where the expected instruction is MINSD.
// NOTE(review): presumably max is lowered through a min instruction on
// amd64, so MINSD is intentional rather than a typo - confirm against
// the amd64 lowering rules before changing it.
func Float64Max(a, b float64) float64 {
// amd64:"MINSD"
// arm64:"FMAXD"
// loong64:"FMAXD"
// riscv64:"FMAX"
// ppc64/power9:"XSMAXJDP"
// ppc64/power10:"XSMAXJDP"
// s390x: "WFMAXDB"
return max(a, b)
}
// Float32Min returns the built-in min of the float32 values a and b.
// The checks expect the listed minimum instruction on each architecture.
// NOTE(review): the ppc64 checks name XSMINJDP, the same mnemonic that
// Float64Min expects - looks intentional, but confirm.
func Float32Min(a, b float32) float32 {
// amd64:"MINSS"
// arm64:"FMINS"
// loong64:"FMINF"
// riscv64:"FMINS"
// ppc64/power9:"XSMINJDP"
// ppc64/power10:"XSMINJDP"
// s390x: "WFMINSB"
return min(a, b)
}
// Float32Max returns the built-in max of the float32 values a and b.
// The checks expect the listed maximum instruction on each architecture,
// except amd64 where the expected instruction is MINSS.
// NOTE(review): presumably max is lowered through a min instruction on
// amd64, so MINSS is intentional rather than a typo - confirm. The ppc64
// checks name XSMAXJDP, the same mnemonic that Float64Max expects.
func Float32Max(a, b float32) float32 {
// amd64:"MINSS"
// arm64:"FMAXS"
// loong64:"FMAXF"
// riscv64:"FMAXS"
// ppc64/power9:"XSMAXJDP"
// ppc64/power10:"XSMAXJDP"
// s390x: "WFMAXSB"
return max(a, b)
}
// ------------------------ //
// Constant Optimizations //
// ------------------------ //
// Float32ConstantZero returns the float32 constant 0.0.
// The arm64 check expects an FMOVS whose source operand is ZR.
func Float32ConstantZero() float32 {
// arm64:"FMOVS ZR,"
return 0.0
}
// Float32ConstantChipFloat returns the float32 constant 2.25.
// The arm64 check expects an FMOVS whose source is the immediate
// operand $(2.25).
func Float32ConstantChipFloat() float32 {
// arm64:"FMOVS [$]\\(2\\.25\\),"
return 2.25
}
// Float32Constant returns the float32 constant 49.0.
// The arm64, ppc64x/power8 and ppc64x/power9 checks expect an FMOVS
// from the data symbol f32.42440000. The ppc64x/power10 check expects
// XXSPLTIDP with the immediate 1111752704, which is 0x42440000 written
// in decimal.
func Float32Constant() float32 {
// arm64:"FMOVS [$]f32\\.42440000\\(SB\\)"
// ppc64x/power8:"FMOVS [$]f32\\.42440000\\(SB\\)"
// ppc64x/power9:"FMOVS [$]f32\\.42440000\\(SB\\)"
// ppc64x/power10:"XXSPLTIDP [$]1111752704,"
return 49.0
}
// Float64ConstantZero returns the float64 constant 0.0.
// The arm64 check expects an FMOVD whose source operand is ZR.
func Float64ConstantZero() float64 {
// arm64:"FMOVD ZR,"
return 0.0
}
// Float64ConstantChipFloat returns the float64 constant 2.25.
// The arm64 check expects an FMOVD whose source is the immediate
// operand $(2.25).
func Float64ConstantChipFloat() float64 {
// arm64:"FMOVD [$]\\(2\\.25\\),"
return 2.25
}
// Float64Constant returns the float64 constant 49.0.
// The arm64, ppc64x/power8 and ppc64x/power9 checks expect an FMOVD
// from the data symbol f64.4048800000000000. The ppc64x/power10 check
// expects XXSPLTIDP with the immediate 1111752704, which is 0x42440000
// written in decimal, i.e. the float32 encoding of 49.0.
func Float64Constant() float64 {
// arm64:"FMOVD [$]f64\\.4048800000000000\\(SB\\)"
// ppc64x/power8:"FMOVD [$]f64\\.4048800000000000\\(SB\\)"
// ppc64x/power9:"FMOVD [$]f64\\.4048800000000000\\(SB\\)"
// ppc64x/power10:"XXSPLTIDP [$]1111752704,"
return 49.0
}
// Float32DenormalConstant returns the float32 constant 0x1p-127, which
// is a denormal value in float32. The ppc64x check expects an FMOVS
// from the data symbol f32.00400000.
func Float32DenormalConstant() float32 {
// ppc64x:"FMOVS [$]f32\\.00400000\\(SB\\)"
return 0x1p-127
}
// Float64DenormalFloat32Constant returns a float64 constant which can
// be exactly represented as a denormal float32 value. On ppc64x,
// denormal values cannot be used with XXSPLTIDP, so the check expects
// an FMOVD from the data symbol f64.3800000000000000 instead.
func Float64DenormalFloat32Constant() float64 {
// ppc64x:"FMOVD [$]f64\\.3800000000000000\\(SB\\)"
return 0x1p-127
}
// Float32ConstantStore stores the float32 constant 5.432 through p.
// The amd64 check expects a MOVL of the immediate 1085133554, which is
// 0x40add2f2 written in decimal. The riscv64 check expects a MOVF whose
// source operand names the symbol f32.40add2f2.
func Float32ConstantStore(p *float32) {
// amd64:"MOVL [$]1085133554"
// riscv64: "MOVF [$]f32.40add2f2"
*p = 5.432
}
// Float64ConstantStore stores the float64 constant 5.432 through p.
// The amd64 check expects a MOVQ of the immediate 4617801906721357038,
// which is 0x4015ba5e353f7cee written in decimal. The riscv64 check
// expects a MOVD whose source operand names the symbol
// f64.4015ba5e353f7cee.
func Float64ConstantStore(p *float64) {
// amd64: "MOVQ [$]4617801906721357038"
// riscv64: "MOVD [$]f64.4015ba5e353f7cee"
*p = 5.432
}
// ------------------------ //
// Subnormal tests //
// ------------------------ //
// isSubnormal reports whether the absolute value of x is below
// 2.2250738585072014e-308, the smallest positive normal float64 (the
// same value as 0x1p-1022). The riscv64 check expects FCLASSD and
// forbids FABSD.
func isSubnormal(x float64) bool {
// riscv64:"FCLASSD" -"FABSD"
return math.Abs(x) < 2.2250738585072014e-308
}
// isNormal reports whether the absolute value of x is at least
// 0x1p-1022, the smallest positive normal float64. The riscv64 check
// expects FCLASSD and forbids FABSD.
func isNormal(x float64) bool {
// riscv64:"FCLASSD" -"FABSD"
return math.Abs(x) >= 0x1p-1022
}
// isPosSubnormal reports whether x is strictly between zero and
// 2.2250738585072014e-308, the smallest positive normal float64.
// The riscv64 check expects FCLASSD.
func isPosSubnormal(x float64) bool {
// riscv64:"FCLASSD"
return x > 0 && x < 2.2250738585072014e-308
}
// isNegSubnormal reports whether x is strictly between -0x1p-1022 and
// zero, where 0x1p-1022 is the smallest positive normal float64.
// The riscv64 check expects FCLASSD.
func isNegSubnormal(x float64) bool {
// riscv64:"FCLASSD"
return x < 0 && x > -0x1p-1022
}
// isPosNormal reports whether x is greater than or equal to
// 2.2250738585072014e-308, the smallest positive normal float64.
// The riscv64 check expects FCLASSD.
func isPosNormal(x float64) bool {
// riscv64:"FCLASSD"
return x >= 2.2250738585072014e-308
}
// isNegNormal reports whether x is less than or equal to
// -2.2250738585072014e-308, the negation of the smallest positive
// normal float64. The riscv64 check expects FCLASSD.
func isNegNormal(x float64) bool {
// riscv64:"FCLASSD"
return x <= -2.2250738585072014e-308
}