mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
Only implemented for 64-bit floating-point operations for now.
goos: linux
goarch: riscv64
pkg: math
cpu: Spacemit(R) X60
│ sec/op │ sec/op vs base │
Acos 154.1n ± 0% 154.1n ± 0% ~ (p=0.303 n=10)
Acosh 215.8n ± 6% 226.7n ± 0% ~ (p=0.439 n=10)
Asin 149.2n ± 1% 149.2n ± 0% ~ (p=0.700 n=10)
Asinh 262.1n ± 0% 258.5n ± 0% -1.37% (p=0.000 n=10)
Atan 99.48n ± 0% 99.49n ± 0% ~ (p=0.836 n=10)
Atanh 244.9n ± 0% 243.8n ± 0% -0.43% (p=0.002 n=10)
Atan2 158.2n ± 1% 153.3n ± 0% -3.10% (p=0.000 n=10)
Cbrt 186.8n ± 0% 181.1n ± 0% -3.03% (p=0.000 n=10)
Ceil 36.71n ± 1% 36.71n ± 0% ~ (p=0.434 n=10)
Copysign 6.531n ± 1% 6.526n ± 0% ~ (p=0.268 n=10)
Cos 98.19n ± 0% 95.40n ± 0% -2.84% (p=0.000 n=10)
Cosh 233.1n ± 0% 222.6n ± 0% -4.50% (p=0.000 n=10)
Erf 122.5n ± 0% 114.2n ± 0% -6.78% (p=0.000 n=10)
Erfc 126.0n ± 1% 116.6n ± 0% -7.46% (p=0.000 n=10)
Erfinv 138.8n ± 0% 138.6n ± 0% ~ (p=0.082 n=10)
Erfcinv 140.0n ± 0% 139.7n ± 0% ~ (p=0.359 n=10)
Exp 193.3n ± 0% 184.2n ± 0% -4.68% (p=0.000 n=10)
ExpGo 204.8n ± 0% 194.5n ± 0% -5.03% (p=0.000 n=10)
Expm1 152.5n ± 1% 145.0n ± 0% -4.92% (p=0.000 n=10)
Exp2 174.5n ± 0% 164.2n ± 0% -5.85% (p=0.000 n=10)
Exp2Go 184.4n ± 1% 175.4n ± 0% -4.88% (p=0.000 n=10)
Abs 4.912n ± 0% 4.914n ± 0% ~ (p=0.283 n=10)
Dim 15.50n ± 1% 15.52n ± 1% ~ (p=0.331 n=10)
Floor 36.89n ± 1% 36.76n ± 1% ~ (p=0.325 n=10)
Max 31.05n ± 1% 31.17n ± 1% ~ (p=0.628 n=10)
Min 31.01n ± 0% 31.06n ± 0% ~ (p=0.767 n=10)
Mod 294.1n ± 0% 245.6n ± 0% -16.52% (p=0.000 n=10)
Frexp 44.86n ± 1% 35.20n ± 0% -21.53% (p=0.000 n=10)
Gamma 195.8n ± 0% 185.4n ± 1% -5.29% (p=0.000 n=10)
Hypot 84.91n ± 0% 84.54n ± 1% -0.43% (p=0.006 n=10)
HypotGo 96.70n ± 0% 95.42n ± 1% -1.32% (p=0.000 n=10)
Ilogb 45.03n ± 0% 35.07n ± 1% -22.10% (p=0.000 n=10)
J0 634.5n ± 0% 627.2n ± 0% -1.16% (p=0.000 n=10)
J1 644.5n ± 0% 636.9n ± 0% -1.18% (p=0.000 n=10)
Jn 1.357µ ± 0% 1.344µ ± 0% -0.92% (p=0.000 n=10)
Ldexp 49.89n ± 0% 39.96n ± 0% -19.90% (p=0.000 n=10)
Lgamma 186.6n ± 0% 184.3n ± 0% -1.21% (p=0.000 n=10)
Log 150.4n ± 0% 141.1n ± 0% -6.15% (p=0.000 n=10)
Logb 46.70n ± 0% 35.89n ± 0% -23.15% (p=0.000 n=10)
Log1p 164.1n ± 0% 163.9n ± 0% ~ (p=0.122 n=10)
Log10 153.1n ± 0% 143.5n ± 0% -6.24% (p=0.000 n=10)
Log2 58.83n ± 0% 49.75n ± 0% -15.43% (p=0.000 n=10)
Modf 40.82n ± 1% 40.78n ± 0% ~ (p=0.239 n=10)
Nextafter32 49.15n ± 0% 48.93n ± 0% -0.44% (p=0.011 n=10)
Nextafter64 43.33n ± 0% 43.23n ± 0% ~ (p=0.228 n=10)
PowInt 269.4n ± 0% 243.8n ± 0% -9.49% (p=0.000 n=10)
PowFrac 618.0n ± 0% 571.7n ± 0% -7.48% (p=0.000 n=10)
Pow10Pos 13.09n ± 0% 13.05n ± 0% -0.31% (p=0.003 n=10)
Pow10Neg 30.99n ± 1% 30.99n ± 0% ~ (p=0.173 n=10)
Round 23.73n ± 0% 23.65n ± 0% -0.36% (p=0.011 n=10)
RoundToEven 27.87n ± 0% 27.73n ± 0% -0.48% (p=0.003 n=10)
Remainder 282.1n ± 0% 249.6n ± 0% -11.52% (p=0.000 n=10)
Signbit 11.46n ± 0% 11.42n ± 0% -0.39% (p=0.003 n=10)
Sin 115.2n ± 0% 113.2n ± 0% -1.74% (p=0.000 n=10)
Sincos 140.6n ± 0% 138.6n ± 0% -1.39% (p=0.000 n=10)
Sinh 252.0n ± 0% 241.4n ± 0% -4.21% (p=0.000 n=10)
SqrtIndirect 4.909n ± 0% 4.893n ± 0% -0.34% (p=0.021 n=10)
SqrtLatency 19.57n ± 1% 19.57n ± 0% ~ (p=0.087 n=10)
SqrtIndirectLatency 19.64n ± 0% 19.57n ± 0% -0.36% (p=0.025 n=10)
SqrtGoLatency 198.1n ± 0% 197.4n ± 0% -0.35% (p=0.014 n=10)
SqrtPrime 5.733µ ± 0% 5.725µ ± 0% ~ (p=0.116 n=10)
Tan 149.1n ± 0% 146.8n ± 0% -1.54% (p=0.000 n=10)
Tanh 248.2n ± 1% 238.1n ± 0% -4.05% (p=0.000 n=10)
Trunc 36.86n ± 0% 36.70n ± 0% -0.43% (p=0.029 n=10)
Y0 638.2n ± 0% 633.6n ± 0% -0.71% (p=0.000 n=10)
Y1 641.8n ± 0% 636.1n ± 0% -0.87% (p=0.000 n=10)
Yn 1.358µ ± 0% 1.345µ ± 0% -0.92% (p=0.000 n=10)
Float64bits 5.721n ± 0% 5.709n ± 0% -0.22% (p=0.044 n=10)
Float64frombits 4.905n ± 0% 4.893n ± 0% ~ (p=0.266 n=10)
Float32bits 12.27n ± 0% 12.23n ± 0% ~ (p=0.122 n=10)
Float32frombits 4.909n ± 0% 4.893n ± 0% -0.32% (p=0.024 n=10)
FMA 6.556n ± 0% 6.526n ± 0% ~ (p=0.283 n=10)
geomean 86.82n 83.75n -3.54%
Change-Id: I522297a79646d76543d516accce291f5a3cea337
Reviewed-on: https://go-review.googlesource.com/c/go/+/717560
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
315 lines
6.6 KiB
Go
315 lines
6.6 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
import "math"
|
|
|
|
// This file contains codegen tests related to arithmetic
|
|
// simplifications and optimizations on float types.
|
|
// For codegen tests on integer types, see arithmetic.go.
|
|
|
|
// --------------------- //
|
|
// Strength-reduce //
|
|
// --------------------- //
|
|
|
|
// Mul2 returns f multiplied by the constant 2.0.
//
// The asmcheck directives inside the body require a floating-point add
// and forbid a floating-point multiply on every listed architecture.
func Mul2(f float64) float64 {
|
|
// 386/sse2:"ADDSD" -"MULSD"
|
|
// amd64:"ADDSD" -"MULSD"
|
|
// arm/7:"ADDD" -"MULD"
|
|
// arm64:"FADDD" -"FMULD"
|
|
// loong64:"ADDD" -"MULD"
|
|
// ppc64x:"FADD" -"FMUL"
|
|
// riscv64:"FADDD" -"FMULD"
|
|
return f * 2.0
|
|
}
|
|
|
|
// DivPow2 divides each argument by a power-of-two constant and returns
// the three quotients in order: f1/16.0, f2/0.125 and f3/0.5.
//
// The asmcheck directives on the first two divisions require a multiply
// and forbid a divide instruction. The directives on the third division
// require an add and forbid both multiply and divide.
//
// All three ppc64x checks forbid FDIV, the double-precision divide
// mnemonic, so that each negative check can actually fail.
func DivPow2(f1, f2, f3 float64) (float64, float64, float64) {
	// 386/sse2:"MULSD" -"DIVSD"
	// amd64:"MULSD" -"DIVSD"
	// arm/7:"MULD" -"DIVD"
	// arm64:"FMULD" -"FDIVD"
	// loong64:"MULD" -"DIVD"
	// ppc64x:"FMUL" -"FDIV"
	// riscv64:"FMULD" -"FDIVD"
	x := f1 / 16.0

	// 386/sse2:"MULSD" -"DIVSD"
	// amd64:"MULSD" -"DIVSD"
	// arm/7:"MULD" -"DIVD"
	// arm64:"FMULD" -"FDIVD"
	// loong64:"MULD" -"DIVD"
	// ppc64x:"FMUL" -"FDIV"
	// riscv64:"FMULD" -"FDIVD"
	y := f2 / 0.125

	// 386/sse2:"ADDSD" -"DIVSD" -"MULSD"
	// amd64:"ADDSD" -"DIVSD" -"MULSD"
	// arm/7:"ADDD" -"MULD" -"DIVD"
	// arm64:"FADDD" -"FMULD" -"FDIVD"
	// loong64:"ADDD" -"MULD" -"DIVD"
	// ppc64x:"FADD" -"FMUL" -"FDIV"
	// riscv64:"FADDD" -"FMULD" -"FDIVD"
	z := f3 / 0.5

	return x, y, z
}
|
|
|
|
// indexLoad returns the element b0[idx] multiplied by b1.
//
// The directives expect the float32 element to be loaded with a single
// register-indexed move into a floating-point register. The arm64
// pattern additionally expects the index register shifted left by 2.
func indexLoad(b0 []float32, b1 float32, idx int) float32 {
|
|
// arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+<<2\),\sF[0-9]+`
|
|
// loong64:`MOVF\s\(R[0-9]+\)\(R[0-9]+\),\sF[0-9]+`
|
|
return b0[idx] * b1
|
|
}
|
|
|
|
// indexStore writes b1 into b0[idx].
//
// The directives expect the float64 value to be stored from a
// floating-point register with a single register-indexed move. The
// arm64 pattern additionally expects the index register shifted left
// by 3.
func indexStore(b0 []float64, b1 float64, idx int) {
|
|
// arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<3\)`
|
|
// loong64:`MOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`
|
|
b0[idx] = b1
|
|
}
|
|
|
|
// ----------- //
|
|
// Fused //
|
|
// ----------- //
|
|
|
|
// FusedAdd32 returns x*y + z for float32 operands.
//
// The directives expect the expression to compile to the fused
// multiply-add mnemonic named for each architecture (amd64 only at the
// v3 level).
func FusedAdd32(x, y, z float32) float32 {
|
|
// s390x:"FMADDS "
|
|
// ppc64x:"FMADDS "
|
|
// arm64:"FMADDS"
|
|
// loong64:"FMADDF "
|
|
// riscv64:"FMADDS "
|
|
// amd64/v3:"VFMADD231SS "
|
|
return x*y + z
|
|
}
|
|
|
|
// FusedSub32_a returns x*y - z for float32 operands.
//
// The directives expect the expression to compile to the fused
// multiply-subtract mnemonic named for each architecture.
func FusedSub32_a(x, y, z float32) float32 {
|
|
// s390x:"FMSUBS "
|
|
// ppc64x:"FMSUBS "
|
|
// riscv64:"FMSUBS "
|
|
// loong64:"FMSUBF "
|
|
return x*y - z
|
|
}
|
|
|
|
// FusedSub32_b returns z - x*y for float32 operands, the operand order
// opposite to FusedSub32_a.
//
// The directives expect a single fused instruction: FMSUBS on arm64,
// FNMSUBF on loong64 and FNMSUBS on riscv64.
func FusedSub32_b(x, y, z float32) float32 {
|
|
// arm64:"FMSUBS"
|
|
// loong64:"FNMSUBF "
|
|
// riscv64:"FNMSUBS "
|
|
return z - x*y
|
|
}
|
|
|
|
// FusedAdd64 returns x*y + z for float64 operands.
//
// The directives expect the expression to compile to the fused
// multiply-add mnemonic named for each architecture (amd64 only at the
// v3 level).
func FusedAdd64(x, y, z float64) float64 {
|
|
// s390x:"FMADD "
|
|
// ppc64x:"FMADD "
|
|
// arm64:"FMADDD"
|
|
// loong64:"FMADDD "
|
|
// riscv64:"FMADDD "
|
|
// amd64/v3:"VFMADD231SD "
|
|
return x*y + z
|
|
}
|
|
|
|
// FusedSub64_a returns x*y - z for float64 operands.
//
// The directives expect the expression to compile to the fused
// multiply-subtract mnemonic named for each architecture.
func FusedSub64_a(x, y, z float64) float64 {
|
|
// s390x:"FMSUB "
|
|
// ppc64x:"FMSUB "
|
|
// riscv64:"FMSUBD "
|
|
// loong64:"FMSUBD "
|
|
return x*y - z
|
|
}
|
|
|
|
// FusedSub64_b returns z - x*y for float64 operands, the operand order
// opposite to FusedSub64_a.
//
// The directives expect a single fused instruction: FMSUBD on arm64,
// FNMSUBD on loong64 and FNMSUBD on riscv64.
func FusedSub64_b(x, y, z float64) float64 {
|
|
// arm64:"FMSUBD"
|
|
// loong64:"FNMSUBD "
|
|
// riscv64:"FNMSUBD "
|
|
return z - x*y
|
|
}
|
|
|
|
// Cmp reports whether f is greater than 4 or less than -4.
//
// On arm64 the directive requires an FCMPD followed by one of the
// listed conditional branches, and forbids materializing the result
// with CSET GT or testing it with CBZ.
func Cmp(f float64) bool {
|
|
// arm64:"FCMPD" "(BGT|BLE|BMI|BPL)" -"CSET GT" -"CBZ"
|
|
return f > 4 || f < -4
|
|
}
|
|
|
|
// CmpZero64 reports whether the float64 f is less than or equal to zero.
//
// On s390x the directive requires LTDBR and forbids FCMPU.
func CmpZero64(f float64) bool {
|
|
// s390x:"LTDBR" -"FCMPU"
|
|
return f <= 0
|
|
}
|
|
|
|
// CmpZero32 reports whether the float32 f is less than or equal to zero.
//
// On s390x the directive requires LTEBR and forbids CEBR.
func CmpZero32(f float32) bool {
|
|
// s390x:"LTEBR" -"CEBR"
|
|
return f <= 0
|
|
}
|
|
|
|
// CmpWithSub reports whether a - b is less than or equal to zero.
//
// On s390x the directive forbids LTDBR on the comparison line.
// NOTE(review): presumably the preceding subtraction already provides
// the condition the comparison needs - confirm against the s390x rules.
func CmpWithSub(a float64, b float64) bool {
|
|
f := a - b
|
|
// s390x:-"LTDBR"
|
|
return f <= 0
|
|
}
|
|
|
|
// CmpWithAdd reports whether a + b is less than or equal to zero.
//
// On s390x the directive forbids LTDBR on the comparison line.
// NOTE(review): presumably the preceding addition already provides the
// condition the comparison needs - confirm against the s390x rules.
func CmpWithAdd(a float64, b float64) bool {
|
|
f := a + b
|
|
// s390x:-"LTDBR"
|
|
return f <= 0
|
|
}
|
|
|
|
// ---------------- //
|
|
// Non-floats //
|
|
// ---------------- //
|
|
|
|
// ArrayZero returns a zero-valued [16]byte.
//
// On amd64 the directive expects a MOVUPS on the declaration line.
func ArrayZero() [16]byte {
|
|
// amd64:"MOVUPS"
|
|
var a [16]byte
|
|
return a
|
|
}
|
|
|
|
// ArrayCopy returns a copy of the 16-byte array a through the named
// result b.
//
// On amd64 the directive expects a MOVUPS on the assignment line.
func ArrayCopy(a [16]byte) (b [16]byte) {
|
|
// amd64:"MOVUPS"
|
|
b = a
|
|
return
|
|
}
|
|
|
|
// ---------------- //
|
|
// Float Min/Max //
|
|
// ---------------- //
|
|
|
|
// Float64Min returns the smaller of a and b using the built-in min.
//
// Each asmcheck directive names the minimum instruction expected on
// that architecture. The riscv64 pattern spells out the full
// double-precision mnemonic so that a single-precision FMINS cannot
// satisfy it.
func Float64Min(a, b float64) float64 {
	// amd64:"MINSD"
	// arm64:"FMIND"
	// loong64:"FMIND"
	// riscv64:"FMIND"
	// ppc64/power9:"XSMINJDP"
	// ppc64/power10:"XSMINJDP"
	// s390x: "WFMINDB"
	return min(a, b)
}
|
|
|
|
// Float64Max returns the larger of a and b using the built-in max.
//
// Each asmcheck directive names the instruction expected on that
// architecture. The riscv64 pattern spells out the full
// double-precision mnemonic so that a single-precision FMAXS cannot
// satisfy it.
//
// NOTE(review): the amd64 check expects MINSD rather than a max
// instruction; presumably amd64 lowers max through min on negated
// operands - confirm against the AMD64 SSA rules before changing it.
func Float64Max(a, b float64) float64 {
	// amd64:"MINSD"
	// arm64:"FMAXD"
	// loong64:"FMAXD"
	// riscv64:"FMAXD"
	// ppc64/power9:"XSMAXJDP"
	// ppc64/power10:"XSMAXJDP"
	// s390x: "WFMAXDB"
	return max(a, b)
}
|
|
|
|
// Float32Min returns the smaller of a and b using the built-in min.
//
// Each asmcheck directive names the minimum instruction expected on
// that architecture.
// NOTE(review): the ppc64 checks name XSMINJDP, the same mnemonic used
// in Float64Min; presumably intentional - confirm.
func Float32Min(a, b float32) float32 {
|
|
// amd64:"MINSS"
|
|
// arm64:"FMINS"
|
|
// loong64:"FMINF"
|
|
// riscv64:"FMINS"
|
|
// ppc64/power9:"XSMINJDP"
|
|
// ppc64/power10:"XSMINJDP"
|
|
// s390x: "WFMINSB"
|
|
return min(a, b)
|
|
}
|
|
|
|
// Float32Max returns the larger of a and b using the built-in max.
//
// Each asmcheck directive names the instruction expected on that
// architecture.
// NOTE(review): the amd64 check expects MINSS rather than a max
// instruction; presumably amd64 lowers max through min on negated
// operands - confirm against the AMD64 SSA rules before changing it.
// NOTE(review): the ppc64 checks name XSMAXJDP, the same mnemonic used
// in Float64Max; presumably intentional - confirm.
func Float32Max(a, b float32) float32 {
|
|
// amd64:"MINSS"
|
|
// arm64:"FMAXS"
|
|
// loong64:"FMAXF"
|
|
// riscv64:"FMAXS"
|
|
// ppc64/power9:"XSMAXJDP"
|
|
// ppc64/power10:"XSMAXJDP"
|
|
// s390x: "WFMAXSB"
|
|
return max(a, b)
|
|
}
|
|
|
|
// ------------------------ //
|
|
// Constant Optimizations //
|
|
// ------------------------ //
|
|
|
|
// Float32ConstantZero returns the float32 constant 0.0.
//
// On arm64 the directive expects an FMOVS whose source operand is ZR.
func Float32ConstantZero() float32 {
|
|
// arm64:"FMOVS ZR,"
|
|
return 0.0
|
|
}
|
|
|
|
// Float32ConstantChipFloat returns the float32 constant 2.25.
//
// On arm64 the directive expects an FMOVS whose source is the immediate
// operand $(2.25).
func Float32ConstantChipFloat() float32 {
|
|
// arm64:"FMOVS [$]\\(2\\.25\\),"
|
|
return 2.25
|
|
}
|
|
|
|
// Float32Constant returns the float32 constant 49.0.
//
// On arm64 and on ppc64x at power8 and power9 the directives expect an
// FMOVS from the symbol f32.42440000(SB). On ppc64x at power10 the
// directive expects XXSPLTIDP with the immediate 1111752704, which is
// 0x42440000 written in decimal.
func Float32Constant() float32 {
|
|
// arm64:"FMOVS [$]f32\\.42440000\\(SB\\)"
|
|
// ppc64x/power8:"FMOVS [$]f32\\.42440000\\(SB\\)"
|
|
// ppc64x/power9:"FMOVS [$]f32\\.42440000\\(SB\\)"
|
|
// ppc64x/power10:"XXSPLTIDP [$]1111752704,"
|
|
return 49.0
|
|
}
|
|
|
|
// Float64ConstantZero returns the float64 constant 0.0.
//
// On arm64 the directive expects an FMOVD whose source operand is ZR.
func Float64ConstantZero() float64 {
|
|
// arm64:"FMOVD ZR,"
|
|
return 0.0
|
|
}
|
|
|
|
// Float64ConstantChipFloat returns the float64 constant 2.25.
//
// On arm64 the directive expects an FMOVD whose source is the immediate
// operand $(2.25).
func Float64ConstantChipFloat() float64 {
|
|
// arm64:"FMOVD [$]\\(2\\.25\\),"
|
|
return 2.25
|
|
}
|
|
|
|
// Float64Constant returns the float64 constant 49.0.
//
// On arm64 and on ppc64x at power8 and power9 the directives expect an
// FMOVD from the symbol f64.4048800000000000(SB). On ppc64x at power10
// the directive expects XXSPLTIDP with the immediate 1111752704, the
// same immediate that Float32Constant checks for.
func Float64Constant() float64 {
|
|
// arm64:"FMOVD [$]f64\\.4048800000000000\\(SB\\)"
|
|
// ppc64x/power8:"FMOVD [$]f64\\.4048800000000000\\(SB\\)"
|
|
// ppc64x/power9:"FMOVD [$]f64\\.4048800000000000\\(SB\\)"
|
|
// ppc64x/power10:"XXSPLTIDP [$]1111752704,"
|
|
return 49.0
|
|
}
|
|
|
|
// Float32DenormalConstant returns the float32 constant 0x1p-127.
//
// On ppc64x the directive expects an FMOVS from the symbol
// f32.00400000(SB).
func Float32DenormalConstant() float32 {
|
|
// ppc64x:"FMOVS [$]f32\\.00400000\\(SB\\)"
|
|
return 0x1p-127
|
|
}
|
|
|
|
// Float64DenormalFloat32Constant returns the float64 constant 0x1p-127.
// On ppc64x the directive expects an FMOVD from the symbol
// f64.3800000000000000(SB).
//
// A float64 constant which can be exactly represented as a
|
|
// denormal float32 value. On ppc64x, denormal values cannot
|
|
// be used with XXSPLTIDP.
|
|
func Float64DenormalFloat32Constant() float64 {
|
|
// ppc64x:"FMOVD [$]f64\\.3800000000000000\\(SB\\)"
|
|
return 0x1p-127
|
|
}
|
|
|
|
// Float32ConstantStore stores the float32 constant 5.432 through p.
//
// On amd64 the directive expects a MOVL of the immediate 1085133554,
// which is 0x40add2f2 written in decimal. On riscv64 the directive
// expects a MOVF from the constant symbol f32.40add2f2.
func Float32ConstantStore(p *float32) {
|
|
// amd64:"MOVL [$]1085133554"
|
|
// riscv64: "MOVF [$]f32.40add2f2"
|
|
*p = 5.432
|
|
}
|
|
|
|
// Float64ConstantStore stores the float64 constant 5.432 through p.
//
// On amd64 the directive expects a MOVQ of the immediate
// 4617801906721357038. On riscv64 the directive expects a MOVD from the
// constant symbol f64.4015ba5e353f7cee.
func Float64ConstantStore(p *float64) {
|
|
// amd64: "MOVQ [$]4617801906721357038"
|
|
// riscv64: "MOVD [$]f64.4015ba5e353f7cee"
|
|
*p = 5.432
|
|
}
|
|
|
|
// ------------------------ //
|
|
// Subnormal tests //
|
|
// ------------------------ //
|
|
|
|
// isSubnormal reports whether the absolute value of x is below
// 2.2250738585072014e-308. As written, this is also true for zero.
//
// On riscv64 the directive requires FCLASSD and forbids FABSD.
func isSubnormal(x float64) bool {
|
|
// riscv64:"FCLASSD" -"FABSD"
|
|
return math.Abs(x) < 2.2250738585072014e-308
|
|
}
|
|
|
|
// isNormal reports whether the absolute value of x is at least
// 0x1p-1022. As written, this is also true for infinities.
//
// On riscv64 the directive requires FCLASSD and forbids FABSD.
func isNormal(x float64) bool {
|
|
// riscv64:"FCLASSD" -"FABSD"
|
|
return math.Abs(x) >= 0x1p-1022
|
|
}
|
|
|
|
// isPosSubnormal reports whether x is strictly greater than zero and
// strictly less than 2.2250738585072014e-308.
//
// On riscv64 the directive requires FCLASSD.
func isPosSubnormal(x float64) bool {
|
|
// riscv64:"FCLASSD"
|
|
return x > 0 && x < 2.2250738585072014e-308
|
|
}
|
|
|
|
// isNegSubnormal reports whether x is strictly less than zero and
// strictly greater than -0x1p-1022.
//
// On riscv64 the directive requires FCLASSD.
func isNegSubnormal(x float64) bool {
|
|
// riscv64:"FCLASSD"
|
|
return x < 0 && x > -0x1p-1022
|
|
}
|
|
|
|
// isPosNormal reports whether x is at least 2.2250738585072014e-308.
// As written, this is also true for positive infinity.
//
// On riscv64 the directive requires FCLASSD.
func isPosNormal(x float64) bool {
|
|
// riscv64:"FCLASSD"
|
|
return x >= 2.2250738585072014e-308
|
|
}
|
|
|
|
// isNegNormal reports whether x is at most -2.2250738585072014e-308.
// As written, this is also true for negative infinity.
//
// On riscv64 the directive requires FCLASSD.
func isNegNormal(x float64) bool {
|
|
// riscv64:"FCLASSD"
|
|
return x <= -2.2250738585072014e-308
|
|
}
|