runtime: adjust softfloat corner cases to match amd64/arm64

This chooses saturating behavior for over/underflow.

Change-Id: I96a33ef73feacdafe8310f893de445060bc1a536
Reviewed-on: https://go-review.googlesource.com/c/go/+/709595
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
David Chase 2025-10-06 15:01:03 -04:00
parent 78d75b3799
commit b9f3accdcf
4 changed files with 107 additions and 25 deletions

View file

@ -25,6 +25,7 @@ var F32to64 = f32to64
var Fcmp64 = fcmp64 var Fcmp64 = fcmp64
var Fintto64 = fintto64 var Fintto64 = fintto64
var F64toint = f64toint var F64toint = f64toint
var F64touint = f64touint64
var Entersyscall = entersyscall var Entersyscall = entersyscall
var Exitsyscall = exitsyscall var Exitsyscall = exitsyscall

View file

@ -26,6 +26,11 @@ const (
neg32 uint32 = 1 << (expbits32 + mantbits32) neg32 uint32 = 1 << (expbits32 + mantbits32)
) )
// If f is not NaN and not Inf, then f == (-1)**sign * mantissa * 2**(exp-52).
// The mantissa and exp are adjusted from their stored representation so
// that the mantissa includes the formerly implicit 1, the exponent bias
// is removed, and denormalized floats are shifted to put a 1 in the
// expected (1<<mantbits64) position.
func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool) { func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool) {
sign = f & (1 << (mantbits64 + expbits64)) sign = f & (1 << (mantbits64 + expbits64))
mant = f & (1<<mantbits64 - 1) mant = f & (1<<mantbits64 - 1)
@ -371,24 +376,25 @@ func fcmp64(f, g uint64) (cmp int32, isnan bool) {
return 0, false return 0, false
} }
func f64toint(f uint64) (val int64, ok bool) { // returns saturated-conversion int64 value of f
// and a flag that is false when the input was NaN (in which case
// the value may not match the "hardware" conversion) or when the
// magnitude of f is below 0.5, and true otherwise.
func f64toint(f uint64) (val int64, isNan bool) {
fs, fm, fe, fi, fn := funpack64(f) fs, fm, fe, fi, fn := funpack64(f)
switch { switch {
case fi, fn: // NaN
return 0, false case fn: // NaN
return -0x8000_0000_0000_0000, false
case fe < -1: // f < 0.5 case fe < -1: // f < 0.5
return 0, false return 0, false
case fe > 63: // f >= 2^63 case fi || fe >= 63: // |f| >= 2^63, including infinity
if fs != 0 && fm == 0 { // f == -2^63
return -1 << 63, true
}
if fs != 0 { if fs != 0 {
return 0, false return -0x8000_0000_0000_0000, true
} }
return 0, false return 0x7fff_ffff_ffff_ffff, true
} }
for fe > int(mantbits64) { for fe > int(mantbits64) {
@ -400,12 +406,51 @@ func f64toint(f uint64) (val int64, ok bool) {
fm >>= 1 fm >>= 1
} }
val = int64(fm) val = int64(fm)
if val < 0 {
if fs != 0 {
return -0x8000_0000_0000_0000, true
}
return 0x7fff_ffff_ffff_ffff, true
}
if fs != 0 { if fs != 0 {
val = -val val = -val
} }
return val, true return val, true
} }
// f64touint converts the float64 bit pattern f to a uint64, truncating
// toward zero and saturating when the value is out of range:
//
//   - negative inputs, including -Inf, yield 0;
//   - +Inf and any value >= 2^64 yield the maximum uint64;
//   - NaN yields the maximum uint64, which may not match the
//     "hardware" conversion.
//
// ok is false only when the input was NaN, and true for every other input.
func f64touint(f uint64) (val uint64, ok bool) {
	fs, fm, fe, fi, fn := funpack64(f)

	switch {
	case fn: // NaN
		return 0xffff_ffff_ffff_ffff, false

	case fs != 0: // all negative, including -Inf, are zero
		return 0, true

	case fi || fe >= 64: // positive infinity or f >= 2^64
		return 0xffff_ffff_ffff_ffff, true

	case fe < -1: // f < 0.5
		return 0, true
	}

	// Here -1 <= fe <= 63. The unpacked value is fm * 2**(fe-mantbits64),
	// so scale fm by that power of two in a single shift. A right shift
	// discards the fractional bits, i.e. truncates toward zero.
	shift := fe - int(mantbits64)
	if shift >= 0 {
		return fm << uint(shift), true
	}
	return fm >> uint(-shift), true
}
func fintto64(val int64) (f uint64) { func fintto64(val int64) (f uint64) {
fs := uint64(val) & (1 << 63) fs := uint64(val) & (1 << 63)
mant := uint64(val) mant := uint64(val)
@ -564,6 +609,12 @@ func fint64to64(x int64) uint64 {
func f32toint32(x uint32) int32 { func f32toint32(x uint32) int32 {
val, _ := f64toint(f32to64(x)) val, _ := f64toint(f32to64(x))
if val >= 0x7fffffff {
return 0x7fffffff
}
if val < -0x80000000 {
return -0x80000000
}
return int32(val) return int32(val)
} }
@ -574,6 +625,12 @@ func f32toint64(x uint32) int64 {
func f64toint32(x uint64) int32 { func f64toint32(x uint64) int32 {
val, _ := f64toint(x) val, _ := f64toint(x)
if val >= 0x7fffffff {
return 0x7fffffff
}
if val < -0x80000000 {
return -0x80000000
}
return int32(val) return int32(val)
} }
@ -583,23 +640,13 @@ func f64toint64(x uint64) int64 {
} }
func f64touint64(x uint64) uint64 { func f64touint64(x uint64) uint64 {
var m uint64 = 0x43e0000000000000 // float64 1<<63 val, _ := f64touint(x)
if fgt64(m, x) { return val
return uint64(f64toint64(x))
}
y := fadd64(x, -m)
z := uint64(f64toint64(y))
return z | (1 << 63)
} }
func f32touint64(x uint32) uint64 { func f32touint64(x uint32) uint64 {
var m uint32 = 0x5f000000 // float32 1<<63 val, _ := f64touint(f32to64(x))
if fgt32(m, x) { return val
return uint64(f32toint64(x))
}
y := fadd32(x, -m)
z := uint64(f32toint64(y))
return z | (1 << 63)
} }
func fuint64to64(x uint64) uint64 { func fuint64to64(x uint64) uint64 {

View file

@ -28,6 +28,15 @@ func div(x, y float64) float64 { return x / y }
func TestFloat64(t *testing.T) { func TestFloat64(t *testing.T) {
base := []float64{ base := []float64{
0, 0,
1,
-9223372036854775808,
-9223372036854775808 + 4096,
18446744073709551615,
18446744073709551615 + 1,
18446744073709551615 - 1,
9223372036854775808 + 4096,
0.5,
0.75,
math.Copysign(0, -1), math.Copysign(0, -1),
-1, -1,
1, 1,
@ -35,6 +44,8 @@ func TestFloat64(t *testing.T) {
math.Inf(+1), math.Inf(+1),
math.Inf(-1), math.Inf(-1),
0.1, 0.1,
0.5,
0.75,
1.5, 1.5,
1.9999999999999998, // all 1s mantissa 1.9999999999999998, // all 1s mantissa
1.3333333333333333, // 1.010101010101... 1.3333333333333333, // 1.010101010101...
@ -70,7 +81,7 @@ func TestFloat64(t *testing.T) {
1e+307, 1e+307,
1e+308, 1e+308,
} }
all := make([]float64, 200) all := make([]float64, 250)
copy(all, base) copy(all, base)
for i := len(base); i < len(all); i++ { for i := len(base); i < len(all); i++ {
all[i] = rand.NormFloat64() all[i] = rand.NormFloat64()
@ -82,6 +93,7 @@ func TestFloat64(t *testing.T) {
test(t, "*", mul, fop(Fmul64), all) test(t, "*", mul, fop(Fmul64), all)
test(t, "/", div, fop(Fdiv64), all) test(t, "/", div, fop(Fdiv64), all)
} }
} }
// 64 -hw-> 32 -hw-> 64 // 64 -hw-> 32 -hw-> 64
@ -104,6 +116,11 @@ func hwint64(f float64) float64 {
return float64(int64(f)) return float64(int64(f))
} }
// hwuint64 round-trips f through the built-in conversions:
// float64 -hw-> uint64 -hw-> float64
func hwuint64(f float64) float64 {
	u := uint64(f)
	return float64(u)
}
// float64 -hw-> int32 -hw-> float64 // float64 -hw-> int32 -hw-> float64
func hwint32(f float64) float64 { func hwint32(f float64) float64 {
return float64(int32(f)) return float64(int32(f))
@ -113,13 +130,23 @@ func hwint32(f float64) float64 {
func toint64sw(f float64) float64 { func toint64sw(f float64) float64 {
i, ok := F64toint(math.Float64bits(f)) i, ok := F64toint(math.Float64bits(f))
if !ok { if !ok {
// There's no right answer for out of range. // There's no right answer for NaN.
// Match the hardware to pass the test. // Match the hardware to pass the test.
i = int64(f) i = int64(f)
} }
return float64(i) return float64(i)
} }
// touint64sw converts f to uint64 with the software routine F64touint
// and converts the result back to float64 with the built-in conversion:
// float64 -sw-> uint64 -hw-> float64
func touint64sw(f float64) float64 {
	if math.IsNaN(f) {
		// There's no right answer for NaN.
		// Match the hardware to pass the test.
		return float64(uint64(f))
	}
	bits := math.Float64bits(f)
	return float64(F64touint(bits))
}
// float64 -hw-> int64 -sw-> float64 // float64 -hw-> int64 -sw-> float64
func fromint64sw(f float64) float64 { func fromint64sw(f float64) float64 {
return math.Float64frombits(Fintto64(int64(f))) return math.Float64frombits(Fintto64(int64(f)))
@ -150,6 +177,7 @@ func test(t *testing.T, op string, hw, sw func(float64, float64) float64, all []
testu(t, "to32", trunc32, to32sw, h) testu(t, "to32", trunc32, to32sw, h)
testu(t, "to64", trunc32, to64sw, h) testu(t, "to64", trunc32, to64sw, h)
testu(t, "toint64", hwint64, toint64sw, h) testu(t, "toint64", hwint64, toint64sw, h)
testu(t, "touint64", hwuint64, touint64sw, h)
testu(t, "fromint64", hwint64, fromint64sw, h) testu(t, "fromint64", hwint64, fromint64sw, h)
testcmp(t, f, h) testcmp(t, f, h)
testcmp(t, h, f) testcmp(t, h, f)
@ -163,6 +191,7 @@ func testu(t *testing.T, op string, hw, sw func(float64) float64, v float64) {
h := hw(v) h := hw(v)
s := sw(v) s := sw(v)
if !same(h, s) { if !same(h, s) {
s = sw(v) // debug me
err(t, "%s %g = sw %g, hw %g\n", op, v, s, h) err(t, "%s %g = sw %g, hw %g\n", op, v, s, h)
} }
} }

View file

@ -62,6 +62,8 @@ func main() {
p64_plus4k_plus1 := id(float64(p64 + 4096 + 1)) // want this to be precise and fit in 53 bits mantissa p64_plus4k_plus1 := id(float64(p64 + 4096 + 1)) // want this to be precise and fit in 53 bits mantissa
n32_minus4k := id(float32(n32 - 4096)) n32_minus4k := id(float32(n32 - 4096))
n64_minus4k := id(float64(n64 - 4096)) n64_minus4k := id(float64(n64 - 4096))
n32_plus4k := id(float32(n32 + 4096))
n64_plus4k := id(float64(n64 + 4096))
inf_32 := id(float32(one / 0)) inf_32 := id(float32(one / 0))
inf_64 := id(float64(one / 0)) inf_64 := id(float64(one / 0))
ninf_32 := id(float32(-one / 0)) ninf_32 := id(float32(-one / 0))
@ -79,6 +81,7 @@ func main() {
{"p64_plus4k_plus1", p64_plus4k_plus1, p32}, {"p64_plus4k_plus1", p64_plus4k_plus1, p32},
{"n32_minus4k", n32_minus4k, n32}, {"n32_minus4k", n32_minus4k, n32},
{"n64_minus4k", n64_minus4k, n32}, {"n64_minus4k", n64_minus4k, n32},
{"n32_plus4k", n32_plus4k, n32 + 4096},
{"inf_32", inf_32, p32}, {"inf_32", inf_32, p32},
{"inf_64", inf_64, p32}, {"inf_64", inf_64, p32},
{"ninf_32", ninf_32, n32}, {"ninf_32", ninf_32, n32},
@ -108,6 +111,8 @@ func main() {
{"p64_plus4k_plus1", p64_plus4k_plus1, p64}, {"p64_plus4k_plus1", p64_plus4k_plus1, p64},
{"n32_minus4k", n32_minus4k, n32 - 4096}, {"n32_minus4k", n32_minus4k, n32 - 4096},
{"n64_minus4k", n64_minus4k, n64}, {"n64_minus4k", n64_minus4k, n64},
{"n32_plus4k", n32_plus4k, n32 + 4096},
{"n64_plus4k", n64_plus4k, n64 + 4096},
{"inf_32", inf_32, p64}, {"inf_32", inf_32, p64},
{"inf_64", inf_64, p64}, {"inf_64", inf_64, p64},
{"ninf_32", ninf_32, n64}, {"ninf_32", ninf_32, n64},