mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
runtime: adjust softfloat corner cases to match amd64/arm64
This chooses saturating behavior for over/underflow. Change-Id: I96a33ef73feacdafe8310f893de445060bc1a536 Reviewed-on: https://go-review.googlesource.com/c/go/+/709595 Reviewed-by: Keith Randall <khr@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
parent
78d75b3799
commit
b9f3accdcf
4 changed files with 107 additions and 25 deletions
|
|
@ -25,6 +25,7 @@ var F32to64 = f32to64
|
||||||
var Fcmp64 = fcmp64
|
var Fcmp64 = fcmp64
|
||||||
var Fintto64 = fintto64
|
var Fintto64 = fintto64
|
||||||
var F64toint = f64toint
|
var F64toint = f64toint
|
||||||
|
var F64touint = f64touint64
|
||||||
|
|
||||||
var Entersyscall = entersyscall
|
var Entersyscall = entersyscall
|
||||||
var Exitsyscall = exitsyscall
|
var Exitsyscall = exitsyscall
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,11 @@ const (
|
||||||
neg32 uint32 = 1 << (expbits32 + mantbits32)
|
neg32 uint32 = 1 << (expbits32 + mantbits32)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// If f is not NaN and not Inf, then f == (-1)**sign * mantissa * 2**(exp-52)
|
||||||
|
// The mantissa and exp are adjusted from their stored representation so
|
||||||
|
// that the mantissa includes the formerly implicit 1, the exponent bias
|
||||||
|
// is removed, and denormalized floats are adjusted to put a 1 in the expected
|
||||||
|
// (1<<mantbits64) position.
|
||||||
func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool) {
|
func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool) {
|
||||||
sign = f & (1 << (mantbits64 + expbits64))
|
sign = f & (1 << (mantbits64 + expbits64))
|
||||||
mant = f & (1<<mantbits64 - 1)
|
mant = f & (1<<mantbits64 - 1)
|
||||||
|
|
@ -371,24 +376,25 @@ func fcmp64(f, g uint64) (cmp int32, isnan bool) {
|
||||||
return 0, false
|
return 0, false
|
||||||
}
|
}
|
||||||
|
|
||||||
func f64toint(f uint64) (val int64, ok bool) {
|
// returns saturated-conversion int64 value of f
|
||||||
|
// and a flag that, despite its name, is false when f is NaN (in which
|
||||||
|
// case val may not match the "hardware" conversion) or |f| < 0.5, else true.
|
||||||
|
func f64toint(f uint64) (val int64, isNan bool) {
|
||||||
fs, fm, fe, fi, fn := funpack64(f)
|
fs, fm, fe, fi, fn := funpack64(f)
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case fi, fn: // NaN
|
|
||||||
return 0, false
|
case fn: // NaN
|
||||||
|
return -0x8000_0000_0000_0000, false
|
||||||
|
|
||||||
case fe < -1: // f < 0.5
|
case fe < -1: // f < 0.5
|
||||||
return 0, false
|
return 0, false
|
||||||
|
|
||||||
case fe > 63: // f >= 2^63
|
case fi || fe >= 63: // |f| >= 2^63, including infinity
|
||||||
if fs != 0 && fm == 0 { // f == -2^63
|
|
||||||
return -1 << 63, true
|
|
||||||
}
|
|
||||||
if fs != 0 {
|
if fs != 0 {
|
||||||
return 0, false
|
return -0x8000_0000_0000_0000, true
|
||||||
}
|
}
|
||||||
return 0, false
|
return 0x7fff_ffff_ffff_ffff, true
|
||||||
}
|
}
|
||||||
|
|
||||||
for fe > int(mantbits64) {
|
for fe > int(mantbits64) {
|
||||||
|
|
@ -400,12 +406,51 @@ func f64toint(f uint64) (val int64, ok bool) {
|
||||||
fm >>= 1
|
fm >>= 1
|
||||||
}
|
}
|
||||||
val = int64(fm)
|
val = int64(fm)
|
||||||
|
if val < 0 {
|
||||||
|
if fs != 0 {
|
||||||
|
return -0x8000_0000_0000_0000, true
|
||||||
|
}
|
||||||
|
return 0x7fff_ffff_ffff_ffff, true
|
||||||
|
}
|
||||||
if fs != 0 {
|
if fs != 0 {
|
||||||
val = -val
|
val = -val
|
||||||
}
|
}
|
||||||
return val, true
|
return val, true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// returns saturated-conversion uint64 value of f
|
||||||
|
// and a flag that, despite its name, is false when f is NaN (in which
|
||||||
|
// case val may not match the "hardware" conversion) and true otherwise.
|
||||||
|
func f64touint(f uint64) (val uint64, isNan bool) {
|
||||||
|
fs, fm, fe, fi, fn := funpack64(f)
|
||||||
|
|
||||||
|
switch {
|
||||||
|
|
||||||
|
case fn: // NaN
|
||||||
|
return 0xffff_ffff_ffff_ffff, false
|
||||||
|
|
||||||
|
case fs != 0: // all negative, including -Inf, are zero
|
||||||
|
return 0, true
|
||||||
|
|
||||||
|
case fi || fe >= 64: // positive infinity or f >= 2^64
|
||||||
|
return 0xffff_ffff_ffff_ffff, true
|
||||||
|
|
||||||
|
case fe < -1: // f < 0.5
|
||||||
|
return 0, true
|
||||||
|
}
|
||||||
|
|
||||||
|
for fe > int(mantbits64) {
|
||||||
|
fe--
|
||||||
|
fm <<= 1
|
||||||
|
}
|
||||||
|
for fe < int(mantbits64) {
|
||||||
|
fe++
|
||||||
|
fm >>= 1
|
||||||
|
}
|
||||||
|
val = fm
|
||||||
|
return val, true
|
||||||
|
}
|
||||||
|
|
||||||
func fintto64(val int64) (f uint64) {
|
func fintto64(val int64) (f uint64) {
|
||||||
fs := uint64(val) & (1 << 63)
|
fs := uint64(val) & (1 << 63)
|
||||||
mant := uint64(val)
|
mant := uint64(val)
|
||||||
|
|
@ -564,6 +609,12 @@ func fint64to64(x int64) uint64 {
|
||||||
|
|
||||||
func f32toint32(x uint32) int32 {
|
func f32toint32(x uint32) int32 {
|
||||||
val, _ := f64toint(f32to64(x))
|
val, _ := f64toint(f32to64(x))
|
||||||
|
if val >= 0x7fffffff {
|
||||||
|
return 0x7fffffff
|
||||||
|
}
|
||||||
|
if val < -0x80000000 {
|
||||||
|
return -0x80000000
|
||||||
|
}
|
||||||
return int32(val)
|
return int32(val)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -574,6 +625,12 @@ func f32toint64(x uint32) int64 {
|
||||||
|
|
||||||
func f64toint32(x uint64) int32 {
|
func f64toint32(x uint64) int32 {
|
||||||
val, _ := f64toint(x)
|
val, _ := f64toint(x)
|
||||||
|
if val >= 0x7fffffff {
|
||||||
|
return 0x7fffffff
|
||||||
|
}
|
||||||
|
if val < -0x80000000 {
|
||||||
|
return -0x80000000
|
||||||
|
}
|
||||||
return int32(val)
|
return int32(val)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -583,23 +640,13 @@ func f64toint64(x uint64) int64 {
|
||||||
}
|
}
|
||||||
|
|
||||||
func f64touint64(x uint64) uint64 {
|
func f64touint64(x uint64) uint64 {
|
||||||
var m uint64 = 0x43e0000000000000 // float64 1<<63
|
val, _ := f64touint(x)
|
||||||
if fgt64(m, x) {
|
return val
|
||||||
return uint64(f64toint64(x))
|
|
||||||
}
|
|
||||||
y := fadd64(x, -m)
|
|
||||||
z := uint64(f64toint64(y))
|
|
||||||
return z | (1 << 63)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func f32touint64(x uint32) uint64 {
|
func f32touint64(x uint32) uint64 {
|
||||||
var m uint32 = 0x5f000000 // float32 1<<63
|
val, _ := f64touint(f32to64(x))
|
||||||
if fgt32(m, x) {
|
return val
|
||||||
return uint64(f32toint64(x))
|
|
||||||
}
|
|
||||||
y := fadd32(x, -m)
|
|
||||||
z := uint64(f32toint64(y))
|
|
||||||
return z | (1 << 63)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func fuint64to64(x uint64) uint64 {
|
func fuint64to64(x uint64) uint64 {
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,15 @@ func div(x, y float64) float64 { return x / y }
|
||||||
func TestFloat64(t *testing.T) {
|
func TestFloat64(t *testing.T) {
|
||||||
base := []float64{
|
base := []float64{
|
||||||
0,
|
0,
|
||||||
|
1,
|
||||||
|
-9223372036854775808,
|
||||||
|
-9223372036854775808 + 4096,
|
||||||
|
18446744073709551615,
|
||||||
|
18446744073709551615 + 1,
|
||||||
|
18446744073709551615 - 1,
|
||||||
|
9223372036854775808 + 4096,
|
||||||
|
0.5,
|
||||||
|
0.75,
|
||||||
math.Copysign(0, -1),
|
math.Copysign(0, -1),
|
||||||
-1,
|
-1,
|
||||||
1,
|
1,
|
||||||
|
|
@ -35,6 +44,8 @@ func TestFloat64(t *testing.T) {
|
||||||
math.Inf(+1),
|
math.Inf(+1),
|
||||||
math.Inf(-1),
|
math.Inf(-1),
|
||||||
0.1,
|
0.1,
|
||||||
|
0.5,
|
||||||
|
0.75,
|
||||||
1.5,
|
1.5,
|
||||||
1.9999999999999998, // all 1s mantissa
|
1.9999999999999998, // all 1s mantissa
|
||||||
1.3333333333333333, // 1.010101010101...
|
1.3333333333333333, // 1.010101010101...
|
||||||
|
|
@ -70,7 +81,7 @@ func TestFloat64(t *testing.T) {
|
||||||
1e+307,
|
1e+307,
|
||||||
1e+308,
|
1e+308,
|
||||||
}
|
}
|
||||||
all := make([]float64, 200)
|
all := make([]float64, 250)
|
||||||
copy(all, base)
|
copy(all, base)
|
||||||
for i := len(base); i < len(all); i++ {
|
for i := len(base); i < len(all); i++ {
|
||||||
all[i] = rand.NormFloat64()
|
all[i] = rand.NormFloat64()
|
||||||
|
|
@ -82,6 +93,7 @@ func TestFloat64(t *testing.T) {
|
||||||
test(t, "*", mul, fop(Fmul64), all)
|
test(t, "*", mul, fop(Fmul64), all)
|
||||||
test(t, "/", div, fop(Fdiv64), all)
|
test(t, "/", div, fop(Fdiv64), all)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 64 -hw-> 32 -hw-> 64
|
// 64 -hw-> 32 -hw-> 64
|
||||||
|
|
@ -104,6 +116,11 @@ func hwint64(f float64) float64 {
|
||||||
return float64(int64(f))
|
return float64(int64(f))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// float64 -hw-> uint64 -hw-> float64
|
||||||
|
func hwuint64(f float64) float64 {
|
||||||
|
return float64(uint64(f))
|
||||||
|
}
|
||||||
|
|
||||||
// float64 -hw-> int32 -hw-> float64
|
// float64 -hw-> int32 -hw-> float64
|
||||||
func hwint32(f float64) float64 {
|
func hwint32(f float64) float64 {
|
||||||
return float64(int32(f))
|
return float64(int32(f))
|
||||||
|
|
@ -113,13 +130,23 @@ func hwint32(f float64) float64 {
|
||||||
func toint64sw(f float64) float64 {
|
func toint64sw(f float64) float64 {
|
||||||
i, ok := F64toint(math.Float64bits(f))
|
i, ok := F64toint(math.Float64bits(f))
|
||||||
if !ok {
|
if !ok {
|
||||||
// There's no right answer for out of range.
|
// There's no right answer for NaN.
|
||||||
// Match the hardware to pass the test.
|
// Match the hardware to pass the test.
|
||||||
i = int64(f)
|
i = int64(f)
|
||||||
}
|
}
|
||||||
return float64(i)
|
return float64(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func touint64sw(f float64) float64 {
|
||||||
|
i := F64touint(math.Float64bits(f))
|
||||||
|
if f != f {
|
||||||
|
// There's no right answer for NaN.
|
||||||
|
// Match the hardware to pass the test.
|
||||||
|
i = uint64(f)
|
||||||
|
}
|
||||||
|
return float64(i)
|
||||||
|
}
|
||||||
|
|
||||||
// float64 -hw-> int64 -sw-> float64
|
// float64 -hw-> int64 -sw-> float64
|
||||||
func fromint64sw(f float64) float64 {
|
func fromint64sw(f float64) float64 {
|
||||||
return math.Float64frombits(Fintto64(int64(f)))
|
return math.Float64frombits(Fintto64(int64(f)))
|
||||||
|
|
@ -150,6 +177,7 @@ func test(t *testing.T, op string, hw, sw func(float64, float64) float64, all []
|
||||||
testu(t, "to32", trunc32, to32sw, h)
|
testu(t, "to32", trunc32, to32sw, h)
|
||||||
testu(t, "to64", trunc32, to64sw, h)
|
testu(t, "to64", trunc32, to64sw, h)
|
||||||
testu(t, "toint64", hwint64, toint64sw, h)
|
testu(t, "toint64", hwint64, toint64sw, h)
|
||||||
|
testu(t, "touint64", hwuint64, touint64sw, h)
|
||||||
testu(t, "fromint64", hwint64, fromint64sw, h)
|
testu(t, "fromint64", hwint64, fromint64sw, h)
|
||||||
testcmp(t, f, h)
|
testcmp(t, f, h)
|
||||||
testcmp(t, h, f)
|
testcmp(t, h, f)
|
||||||
|
|
@ -163,6 +191,7 @@ func testu(t *testing.T, op string, hw, sw func(float64) float64, v float64) {
|
||||||
h := hw(v)
|
h := hw(v)
|
||||||
s := sw(v)
|
s := sw(v)
|
||||||
if !same(h, s) {
|
if !same(h, s) {
|
||||||
|
s = sw(v) // debug me
|
||||||
err(t, "%s %g = sw %g, hw %g\n", op, v, s, h)
|
err(t, "%s %g = sw %g, hw %g\n", op, v, s, h)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,8 @@ func main() {
|
||||||
p64_plus4k_plus1 := id(float64(p64 + 4096 + 1)) // want this to be precise and fit in 53 bits mantissa
|
p64_plus4k_plus1 := id(float64(p64 + 4096 + 1)) // want this to be precise and fit in 53 bits mantissa
|
||||||
n32_minus4k := id(float32(n32 - 4096))
|
n32_minus4k := id(float32(n32 - 4096))
|
||||||
n64_minus4k := id(float64(n64 - 4096))
|
n64_minus4k := id(float64(n64 - 4096))
|
||||||
|
n32_plus4k := id(float32(n32 + 4096))
|
||||||
|
n64_plus4k := id(float64(n64 + 4096))
|
||||||
inf_32 := id(float32(one / 0))
|
inf_32 := id(float32(one / 0))
|
||||||
inf_64 := id(float64(one / 0))
|
inf_64 := id(float64(one / 0))
|
||||||
ninf_32 := id(float32(-one / 0))
|
ninf_32 := id(float32(-one / 0))
|
||||||
|
|
@ -79,6 +81,7 @@ func main() {
|
||||||
{"p64_plus4k_plus1", p64_plus4k_plus1, p32},
|
{"p64_plus4k_plus1", p64_plus4k_plus1, p32},
|
||||||
{"n32_minus4k", n32_minus4k, n32},
|
{"n32_minus4k", n32_minus4k, n32},
|
||||||
{"n64_minus4k", n64_minus4k, n32},
|
{"n64_minus4k", n64_minus4k, n32},
|
||||||
|
{"n32_plus4k", n32_plus4k, n32 + 4096},
|
||||||
{"inf_32", inf_32, p32},
|
{"inf_32", inf_32, p32},
|
||||||
{"inf_64", inf_64, p32},
|
{"inf_64", inf_64, p32},
|
||||||
{"ninf_32", ninf_32, n32},
|
{"ninf_32", ninf_32, n32},
|
||||||
|
|
@ -108,6 +111,8 @@ func main() {
|
||||||
{"p64_plus4k_plus1", p64_plus4k_plus1, p64},
|
{"p64_plus4k_plus1", p64_plus4k_plus1, p64},
|
||||||
{"n32_minus4k", n32_minus4k, n32 - 4096},
|
{"n32_minus4k", n32_minus4k, n32 - 4096},
|
||||||
{"n64_minus4k", n64_minus4k, n64},
|
{"n64_minus4k", n64_minus4k, n64},
|
||||||
|
{"n32_plus4k", n32_plus4k, n32 + 4096},
|
||||||
|
{"n64_plus4k", n64_plus4k, n64 + 4096},
|
||||||
{"inf_32", inf_32, p64},
|
{"inf_32", inf_32, p64},
|
||||||
{"inf_64", inf_64, p64},
|
{"inf_64", inf_64, p64},
|
||||||
{"ninf_32", ninf_32, n64},
|
{"ninf_32", ninf_32, n64},
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue