// Copyright 2014 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // This file implements multi-precision floating-point numbers. // Like in the GNU MPFR library (http://www.mpfr.org/), operands // can be of mixed precision. Unlike MPFR, the rounding mode is // not specified with each operation, but with each operand. The // rounding mode of the result operand determines the rounding // mode of an operation. This is a from-scratch implementation. // CAUTION: WORK IN PROGRESS - USE AT YOUR OWN RISK. package big import ( "fmt" "math" ) const debugFloat = true // enable for debugging // A Float represents a multi-precision floating point number of the form // // sign × mantissa × 2**exponent // // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp (with the // exception of 0 and Inf which have a 0 mantissa and special exponents). // // Each Float value also has a precision, rounding mode, and accuracy. // // The precision is the maximum number of mantissa bits available to // represent the value. The rounding mode specifies how a result should // be rounded to fit into the mantissa bits, and accuracy describes the // rounding error with respect to the exact result. // // All operations, including setters, that specify a *Float for the result, // usually via the receiver, round their result to the result's precision // and according to its rounding mode, unless specified otherwise. If the // result precision is 0 (see below), it is set to the precision of the // argument with the largest precision value before any rounding takes // place, and the rounding mode remains unchanged. Thus, uninitialized Floats // provided as result arguments will have their precision set to a reasonable // value determined by the operands and their mode is the zero value for // RoundingMode (ToNearestEven). // // By setting the desired precision to 24 or 53 and using ToNearestEven // rounding, Float operations produce the same results as the corresponding // float32 or float64 IEEE-754 arithmetic for normalized operands (no NaNs // or denormalized numbers). Additionally, positive and negative zeros and // infinities are fully supported. // // The zero (uninitialized) value for a Float is ready to use and represents // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. // type Float struct { mode RoundingMode acc Accuracy neg bool mant nat exp int32 prec uint // TODO(gri) make this a 32bit field } // TODO(gri) provide a couple of Example tests showing typical Float intialization // and use. // Internal representation: The mantissa bits x.mant of a Float x are stored // in a nat slice long enough to hold up to x.prec bits; the slice may (but // doesn't have to) be shorter if the mantissa contains trailing 0 bits. // Unless x is a zero or an infinity, x.mant is normalized such that the // msb of x.mant == 1 (i.e., the msb is shifted all the way "to the left"). // Thus, if the mantissa has trailing 0 bits or x.prec is not a multiple // of the the Word size _W, x.mant[0] has trailing zero bits. Zero and Inf // values have an empty mantissa and a 0 or infExp exponent, respectively. const ( MaxExp = math.MaxInt32 // largest supported exponent magnitude infExp = -MaxExp - 1 // exponent for Inf values ) // NewInf returns a new infinite Float value with value +Inf (sign >= 0), // or -Inf (sign < 0). func NewInf(sign int) *Float { return &Float{neg: sign < 0, exp: infExp} } // Accuracy describes the rounding error produced by the most recent // operation that generated a Float value, relative to the exact value: // // -1: below exact value // 0: exact value // +1: above exact value // type Accuracy int8 // Constants describing the Accuracy of a Float. const ( Below Accuracy = -1 Exact Accuracy = 0 Above Accuracy = +1 ) func (a Accuracy) String() string { switch { case a < 0: return "below" default: return "exact" case a > 0: return "above" } } // RoundingMode determines how a Float value is rounded to the // desired precision. Rounding may change the Float value; the // rounding error is described by the Float's Accuracy. type RoundingMode uint8 // The following rounding modes are supported. const ( ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven ToNearestAway // == IEEE 754-2008 roundTiesToAway ToZero // == IEEE 754-2008 roundTowardZero AwayFromZero // no IEEE 754-2008 equivalent ToNegativeInf // == IEEE 754-2008 roundTowardNegative ToPositiveInf // == IEEE 754-2008 roundTowardPositive ) func (mode RoundingMode) String() string { switch mode { case ToNearestEven: return "ToNearestEven" case ToNearestAway: return "ToNearestAway" case ToZero: return "ToZero" case AwayFromZero: return "AwayFromZero" case ToNegativeInf: return "ToNegativeInf" case ToPositiveInf: return "ToPositiveInf" } panic("unreachable") } // SetPrec sets z's precision to prec and returns the (possibly) rounded // value of z. Rounding occurs according to z's rounding mode if the mantissa // cannot be represented in prec bits without loss of precision. func (z *Float) SetPrec(prec uint) *Float { old := z.prec z.acc = Exact z.prec = prec if prec < old { z.round(0) } return z } // SetMode sets z's rounding mode to mode and returns an exact z. // z remains unchanged otherwise. func (z *Float) SetMode(mode RoundingMode) *Float { z.acc = Exact // TODO(gri) should we not do this? what's the general rule for setting accuracy? z.mode = mode return z } // Prec returns the mantissa precision of x in bits. // The result may be 0 for |x| == 0 or |x| == Inf. func (x *Float) Prec() uint { return uint(x.prec) } // Acc returns the accuracy of x produced by the most recent operation. func (x *Float) Acc() Accuracy { return x.acc } // Mode returns the rounding mode of x. func (x *Float) Mode() RoundingMode { return x.mode } // Sign returns: // // -1 if x < 0 // 0 if x == 0 or x == -0 // +1 if x > 0 // func (x *Float) Sign() int { s := 0 if len(x.mant) != 0 || x.exp == infExp { s = 1 // non-zero x } if x.neg { s = -s } return s } // MantExp breaks x into its mantissa and exponent components. // It returns mant and exp satisfying x == mant × 2**exp, with // the absolute value of mant satisfying 0.5 <= |mant| < 1.0. // mant has the same precision and rounding mode as x. // If a non-nil *Float argument z is provided it is used to // store the result mant; otherwise a new Float is allocated. // // Special cases are: // // ( ±0).MantExp() = ±0, 0 // (±Inf).MantExp() = ±Inf, 0 // // MantExp does not modify x; the result mant is a new Float. func (x *Float) MantExp(z *Float) (mant *Float, exp int) { if z == nil { z = new(Float) } mant = z.Copy(x) if x.exp != infExp { exp = int(x.exp) mant.exp = 0 // after reading x.exp (x and mant may be aliases) } return } // SetMantExp sets z to mant × 2**exp and and returns z. // The result z has the same precision and rounding mode // as mant. SetMantExp is an inverse of MantExp but does // not require 0.5 <= |mant| < 1.0. Specifically: // // new(Float).SetMantExp(x.MantExp()).Cmp(x) == 0 // // Special cases are: // // z.SetMantExp( ±0, exp) = ±0 // z.SetMantExp(±Inf, exp) = ±Inf // func (z *Float) SetMantExp(mant *Float, exp int) *Float { z.Copy(mant) if len(z.mant) == 0 || z.exp == infExp { return z } z.setExp(int64(z.exp) + int64(exp)) return z } // IsInt reports whether x is an integer. // ±Inf are not considered integers. func (x *Float) IsInt() bool { if debugFloat { validate(x) } // pick off easy cases if x.exp <= 0 { // |x| < 1 || |x| == Inf return len(x.mant) == 0 && x.exp != infExp } // x.exp > 0 return x.prec <= uint(x.exp) || x.minPrec() <= uint(x.exp) // not enough bits for fractional mantissa } // IsInf reports whether x is an infinity, according to sign. // If sign > 0, IsInf reports whether x is positive infinity. // If sign < 0, IsInf reports whether x is negative infinity. // If sign == 0, IsInf reports whether x is either infinity. func (x *Float) IsInf(sign int) bool { return x.exp == infExp && (sign == 0 || x.neg == (sign < 0)) } // setExp sets the exponent for z. // If the exponent's magnitude is too large, z becomes ±Inf. func (z *Float) setExp(e int64) { if -MaxExp <= e && e <= MaxExp { if len(z.mant) == 0 { e = 0 } z.exp = int32(e) return } // Inf z.mant = z.mant[:0] z.exp = infExp } // debugging support func validate(x *Float) { const msb = 1 << (_W - 1) m := len(x.mant) if m == 0 { // 0.0 or Inf if x.exp != 0 && x.exp != infExp { panic(fmt.Sprintf("%empty matissa with invalid exponent %d", x.exp)) } return } if x.mant[m-1]&msb == 0 { panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Format('p', 0))) } if x.prec <= 0 { panic(fmt.Sprintf("invalid precision %d", x.prec)) } } // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. // sbit must be 0 or 1 and summarizes any "sticky bit" information one might // have before calling round. z's mantissa must be normalized (with the msb set) // or empty. // // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the // sign of z. For correct rounding, the sign of z must be set correctly before // calling round. func (z *Float) round(sbit uint) { if debugFloat { validate(z) } z.acc = Exact // handle zero and Inf m := uint(len(z.mant)) // present mantissa length in words if m == 0 { if z.exp != infExp { z.exp = 0 } return } // m > 0 implies z.prec > 0 (checked by validate) bits := m * _W // present mantissa bits if bits <= z.prec { // mantissa fits => nothing to do return } // bits > z.prec n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision // Rounding is based on two bits: the rounding bit (rbit) and the // sticky bit (sbit). The rbit is the bit immediately before the // z.prec leading mantissa bits (the "0.5"). The sbit is set if any // of the bits before the rbit are set (the "0.25", "0.125", etc.): // // rbit sbit => "fractional part" // // 0 0 == 0 // 0 1 > 0 , < 0.5 // 1 0 == 0.5 // 1 1 > 0.5, < 1.0 // bits > z.prec: mantissa too large => round r := bits - z.prec - 1 // rounding bit position; r >= 0 rbit := z.mant.bit(r) // rounding bit if sbit == 0 { sbit = z.mant.sticky(r) } if debugFloat && sbit&^1 != 0 { panic(fmt.Sprintf("invalid sbit %#x", sbit)) } // convert ToXInf rounding modes mode := z.mode switch mode { case ToNegativeInf: mode = ToZero if z.neg { mode = AwayFromZero } case ToPositiveInf: mode = AwayFromZero if z.neg { mode = ToZero } } // cut off extra words if m > n { copy(z.mant, z.mant[m-n:]) // move n last words to front z.mant = z.mant[:n] } // determine number of trailing zero bits t t := n*_W - z.prec // 0 <= t < _W lsb := Word(1) << t // make rounding decision // TODO(gri) This can be simplified (see roundBits in float_test.go). switch mode { case ToZero: // nothing to do case ToNearestEven, ToNearestAway: if rbit == 0 { // rounding bits == 0b0x mode = ToZero } else if sbit == 1 { // rounding bits == 0b11 mode = AwayFromZero } case AwayFromZero: if rbit|sbit == 0 { mode = ToZero } default: // ToXInf modes have been converted to ToZero or AwayFromZero panic("unreachable") } // round and determine accuracy switch mode { case ToZero: if rbit|sbit != 0 { z.acc = Below } case ToNearestEven, ToNearestAway: if debugFloat && rbit != 1 { panic("internal error in rounding") } if mode == ToNearestEven && sbit == 0 && z.mant[0]&lsb == 0 { z.acc = Below break } // mode == ToNearestAway || sbit == 1 || z.mant[0]&lsb != 0 fallthrough case AwayFromZero: // add 1 to mantissa if addVW(z.mant, z.mant, lsb) != 0 { // overflow => shift mantissa right by 1 and add msb shrVU(z.mant, z.mant, 1) z.mant[n-1] |= 1 << (_W - 1) // adjust exponent z.exp++ } z.acc = Above } // zero out trailing bits in least-significant word z.mant[0] &^= lsb - 1 // update accuracy if z.neg { z.acc = -z.acc } if debugFloat { validate(z) } return } // nlz returns the number of leading zero bits in x. func nlz(x Word) uint { return _W - uint(bitLen(x)) } func nlz64(x uint64) uint { // TODO(gri) this can be done more nicely if _W == 32 { if x>>32 == 0 { return 32 + nlz(Word(x)) } return nlz(Word(x >> 32)) } if _W == 64 { return nlz(Word(x)) } panic("unreachable") } func (z *Float) setBits64(neg bool, x uint64) *Float { if z.prec == 0 { z.prec = 64 } z.acc = Exact z.neg = neg if x == 0 { z.mant = z.mant[:0] z.exp = 0 return z } // x != 0 s := nlz64(x) z.mant = z.mant.setUint64(x << s) z.exp = int32(64 - s) // always fits if z.prec < 64 { z.round(0) } return z } // SetUint64 sets z to the (possibly rounded) value of x and returns z. // If z's precision is 0, it is changed to 64 (and rounding will have // no effect). func (z *Float) SetUint64(x uint64) *Float { return z.setBits64(false, x) } // SetInt64 sets z to the (possibly rounded) value of x and returns z. // If z's precision is 0, it is changed to 64 (and rounding will have // no effect). func (z *Float) SetInt64(x int64) *Float { u := x if u < 0 { u = -u } // We cannot simply call z.SetUint64(uint64(u)) and change // the sign afterwards because the sign affects rounding. return z.setBits64(x < 0, uint64(u)) } // SetFloat64 sets z to the (possibly rounded) value of x and returns z. // If z's precision is 0, it is changed to 53 (and rounding will have // no effect). // If x is denormalized or NaN, the result is unspecified. // TODO(gri) should return nil in those cases func (z *Float) SetFloat64(x float64) *Float { if z.prec == 0 { z.prec = 53 } z.acc = Exact z.neg = math.Signbit(x) // handle -0 correctly if math.IsInf(x, 0) { z.mant = z.mant[:0] z.exp = infExp return z } if x == 0 { z.mant = z.mant[:0] z.exp = 0 return z } // x != 0 fmant, exp := math.Frexp(x) // get normalized mantissa z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) z.exp = int32(exp) // always fits if z.prec < 53 { z.round(0) } return z } // fnorm normalizes mantissa m by shifting it to the left // such that the msb of the most-significant word (msw) is 1. // It returns the shift amount. It assumes that len(m) != 0. func fnorm(m nat) int64 { if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { panic("msw of mantissa is 0") } s := nlz(m[len(m)-1]) if s > 0 { c := shlVU(m, m, s) if debugFloat && c != 0 { panic("nlz or shlVU incorrect") } } return int64(s) } // SetInt sets z to the (possibly rounded) value of x and returns z. // If z's precision is 0, it is changed to the larger of x.BitLen() // or 64 (and rounding will have no effect). func (z *Float) SetInt(x *Int) *Float { // TODO(gri) can be more efficient if z.prec > 0 // but small compared to the size of x, or if there // are many trailing 0's. bits := uint(x.BitLen()) if z.prec == 0 { z.prec = umax(bits, 64) } z.acc = Exact z.neg = x.neg if len(x.abs) == 0 { z.mant = z.mant[:0] z.exp = 0 return z } // x != 0 z.mant = z.mant.set(x.abs) fnorm(z.mant) z.setExp(int64(bits)) if z.prec < bits { z.round(0) } return z } // SetRat sets z to the (possibly rounded) value of x and returns z. // If z's precision is 0, it is changed to the largest of a.BitLen(), // b.BitLen(), or 64; with x = a/b. func (z *Float) SetRat(x *Rat) *Float { if x.IsInt() { return z.SetInt(x.Num()) } var a, b Float a.SetInt(x.Num()) b.SetInt(x.Denom()) if z.prec == 0 { z.prec = umax(a.prec, b.prec) } return z.Quo(&a, &b) } // Set sets z to the (possibly rounded) value of x and returns z. // If z's precision is 0, it is changed to the precision of x // before setting z (and rounding will have no effect). // Rounding is performed according to z's precision and rounding // mode; and z's accuracy reports the result error relative to the // exact (not rounded) result. func (z *Float) Set(x *Float) *Float { // TODO(gri) what about z.acc? should it be always Exact? if z != x { if z.prec == 0 { z.prec = x.prec } z.acc = Exact z.neg = x.neg z.exp = x.exp z.mant = z.mant.set(x.mant) if z.prec < x.prec { z.round(0) } } return z } // Copy sets z to x, with the same precision and rounding mode as x, // and returns z. func (z *Float) Copy(x *Float) *Float { // TODO(gri) what about z.acc? should it be always Exact? if z != x { z.acc = Exact z.neg = x.neg z.exp = x.exp z.mant = z.mant.set(x.mant) z.prec = x.prec z.mode = x.mode } return z } func high64(x nat) uint64 { i := len(x) if i == 0 { return 0 } // i > 0 v := uint64(x[i-1]) if _W == 32 { v <<= 32 if i > 1 { v |= uint64(x[i-2]) } } return v } // minPrec returns the minimum precision required to represent // x without loss of accuracy. // TODO(gri) this might be useful to export, perhaps under a better name func (x *Float) minPrec() uint { return uint(len(x.mant))*_W - x.mant.trailingZeroBits() } // Uint64 returns the unsigned integer resulting from truncating x // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact // if x is an integer and Below otherwise. // The result is (0, Above) for x < 0, and (math.MaxUint64, Below) // for x > math.MaxUint64. func (x *Float) Uint64() (uint64, Accuracy) { if debugFloat { validate(x) } switch x.ord() { case -2, -1: // x < 0 return 0, Above case 0: // x == 0 || x == -0 return 0, Exact case 1: // 0 < x < +Inf if x.exp <= 0 { // 0 < x < 1 return 0, Below } // 1 <= x < +Inf if x.exp <= 64 { // u = trunc(x) fits into a uint64 u := high64(x.mant) >> (64 - uint32(x.exp)) if x.minPrec() <= 64 { return u, Exact } return u, Below // x truncated } fallthrough // x too large case 2: // x == +Inf return math.MaxUint64, Below } panic("unreachable") } // Int64 returns the integer resulting from truncating x towards zero. // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is // an integer, and Above (x < 0) or Below (x > 0) otherwise. // The result is (math.MinInt64, Above) for x < math.MinInt64, and // (math.MaxInt64, Below) for x > math.MaxInt64. func (x *Float) Int64() (int64, Accuracy) { if debugFloat { validate(x) } switch x.ord() { case -2: // x == -Inf return math.MinInt64, Above case 0: // x == 0 || x == -0 return 0, Exact case -1, 1: // 0 < |x| < +Inf acc := Below if x.neg { acc = Above } if x.exp <= 0 { // 0 < |x| < 1 return 0, acc } // 1 <= |x| < +Inf if x.exp <= 63 { // i = trunc(x) fits into an int64 (excluding math.MinInt64) i := int64(high64(x.mant) >> (64 - uint32(x.exp))) if x.neg { i = -i } if x.minPrec() <= 63 { return i, Exact } return i, acc // x truncated } if x.neg { // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) if x.exp == 64 && x.minPrec() == 1 { acc = Exact } return math.MinInt64, acc } fallthrough case 2: // x == +Inf return math.MaxInt64, Below } panic("unreachable") } // Float64 returns the closest float64 value of x // by rounding to nearest with 53 bits precision. // TODO(gri) implement/document error scenarios. func (x *Float) Float64() (float64, Accuracy) { // x == ±Inf if x.exp == infExp { var sign int if x.neg { sign = -1 } return math.Inf(sign), Exact } // x == 0 if len(x.mant) == 0 { return 0, Exact } // x != 0 var r Float r.prec = 53 r.Set(x) var s uint64 if r.neg { s = 1 << 63 } e := uint64(1022+r.exp) & 0x7ff // TODO(gri) check for overflow m := high64(r.mant) >> 11 & (1<<52 - 1) return math.Float64frombits(s | e<<52 | m), r.acc } // Int returns the result of truncating x towards zero; or nil // if x is an infinity. The result is Exact if x.IsInt(); // otherwise it is Below for x > 0, and Above for x < 0. func (x *Float) Int() (res *Int, acc Accuracy) { // TODO(gri) accept z argument for result storage (see Float.Rat below) if debugFloat { validate(x) } // accuracy for inexact results acc = Below // truncation if x.neg { acc = Above } // pick off easy cases if x.exp <= 0 { // |x| < 1 || |x| == Inf if x.exp == infExp { return nil, acc // ±Inf } if len(x.mant) == 0 { acc = Exact // ±0 } return new(Int), acc // ±0.xxx } // x.exp > 0 // x.mant[len(x.mant)-1] != 0 // determine minimum required precision for x allBits := uint(len(x.mant)) * _W exp := uint(x.exp) if x.minPrec() <= exp { acc = Exact } // shift mantissa as needed res = &Int{neg: x.neg} // TODO(gri) should have a shift that takes positive and negative shift counts switch { case exp > allBits: res.abs = res.abs.shl(x.mant, exp-allBits) default: res.abs = res.abs.set(x.mant) case exp < allBits: res.abs = res.abs.shr(x.mant, allBits-exp) } return } // Rat returns x converted into an exact fraction; or nil if x is an infinity. // If a non-nil *Rat argument z is provided, it is used to store the result; // otherwise a new Rat is allocated. func (x *Float) Rat(z *Rat) *Rat { if debugFloat { validate(x) } // pick off easy cases switch x.ord() { case -2, +2: return nil // ±Inf case 0: if z == nil { return new(Rat) } return z.SetInt64(0) } // x != 0 && x != ±Inf allBits := int32(len(x.mant)) * _W // build up numerator and denominator if z == nil { z = new(Rat) } z.a.neg = x.neg switch { case x.exp > allBits: z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) z.b.abs = z.b.abs[:0] // == 1 (see Rat) return z // already in normal form default: z.a.abs = z.a.abs.set(x.mant) z.b.abs = z.b.abs[:0] // == 1 (see Rat) return z // already in normal form case x.exp < allBits: z.a.abs = z.a.abs.set(x.mant) t := z.b.abs.setUint64(1) z.b.abs = t.shl(t, uint(allBits-x.exp)) return z.norm() } } // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) // and returns z. func (z *Float) Abs(x *Float) *Float { z.Set(x) z.neg = false return z } // Neg sets z to the (possibly rounded) value of x with its sign negated, // and returns z. func (z *Float) Neg(x *Float) *Float { z.Set(x) z.neg = !z.neg return z } // z = x + y, ignoring signs of x and y. // x and y must not be 0 or an Inf. func (z *Float) uadd(x, y *Float) { // Note: This implementation requires 2 shifts most of the // time. It is also inefficient if exponents or precisions // differ by wide margins. The following article describes // an efficient (but much more complicated) implementation // compatible with the internal representation used here: // // Vincent Lefèvre: "The Generic Multiple-Precision Floating- // Point Addition With Exact Rounding (as in the MPFR Library)" // http://www.vinc17.net/research/papers/rnc6.pdf if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) { panic("uadd called with 0 argument") } // compute exponents ex, ey for mantissa with "binary point" // on the right (mantissa.0) - use int64 to avoid overflow ex := int64(x.exp) - int64(len(x.mant))*_W ey := int64(y.exp) - int64(len(y.mant))*_W // TODO(gri) having a combined add-and-shift primitive // could make this code significantly faster switch { case ex < ey: // cannot re-use z.mant w/o testing for aliasing t := nat(nil).shl(y.mant, uint(ey-ex)) z.mant = z.mant.add(x.mant, t) default: // ex == ey, no shift needed z.mant = z.mant.add(x.mant, y.mant) case ex > ey: // cannot re-use z.mant w/o testing for aliasing t := nat(nil).shl(x.mant, uint(ex-ey)) z.mant = z.mant.add(t, y.mant) ex = ey } // len(z.mant) > 0 z.setExp(ex + int64(len(z.mant))*_W - fnorm(z.mant)) z.round(0) } // z = x - y for x >= y, ignoring signs of x and y. // x and y must not be 0 or an Inf. func (z *Float) usub(x, y *Float) { // This code is symmetric to uadd. // We have not factored the common code out because // eventually uadd (and usub) should be optimized // by special-casing, and the code will diverge. if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) { panic("usub called with 0 argument") } ex := int64(x.exp) - int64(len(x.mant))*_W ey := int64(y.exp) - int64(len(y.mant))*_W switch { case ex < ey: // cannot re-use z.mant w/o testing for aliasing t := nat(nil).shl(y.mant, uint(ey-ex)) z.mant = t.sub(x.mant, t) default: // ex == ey, no shift needed z.mant = z.mant.sub(x.mant, y.mant) case ex > ey: // cannot re-use z.mant w/o testing for aliasing t := nat(nil).shl(x.mant, uint(ex-ey)) z.mant = t.sub(t, y.mant) ex = ey } // operands may have cancelled each other out if len(z.mant) == 0 { z.acc = Exact z.setExp(0) return } // len(z.mant) > 0 z.setExp(ex + int64(len(z.mant))*_W - fnorm(z.mant)) z.round(0) } // z = x * y, ignoring signs of x and y. // x and y must not be 0 or an Inf. func (z *Float) umul(x, y *Float) { if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) { panic("umul called with 0 argument") } // Note: This is doing too much work if the precision // of z is less than the sum of the precisions of x // and y which is often the case (e.g., if all floats // have the same precision). // TODO(gri) Optimize this for the common case. e := int64(x.exp) + int64(y.exp) z.mant = z.mant.mul(x.mant, y.mant) // normalize mantissa z.setExp(e - fnorm(z.mant)) z.round(0) } // z = x / y, ignoring signs of x and y. // x and y must not be 0 or an Inf. func (z *Float) uquo(x, y *Float) { if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) { panic("uquo called with 0 argument") } // mantissa length in words for desired result precision + 1 // (at least one extra bit so we get the rounding bit after // the division) n := int(z.prec/_W) + 1 // compute adjusted x.mant such that we get enough result precision xadj := x.mant if d := n - len(x.mant) + len(y.mant); d > 0 { // d extra words needed => add d "0 digits" to x xadj = make(nat, len(x.mant)+d) copy(xadj[d:], x.mant) } // TODO(gri): If we have too many digits (d < 0), we should be able // to shorten x for faster division. But we must be extra careful // with rounding in that case. // Compute d before division since there may be aliasing of x.mant // (via xadj) or y.mant with z.mant. d := len(xadj) - len(y.mant) // divide var r nat z.mant, r = z.mant.div(nil, xadj, y.mant) // determine exponent e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W // normalize mantissa z.setExp(e - fnorm(z.mant)) // The result is long enough to include (at least) the rounding bit. // If there's a non-zero remainder, the corresponding fractional part // (if it were computed), would have a non-zero sticky bit (if it were // zero, it couldn't have a non-zero remainder). var sbit uint if len(r) > 0 { sbit = 1 } z.round(sbit) } // ucmp returns -1, 0, or 1, depending on whether x < y, x == y, or x > y, // while ignoring the signs of x and y. x and y must not be 0 or an Inf. func (x *Float) ucmp(y *Float) int { if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) { panic("ucmp called with 0 argument") } switch { case x.exp < y.exp: return -1 case x.exp > y.exp: return 1 } // x.exp == y.exp // compare mantissas i := len(x.mant) j := len(y.mant) for i > 0 || j > 0 { var xm, ym Word if i > 0 { i-- xm = x.mant[i] } if j > 0 { j-- ym = y.mant[j] } switch { case xm < ym: return -1 case xm > ym: return 1 } } return 0 } // Handling of sign bit as defined by IEEE 754-2008, // section 6.3 (note that there are no NaN Floats): // // When neither the inputs nor result are NaN, the sign of a product or // quotient is the exclusive OR of the operands’ signs; the sign of a sum, // or of a difference x−y regarded as a sum x+(−y), differs from at most // one of the addends’ signs; and the sign of the result of conversions, // the quantize operation, the roundToIntegral operations, and the // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. // These rules shall apply even when operands or results are zero or infinite. // // When the sum of two operands with opposite signs (or the difference of // two operands with like signs) is exactly zero, the sign of that sum (or // difference) shall be +0 in all rounding-direction attributes except // roundTowardNegative; under that attribute, the sign of an exact zero // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same // sign as x even when x is zero. // Add sets z to the rounded sum x+y and returns z. // If z's precision is 0, it is changed to the larger // of x's or y's precision before the operation. // Rounding is performed according to z's precision // and rounding mode; and z's accuracy reports the // result error relative to the exact (not rounded) // result. func (z *Float) Add(x, y *Float) *Float { if debugFloat { validate(x) validate(y) } if z.prec == 0 { z.prec = umax(x.prec, y.prec) } // TODO(gri) what about -0? if len(y.mant) == 0 { // TODO(gri) handle Inf return z.Set(x) } if len(x.mant) == 0 { // TODO(gri) handle Inf return z.Set(y) } // x, y != 0 z.neg = x.neg if x.neg == y.neg { // x + y == x + y // (-x) + (-y) == -(x + y) z.uadd(x, y) } else { // x + (-y) == x - y == -(y - x) // (-x) + y == y - x == -(x - y) if x.ucmp(y) >= 0 { z.usub(x, y) } else { z.neg = !z.neg z.usub(y, x) } } return z } // Sub sets z to the rounded difference x-y and returns z. // Precision, rounding, and accuracy reporting are as for Add. func (z *Float) Sub(x, y *Float) *Float { if debugFloat { validate(x) validate(y) } if z.prec == 0 { z.prec = umax(x.prec, y.prec) } // TODO(gri) what about -0? if len(y.mant) == 0 { // TODO(gri) handle Inf return z.Set(x) } if len(x.mant) == 0 { return z.Neg(y) } // x, y != 0 z.neg = x.neg if x.neg != y.neg { // x - (-y) == x + y // (-x) - y == -(x + y) z.uadd(x, y) } else { // x - y == x - y == -(y - x) // (-x) - (-y) == y - x == -(x - y) if x.ucmp(y) >= 0 { z.usub(x, y) } else { z.neg = !z.neg z.usub(y, x) } } return z } // Mul sets z to the rounded product x*y and returns z. // Precision, rounding, and accuracy reporting are as for Add. func (z *Float) Mul(x, y *Float) *Float { if debugFloat { validate(x) validate(y) } if z.prec == 0 { z.prec = umax(x.prec, y.prec) } // TODO(gri) handle Inf // TODO(gri) what about -0? if len(x.mant) == 0 || len(y.mant) == 0 { z.neg = false z.mant = z.mant[:0] z.exp = 0 z.acc = Exact return z } // x, y != 0 z.neg = x.neg != y.neg z.umul(x, y) return z } // Quo sets z to the rounded quotient x/y and returns z. // Precision, rounding, and accuracy reporting are as for Add. func (z *Float) Quo(x, y *Float) *Float { if debugFloat { validate(x) validate(y) } if z.prec == 0 { z.prec = umax(x.prec, y.prec) } // TODO(gri) handle Inf // TODO(gri) check that this is correct z.neg = x.neg != y.neg if len(y.mant) == 0 { z.setExp(infExp) return z } if len(x.mant) == 0 { z.mant = z.mant[:0] z.exp = 0 z.acc = Exact return z } // x, y != 0 z.uquo(x, y) return z } // Lsh sets z to the rounded x * (1< y // // Infinities with matching sign are equal. func (x *Float) Cmp(y *Float) int { if debugFloat { validate(x) validate(y) } mx := x.ord() my := y.ord() switch { case mx < my: return -1 case mx > my: return +1 } // mx == my // only if |mx| == 1 we have to compare the mantissae switch mx { case -1: return -x.ucmp(y) case +1: return +x.ucmp(y) } return 0 } func umax(x, y uint) uint { if x > y { return x } return y } // ord classifies x and returns: // // -2 if -Inf == x // -1 if -Inf < x < 0 // 0 if x == 0 (signed or unsigned) // +1 if 0 < x < +Inf // +2 if x == +Inf // // TODO(gri) export (and remove IsInf)? func (x *Float) ord() int { m := 1 // common case if len(x.mant) == 0 { m = 0 if x.exp == infExp { m = 2 } } if x.neg { m = -m } return m }