internal/strconv: use fast unrounded scaling for floating-point

Change floating-point parsing and printing to use fast unrounded scaling,
as presented in “Floating-Point Printing and Parsing Can Be Simple And Fast”,
https://research.swtch.com/fp

This CL deletes almost 900 lines of code while making printing much faster.
Parsing is simpler but about the same speed.

benchmark \ host                       local  linux-arm64       s7  linux-amd64  s7:GOARCH=386  linux-386
                                     vs base      vs base  vs base      vs base        vs base    vs base
Atof64Decimal                              ~       -0.47%   +1.67%       -2.70%         +1.21%     +2.70%
Atof64Float                                ~       +2.48%        ~            ~              ~          ~
Atof64FloatExp                             ~       +1.15%        ~            ~         +1.83%     +3.87%
Atof64Big                                  ~       +4.74%        ~            ~         +4.17%     +7.30%
Atof64RandomBits                     -27.95%            ~  -10.11%      -10.54%              ~          ~
Atof64RandomFloats                         ~       +0.75%        ~            ~              ~          ~
Atof64RandomLongFloats                -6.24%       -4.17%   -9.60%       -8.55%         -2.62%     -4.52%
Atof32Decimal                              ~       +3.24%   +3.76%       +3.89%              ~          ~
Atof32Float                                ~       +2.77%        ~            ~              ~          ~
Atof32FloatExp                             ~       +4.02%        ~            ~         +2.56%     +6.55%
Atof32Random                         -11.97%      -10.38%  -14.53%      -12.41%         -5.65%     -3.64%
Atof32RandomLong                     -22.77%      -22.45%  -20.00%      -26.52%        -13.36%    -13.84%
AppendFloat/Decimal                  +10.82%       +2.66%   -3.00%            ~              ~          ~
AppendFloat/Float                    -14.26%       -2.49%  -21.84%      -10.52%         -4.70%     -4.84%
AppendFloat/Exp                       +7.69%       +6.01%   -4.15%            ~         -1.04%          ~
AppendFloat/NegExp                    +8.95%       +5.75%   -4.73%            ~              ~          ~
AppendFloat/LongExp                  -33.48%      -29.01%  -31.61%      -34.75%        -23.30%    -21.21%
AppendFloat/Big                      -16.91%      -27.85%  -32.91%      -30.01%        -18.67%     -8.51%
AppendFloat/BinaryExp                -19.88%       -8.47%   -9.40%      -18.08%         -9.40%     -9.33%
AppendFloat/32Integer                +29.68%      -11.91%  -12.31%      -15.43%         +5.49%    +14.83%
AppendFloat/32ExactFraction          -14.62%       -7.34%  -14.28%      -13.78%         +6.46%    +15.68%
AppendFloat/32Point                        ~      -16.51%  -30.84%      -22.16%              ~    +11.04%
AppendFloat/32Exp                     -7.44%       -7.72%   -8.47%      -12.78%         +6.13%    +18.65%
AppendFloat/32NegExp                  -6.36%       -7.58%  -10.05%      -13.23%         +7.69%    +18.18%
AppendFloat/32Shortest               -17.45%      -18.23%  -18.03%      -19.29%         +2.68%    +11.41%
AppendFloat/32Fixed8Hard             -13.57%      -14.52%  -14.91%      -16.55%        -20.28%    -23.51%
AppendFloat/32Fixed9Hard             -16.06%      -16.69%  -11.75%      -19.42%         -5.12%          ~
AppendFloat/64Fixed1                  -3.25%       -9.70%   -8.67%      -14.11%        -13.08%    -14.23%
AppendFloat/64Fixed2                  -1.77%       -9.77%   -9.40%      -12.25%        -12.69%    -13.32%
AppendFloat/64Fixed2.5                -3.46%       -6.21%  -12.38%      -10.86%        -10.47%    -11.31%
AppendFloat/64Fixed3                       ~       -9.39%  -11.13%      -14.39%        -14.50%    -11.16%
AppendFloat/64Fixed4                       ~      -11.91%  -20.62%      -13.40%        -19.78%    -22.41%
AppendFloat/64Fixed5Hard              -6.45%       -7.36%  -13.88%      -12.42%        -12.31%    -12.92%
AppendFloat/64Fixed12                -26.39%      -23.15%  -29.45%      -28.11%        -24.63%    -27.61%
AppendFloat/64Fixed16                      ~      -15.85%  -21.24%      -19.76%        -24.24%    -26.14%
AppendFloat/64Fixed12Hard            -16.25%      -12.77%  -18.74%      -19.20%        -17.08%    -18.95%
AppendFloat/64Fixed17Hard            -16.81%       -9.80%  -12.77%      -17.19%         -2.75%     +6.06%
AppendFloat/64Fixed18Hard                  ~       -0.76%        ~            ~              ~    -26.49%
AppendFloat/64FixedF1                +16.15%      -12.93%  -18.60%      -18.24%         +1.57%          ~
AppendFloat/64FixedF2                -16.83%       -9.77%  -12.09%      -18.43%        -13.44%    -15.23%
AppendFloat/64FixedF3                      ~       -5.68%   -9.65%      -15.14%         -8.87%    -11.83%
AppendFloat/Slowpath64               -33.56%      -28.32%  -32.45%      -33.42%        -22.77%    -18.76%
AppendFloat/SlowpathDenormal64       -31.53%      -25.45%  -32.60%      -25.27%        -13.36%     -6.95%
AppendFloat/ShorterIntervalCase32    -19.52%      -14.41%  -13.89%      -17.03%              ~    +12.21%
AppendFloat/ShorterIntervalCase64    +14.00%      +14.94%   +4.06%            ~         +9.43%     +8.07%
AppendUint                           -33.66%      -13.19%  -11.52%      -13.39%        -13.68%     -9.04%
AppendUintVarlen/digits=1                  ~       -4.96%        ~            ~              ~    +13.97%
AppendUintVarlen/digits=2            +10.01%       +2.45%        ~            ~              ~    +11.23%
AppendUintVarlen/digits=3             -5.10%       +0.53%   +2.32%       +3.74%        +18.05%    +61.14%
AppendUintVarlen/digits=4                  ~      +14.32%        ~       +6.86%        +22.09%    +61.28%
AppendUintVarlen/digits=5            -18.17%       +0.62%   +1.13%            ~        +13.94%    +48.42%
AppendUintVarlen/digits=6             -8.74%       +7.58%   +2.47%       +7.86%        +17.45%    +50.58%
AppendUintVarlen/digits=7            -27.30%       -2.17%   -1.61%            ~         +8.31%    +37.41%
AppendUintVarlen/digits=8            -19.80%      +13.49%        ~            ~        +19.81%    +55.28%
AppendUintVarlen/digits=9            -28.86%       +3.29%        ~       -7.81%        +10.42%    +39.20%
AppendUintVarlen/digits=10           -33.46%       -8.00%  -12.57%      -19.07%         -8.59%     +7.48%
AppendUintVarlen/digits=11           -37.91%       -8.32%  -11.85%      -16.89%        -10.14%          ~
AppendUintVarlen/digits=12           -28.93%       -7.26%  -14.27%      -23.26%        -12.07%          ~
AppendUintVarlen/digits=13           -33.20%       -8.87%  -13.43%      -21.56%         -8.39%          ~
AppendUintVarlen/digits=14           -33.20%       -9.49%  -13.50%      -21.92%        -10.43%          ~
AppendUintVarlen/digits=15           -36.90%       -9.16%   -8.48%      -16.95%        -10.62%          ~
AppendUintVarlen/digits=16           -36.20%       -8.06%  -13.58%      -20.92%         -6.67%     +3.52%
AppendUintVarlen/digits=17           -36.15%       -7.47%  -14.12%      -21.53%         -6.00%     +3.88%
AppendUintVarlen/digits=18           -35.85%       -7.56%  -14.12%      -19.66%         -9.16%          ~
AppendUintVarlen/digits=19           -43.45%      -17.14%  -20.38%      -28.29%        -25.25%    -16.47%
AppendUintVarlen/digits=20           -40.70%      -13.60%  -18.66%      -24.18%        -24.69%    -17.33%

Change-Id: I4eed57cfbf398b5d5327efd749e13610e17153e9
Reviewed-on: https://go-review.googlesource.com/c/go/+/743860
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Neal Patel <neal@golang.org>
Reviewed-by: Neal Patel <nealpatel@google.com>
This commit is contained in:
Russ Cox 2026-01-30 12:09:42 -05:00
parent fd7a0e680d
commit 71300e8011
18 changed files with 1365 additions and 2216 deletions

View file

@ -243,6 +243,10 @@ func TestIntendedInlining(t *testing.T) {
"path/filepath": {
"scanChunk",
},
"internal/strconv": {
"prescale",
"uscale",
},
}
if runtime.GOARCH != "386" && runtime.GOARCH != "loong64" && runtime.GOARCH != "mips64" && runtime.GOARCH != "mips64le" && runtime.GOARCH != "riscv64" {

View file

@ -4,6 +4,17 @@
package strconv
// floatInfo describes the bit layout of a binary floating-point format.
// The field order is significant: the values below are built with
// positional composite literals.
type floatInfo struct {
// mantbits is the number of stored mantissa bits.
mantbits uint
// expbits is the number of exponent bits.
expbits uint
// bias is added to the stored exponent field to obtain the actual exponent.
bias int
}
// Layout descriptions for float32 and float64.
var (
float32info = floatInfo{float32MantBits, float32ExpBits, float32Bias}
float64info = floatInfo{float64MantBits, float64ExpBits, float64Bias}
)
// decimal to binary floating point conversion.
// Algorithm:
// 1) Store input in multiprecision decimal.
@ -565,45 +576,54 @@ func atof32(s string) (f float32, n int, err error) {
return float32(val), n, nil
}
mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
d, p, neg, trunc, hex, n, ok := readFloat(s)
if !ok {
return 0, n, ErrSyntax
}
if hex {
f, err := atofHex(s[:n], &float32info, mantissa, exp, neg, trunc)
f, err := atofHex(s[:n], &float32info, d, p, neg, trunc)
return float32(f), n, err
}
if optimize {
// Try pure floating-point arithmetic conversion, and if that fails,
// the Eisel-Lemire algorithm.
sign := bool2[uint32](neg) << 31
if d == 0 {
return float32frombits(sign | 0), n, nil
}
if p > 40 { // overflow to ±Inf
return float32frombits(sign | 0xff<<23), n, ErrRange
}
if p < -70 { // underflow to ±0
return float32frombits(sign | 0), n, nil
}
if !trunc {
if f, ok := atof32exact(mantissa, exp, neg); ok {
// Exact rounding with single multiplication or division.
if f, ok := atof32exact(d, p, neg); ok {
return f, n, nil
}
}
f, ok := eiselLemire32(mantissa, exp, neg)
if ok {
if !trunc {
return f, n, nil
}
// Even if the mantissa was truncated, we may
// have found the correct result. Confirm by
// converting the upper mantissa bound.
fUp, ok := eiselLemire32(mantissa+1, exp, neg)
if ok && f == fUp {
return f, n, nil
}
// Use fast unrounded scaling.
// The only possible err is ErrRange, when the result overflows to ±Inf.
f, err := parseFloat32(d, p, sign)
if !trunc {
return f, n, err
}
// If additional digits were truncated from d
// but d+1 converts to the same value,
// then the additional digits don't matter.
f1, _ := parseFloat32(d+1, p, sign)
if f == f1 {
return f, n, err
}
}
// Slow fallback.
var d decimal
if !d.set(s[:n]) {
var dec decimal
if !dec.set(s[:n]) {
return 0, n, ErrSyntax
}
b, ovf := d.floatBits(&float32info)
b, ovf := dec.floatBits(&float32info)
f = float32frombits(uint32(b))
if ovf {
err = ErrRange
@ -616,45 +636,52 @@ func atof64(s string) (f float64, n int, err error) {
return val, n, nil
}
mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
d, p, neg, trunc, hex, n, ok := readFloat(s)
if !ok {
return 0, n, ErrSyntax
}
if hex {
f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc)
f, err := atofHex(s[:n], &float64info, d, p, neg, trunc)
return f, n, err
}
if optimize {
// Try pure floating-point arithmetic conversion, and if that fails,
// the Eisel-Lemire algorithm.
sign := bool2[uint64](neg) << 63
if d == 0 {
return float64frombits(sign | 0), n, nil
}
if p > 310 { // overflow to ±Inf
return float64frombits(sign | 0x7ff<<52), n, ErrRange
}
if p < -345 { // underflow to ±0
return float64frombits(sign | 0), n, nil
}
if !trunc {
if f, ok := atof64exact(mantissa, exp, neg); ok {
// Exact rounding with single multiplication or division.
if f, ok := atof64exact(d, p, neg); ok {
return f, n, nil
}
}
f, ok := eiselLemire64(mantissa, exp, neg)
if ok {
if !trunc {
return f, n, nil
}
// Even if the mantissa was truncated, we may
// have found the correct result. Confirm by
// converting the upper mantissa bound.
fUp, ok := eiselLemire64(mantissa+1, exp, neg)
if ok && f == fUp {
return f, n, nil
}
// Use fast unrounded scaling.
// The only possible err is ErrRange, when the result overflows to ±Inf.
f, err := parseFloat64(d, p, sign)
if !trunc {
return f, n, err
}
// If additional digits were truncated from d
// but d+1 converts to the same value,
// then the additional digits don't matter.
f1, _ := parseFloat64(d+1, p, sign)
if f == f1 {
return f, n, err
}
}
// Slow fallback.
var d decimal
if !d.set(s[:n]) {
var dec decimal
if !dec.set(s[:n]) {
return 0, n, ErrSyntax
}
b, ovf := d.floatBits(&float64info)
b, ovf := dec.floatBits(&float64info)
f = float64frombits(b)
if ovf {
err = ErrRange

View file

@ -196,6 +196,9 @@ var atoftests = []atofTest{
// way too small
{"1e-350", "0", nil},
{"1e-400000", "0", nil},
{"1e-345", "0", nil}, // picked off in atof64
{"1e-343", "0", nil}, // large c.s in parseFloat64
{"9.999999999999999999e-343", "0", nil},
// Near denormals and denormals.
{"0x2.00000000000000p-1010", "1.8227805048890994e-304", nil}, // 0x00e0000000000000
@ -420,6 +423,11 @@ var atof32tests = []atofTest{
{"0x0.0000008p-125", "0", nil}, // rounded down
{"0x0.0000007p-125", "0", nil}, // rounded down
{"1e-70", "0", nil}, // picked off in atof32
{"1e-65", "0", nil}, // picked off in atof32
{"1e-64", "0", nil}, // large c.s in parseFloat32
{"9.999999999999999999e-64", "0", nil},
// 2^92 = 8388608p+69 = 4951760157141521099596496896 (4.9517602e27)
// is an exact power of two that needs 8 decimal digits to be correctly
// parsed back.

View file

@ -1,166 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strconv
// This file implements the Eisel-Lemire ParseFloat algorithm, published in
// 2020 and discussed extensively at
// https://nigeltao.github.io/blog/2020/eisel-lemire.html
//
// The original C++ implementation is at
// https://github.com/lemire/fast_double_parser/blob/644bef4306059d3be01a04e77d3cc84b379c596f/include/fast_double_parser.h#L840
//
// This Go re-implementation closely follows the C re-implementation at
// https://github.com/google/wuffs/blob/ba3818cb6b473a2ed0b38ecfc07dbbd3a97e8ae7/internal/cgen/base/floatconv-submodule-code.c#L990
//
// Additional testing (on several million test strings) is done by
// https://github.com/nigeltao/parse-number-fxx-test-data/blob/5280dcfccf6d0b02a65ae282dad0b6d9de50e039/script/test-go-strconv.go
import (
"math/bits"
)
// eiselLemire64 tries to convert the decimal value man * 10^exp10 (negated
// when neg is set) to a float64 using the Eisel-Lemire algorithm.
//
// It returns ok == false when this fast path cannot produce a result:
// pow10 has no entry for exp10, the wider approximation is still ambiguous,
// the value is a half-way case, or the resulting exponent lies outside the
// normal float64 range. A zero man yields a correctly signed zero with
// ok == true.
func eiselLemire64(man uint64, exp10 int, neg bool) (f float64, ok bool) {
// The terse comments in this function body refer to sections of the
// https://nigeltao.github.io/blog/2020/eisel-lemire.html blog post.
// Exp10 Range.
if man == 0 {
if neg {
f = float64frombits(0x8000000000000000) // Negative zero.
}
return f, true
}
pow, exp2, ok := pow10(exp10)
if !ok {
return 0, false
}
// Normalization.
// Shift man left so that its top bit is set; the exponent is kept as an
// unsigned value so that underflow wraps and is caught by the final
// range check.
clz := bits.LeadingZeros64(man)
man <<= uint(clz)
retExp2 := uint64(exp2+63-float64Bias) - uint64(clz)
// Multiplication.
xHi, xLo := bits.Mul64(man, pow.Hi)
// Wider Approximation.
// xLo+man < man is true exactly when the unsigned addition wraps around.
if xHi&0x1FF == 0x1FF && xLo+man < man {
yHi, yLo := bits.Mul64(man, pow.Lo)
mergedHi, mergedLo := xHi, xLo+yHi
if mergedLo < xLo {
// Carry out of the low word into the high word.
mergedHi++
}
if mergedHi&0x1FF == 0x1FF && mergedLo+1 == 0 && yLo+man < man {
return 0, false
}
xHi, xLo = mergedHi, mergedLo
}
// Shifting to 54 Bits.
msb := xHi >> 63
retMantissa := xHi >> (msb + 9)
retExp2 -= 1 ^ msb
// Half-way Ambiguity.
if xLo == 0 && xHi&0x1FF == 0 && retMantissa&3 == 1 {
return 0, false
}
// From 54 to 53 Bits.
// Adding the low bit before shifting rounds the dropped bit upward.
retMantissa += retMantissa & 1
retMantissa >>= 1
if retMantissa>>53 > 0 {
// Rounding carried into a 54th bit; renormalize.
retMantissa >>= 1
retExp2 += 1
}
// retExp2 is a uint64. Zero or underflow means that we're in subnormal
// float64 space. 0x7FF or above means that we're in Inf/NaN float64 space.
//
// The if block is equivalent to (but has fewer branches than):
// if retExp2 <= 0 || retExp2 >= 0x7FF { etc }
if retExp2-1 >= 0x7FF-1 {
return 0, false
}
// Assemble exponent field, mantissa field (implicit top bit masked off),
// and sign bit.
retBits := retExp2<<float64MantBits | retMantissa&(1<<float64MantBits-1)
if neg {
retBits |= 0x8000000000000000
}
return float64frombits(retBits), true
}
// eiselLemire32 tries to convert the decimal value man * 10^exp10 (negated
// when neg is set) to a float32 using the Eisel-Lemire algorithm.
//
// It returns ok == false when this fast path cannot produce a result:
// pow10 has no entry for exp10, the wider approximation is still ambiguous,
// the value is a half-way case, or the resulting exponent lies outside the
// normal float32 range. A zero man yields a correctly signed zero with
// ok == true.
func eiselLemire32(man uint64, exp10 int, neg bool) (f float32, ok bool) {
// The terse comments in this function body refer to sections of the
// https://nigeltao.github.io/blog/2020/eisel-lemire.html blog post.
//
// That blog post discusses the float64 flavor (11 exponent bits with a
// -1023 bias, 52 mantissa bits) of the algorithm, but the same approach
// applies to the float32 flavor (8 exponent bits with a -127 bias, 23
// mantissa bits). The computation here happens with 64-bit values (e.g.
// man, xHi, retMantissa) before finally converting to a 32-bit float.
// Exp10 Range.
if man == 0 {
if neg {
f = float32frombits(0x80000000) // Negative zero.
}
return f, true
}
pow, exp2, ok := pow10(exp10)
if !ok {
return 0, false
}
// Normalization.
// Shift man left so that its top bit is set; the exponent is kept as an
// unsigned value so that underflow wraps and is caught by the final
// range check.
clz := bits.LeadingZeros64(man)
man <<= uint(clz)
retExp2 := uint64(exp2+63-float32Bias) - uint64(clz)
// Multiplication.
xHi, xLo := bits.Mul64(man, pow.Hi)
// Wider Approximation.
// xLo+man < man is true exactly when the unsigned addition wraps around.
if xHi&0x3FFFFFFFFF == 0x3FFFFFFFFF && xLo+man < man {
yHi, yLo := bits.Mul64(man, pow.Lo)
mergedHi, mergedLo := xHi, xLo+yHi
if mergedLo < xLo {
// Carry out of the low word into the high word.
mergedHi++
}
if mergedHi&0x3FFFFFFFFF == 0x3FFFFFFFFF && mergedLo+1 == 0 && yLo+man < man {
return 0, false
}
xHi, xLo = mergedHi, mergedLo
}
// Shifting to 54 Bits (and for float32, it's shifting to 25 bits).
msb := xHi >> 63
retMantissa := xHi >> (msb + 38)
retExp2 -= 1 ^ msb
// Half-way Ambiguity.
if xLo == 0 && xHi&0x3FFFFFFFFF == 0 && retMantissa&3 == 1 {
return 0, false
}
// From 54 to 53 Bits (and for float32, it's from 25 to 24 bits).
// Adding the low bit before shifting rounds the dropped bit upward.
retMantissa += retMantissa & 1
retMantissa >>= 1
if retMantissa>>24 > 0 {
// Rounding carried into a 25th bit; renormalize.
retMantissa >>= 1
retExp2 += 1
}
// retExp2 is a uint64. Zero or underflow means that we're in subnormal
// float32 space. 0xFF or above means that we're in Inf/NaN float32 space.
//
// The if block is equivalent to (but has fewer branches than):
// if retExp2 <= 0 || retExp2 >= 0xFF { etc }
if retExp2-1 >= 0xFF-1 {
return 0, false
}
// Assemble exponent field, mantissa field (implicit top bit masked off),
// and sign bit, then narrow to 32 bits.
retBits := retExp2<<float32MantBits | retMantissa&(1<<float32MantBits-1)
if neg {
retBits |= 0x80000000
}
return float32frombits(uint32(retBits)), true
}

View file

@ -90,6 +90,7 @@ func ParseUint(s string, base int, bitSize int) (uint64, error) {
// Cutoff is the smallest number such that cutoff*base > maxUint64.
// Use compile-time constants for common cases.
const maxUint64 = 1<<64 - 1
var cutoff uint64
switch base {
case 10:

View file

@ -4,23 +4,10 @@
package strconv
type Uint128 = uint128
const (
Pow10Min = pow10Min
Pow10Max = pow10Max
)
var (
MulLog10_2 = mulLog10_2
MulLog2_10 = mulLog2_10
Log10Pow2 = log10Pow2
Log2Pow10 = log2Pow10
ParseFloatPrefix = parseFloatPrefix
Pow10 = pow10
Umul128 = umul128
Umul192 = umul192
Div5Tab = div5Tab
DivisiblePow5 = divisiblePow5
TrimZeros = trimZeros
)
func NewDecimal(i uint64) *decimal {

View file

@ -10,29 +10,26 @@
package strconv
import (
"math/bits"
"unsafe"
)
const (
lowerhex = "0123456789abcdef"
upperhex = "0123456789ABCDEF"
)
// floatInfo describes the bit layout of a binary floating-point format.
// The field order is significant: values of this type are built with
// positional composite literals.
type floatInfo struct {
// mantbits is the number of stored mantissa bits.
mantbits uint
// expbits is the number of exponent bits.
expbits uint
// bias is added to the stored exponent field to obtain the actual exponent.
bias int
}
const (
float32MantBits = 23
float32ExpBits = 8
float32Bias = -127
float32MinExp = -189
float64MantBits = 52
float64ExpBits = 11
float64Bias = -1023
)
var (
float32info = floatInfo{float32MantBits, float32ExpBits, float32Bias}
float64info = floatInfo{float64MantBits, float64ExpBits, float64Bias}
float64MinExp = -1085
)
// FormatFloat converts the floating-point number f to a string,
@ -60,179 +57,162 @@ var (
// The exponent is written as a decimal integer;
// for all formats other than 'b', it will be at least two digits.
func FormatFloat(f float64, fmt byte, prec, bitSize int) string {
return string(genericFtoa(make([]byte, 0, max(prec+4, 24)), f, fmt, prec, bitSize))
if bitSize == 32 {
return string(ftoa(make([]byte, 0, max(prec+4, 24)), float32(f), fmt, prec))
}
if bitSize == 64 {
return string(ftoa(make([]byte, 0, max(prec+4, 24)), f, fmt, prec))
}
panic("strconv: illegal FormatFloat bitSize")
}
// AppendFloat appends the string form of the floating-point number f,
// as generated by [FormatFloat], to dst and returns the extended buffer.
func AppendFloat(dst []byte, f float64, fmt byte, prec, bitSize int) []byte {
return genericFtoa(dst, f, fmt, prec, bitSize)
if bitSize == 32 {
return ftoa(dst, float32(f), fmt, prec)
}
if bitSize == 64 {
return ftoa(dst, f, fmt, prec)
}
panic("strconv: illegal AppendFloat bitSize")
}
func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte {
var bits uint64
var flt *floatInfo
switch bitSize {
func ftoa[F float32 | float64](dst []byte, val F, fmt byte, prec int) []byte {
var b uint64
var expBits, mantBits, bias int // parameterized constants
switch 8 * unsafe.Sizeof(val) {
case 32:
bits = uint64(float32bits(float32(val)))
flt = &float32info
b = uint64(float32bits(float32(val)))
expBits = float32ExpBits
mantBits = float32MantBits
bias = float32Bias
case 64:
bits = float64bits(val)
flt = &float64info
default:
panic("strconv: illegal AppendFloat/FormatFloat bitSize")
b = float64bits(float64(val))
expBits = float64ExpBits
mantBits = float64MantBits
bias = float64Bias
}
neg := bits>>(flt.expbits+flt.mantbits) != 0
exp := int(bits>>flt.mantbits) & (1<<flt.expbits - 1)
mant := bits & (uint64(1)<<flt.mantbits - 1)
denorm := false
switch exp {
case 1<<flt.expbits - 1:
// Inf, NaN
var s string
switch {
case mant != 0:
s = "NaN"
case neg:
s = "-Inf"
default:
s = "+Inf"
neg := b>>(expBits+mantBits) != 0
exp := int(b>>mantBits) & (1<<expBits - 1)
mant := b & (1<<mantBits - 1)
if exp == 1<<expBits-1 {
if mant != 0 {
return append(dst, "NaN"...)
}
return append(dst, s...)
case 0:
// denormalized
exp++
denorm = true
default:
// add implicit top bit
mant |= uint64(1) << flt.mantbits
if neg {
return append(dst, "-Inf"...)
}
return append(dst, "+Inf"...)
}
exp += flt.bias
if exp == 0 {
exp++
} else {
mant |= 1 << mantBits
}
exp += bias
// Pick off easy binary, hex formats.
if fmt == 'b' {
return fmtB(dst, neg, mant, exp, flt)
return fmtB(dst, neg, mant, exp-mantBits)
}
if fmt == 'x' || fmt == 'X' {
return fmtX(dst, prec, fmt, neg, mant, exp, flt)
return fmtX(dst, prec, fmt, neg, mant, exp, mantBits)
}
if !optimize {
return bigFtoa(dst, prec, fmt, neg, mant, exp, flt)
// Pick off zero.
if mant == 0 {
return fmtEFG(dst, neg, nil, 0, 0, prec, fmt, prec < 0)
}
// Negative precision means "only as much as needed to be exact."
shortest := prec < 0
var digs decimalSlice
if mant == 0 {
return formatDigits(dst, shortest, neg, digs, prec, fmt)
}
if shortest {
// Use the Dragonbox algorithm.
if prec < 0 {
// Use fast unrounded scaling.
var buf [32]byte
digs.d = buf[:]
dboxFtoa(&digs, mant, exp-int(flt.mantbits), denorm, bitSize)
s := 64 - bits.Len64(mant)
m := mant << s
e := exp - s
d, p := shortFloat[F](m, e-mantBits)
dp, nd := setDigits(buf[:], d, p, numDigits(d))
// Precision for shortest representation mode.
switch fmt {
case 'e', 'E':
prec = max(digs.nd-1, 0)
prec = max(nd-1, 0)
case 'f':
prec = max(digs.nd-digs.dp, 0)
prec = max(nd-dp, 0)
case 'g', 'G':
prec = digs.nd
prec = nd
}
return formatDigits(dst, shortest, neg, digs, prec, fmt)
return fmtEFG(dst, neg, buf[:], dp, nd, prec, fmt, true)
}
// Fixed number of digits.
digits := prec
switch fmt {
case 'f':
// %f precision specifies digits after the decimal point.
// Estimate an upper bound on the total number of digits needed.
// fixedFtoa will shorten as needed according to prec.
if exp >= 0 {
digits = 1 + mulLog10_2(1+exp) + prec
} else {
digits = 1 + prec - mulLog10_2(-exp)
}
case 'e', 'E':
digits++
case 'g', 'G':
if prec == 0 {
prec = 1
}
digits = prec
default:
// Invalid mode.
digits = 1
}
if digits <= 18 {
// digits <= 0 happens for %f on very small numbers
// and means that we're guaranteed to print all zeros.
if digits > 0 {
var buf [24]byte
digs.d = buf[:]
fixedFtoa(&digs, mant, exp-int(flt.mantbits), digits, prec, fmt)
}
return formatDigits(dst, false, neg, digs, prec, fmt)
}
return bigFtoa(dst, prec, fmt, neg, mant, exp, flt)
}
// bigFtoa uses multiprecision computations to format a float.
func bigFtoa(dst []byte, prec int, fmt byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte {
d := new(decimal)
d.Assign(mant)
d.Shift(exp - int(flt.mantbits))
var digs decimalSlice
shortest := prec < 0
if shortest {
roundShortest(d, mant, exp, flt)
digs = decimalSlice{d: d.d[:], nd: d.nd, dp: d.dp}
// Precision for shortest representation mode.
if optimize {
// Fixed number of digits.
digits := prec
switch fmt {
case 'e', 'E':
prec = digs.nd - 1
case 'f':
prec = max(digs.nd-digs.dp, 0)
case 'g', 'G':
prec = digs.nd
}
} else {
// Round appropriately.
switch fmt {
// %f precision specifies digits after the decimal point.
// Estimate an upper bound on the total number of digits needed.
// fixedWidthFloat will shorten as needed according to prec.
if exp >= 0 {
digits = 1 + log10Pow2(1+exp) + prec
} else {
digits = 1 + prec - log10Pow2(-exp)
}
case 'e', 'E':
d.Round(prec + 1)
case 'f':
d.Round(d.dp + prec)
digits++
case 'g', 'G':
if prec == 0 {
prec = 1
}
d.Round(prec)
digits = prec
default:
// Invalid mode.
digits = 1
}
if digits <= 18 {
// digits <= 0 happens for %f on very small numbers
// and means that we're guaranteed to print all zeros.
var buf [24]byte
var dp, nd int
if digits > 0 {
s := 64 - bits.Len64(mant)
m := mant << s
e := exp - s
d, p := fixedWidthFloat(m, e-mantBits, digits, prec, fmt)
if d != 0 {
dp, nd = setDigits(buf[:], d, p, numDigits(d))
}
}
return fmtEFG(dst, neg, buf[:], dp, nd, prec, fmt, false)
}
digs = decimalSlice{d: d.d[:], nd: d.nd, dp: d.dp}
}
return formatDigits(dst, shortest, neg, digs, prec, fmt)
}
func formatDigits(dst []byte, shortest bool, neg bool, digs decimalSlice, prec int, fmt byte) []byte {
// Slow bignum case. Only for non-shortest results.
d := new(decimal)
d.Assign(mant)
d.Shift(exp - mantBits)
switch fmt {
case 'e', 'E':
return fmtE(dst, neg, digs, prec, fmt)
d.Round(prec + 1)
case 'f':
return fmtF(dst, neg, digs, prec)
d.Round(d.dp + prec)
case 'g', 'G':
if prec == 0 {
prec = 1
}
d.Round(prec)
}
return fmtEFG(dst, neg, d.d[:], d.dp, d.nd, prec, fmt, false)
}
func fmtEFG(dst []byte, neg bool, s []byte, dp, nd, prec int, fmt byte, shortest bool) []byte {
if fmt == 'g' || fmt == 'G' {
// trailing fractional zeros in 'e' form will be trimmed.
eprec := prec
if eprec > digs.nd && digs.nd >= digs.dp {
eprec = digs.nd
if eprec > nd && nd >= dp {
eprec = nd
}
// %e is used if the exponent from the conversion
// is less than -4 or greater than or equal to the precision.
@ -240,286 +220,141 @@ func formatDigits(dst []byte, shortest bool, neg bool, digs decimalSlice, prec i
if shortest {
eprec = 6
}
exp := digs.dp - 1
exp := dp - 1
if exp < -4 || exp >= eprec {
if prec > digs.nd {
prec = digs.nd
if prec > nd {
prec = nd
}
return fmtE(dst, neg, digs, prec-1, fmt+'e'-'g')
prec--
fmt = fmt + 'e' - 'g'
} else {
if prec > dp {
prec = nd
}
prec = max(prec-dp, 0)
fmt = 'f'
}
if prec > digs.dp {
prec = digs.nd
}
switch fmt {
case 'e', 'E': // %e: -d.ddddde±dd
// sign
if neg {
dst = append(dst, '-')
}
return fmtF(dst, neg, digs, max(prec-digs.dp, 0))
// first digit
ch := byte('0')
if nd != 0 {
ch = s[0]
}
dst = append(dst, ch)
// .moredigits
if prec > 0 {
dst = append(dst, '.')
i := 1
m := min(nd, prec+1)
if i < m {
dst = append(dst, s[i:m]...)
i = m
}
for range prec + 1 - i {
dst = append(dst, '0')
}
}
// e±
dst = append(dst, fmt)
exp := dp - 1
if nd == 0 { // special case: 0 has exponent 0
exp = 0
}
if exp < 0 {
ch = '-'
exp = -exp
} else {
ch = '+'
}
dst = append(dst, ch)
// dd or ddd
switch {
case exp < 10:
dst = append(dst, '0', byte(exp)+'0')
case exp < 100:
dst = append(dst, byte(exp/10)+'0', byte(exp%10)+'0')
default:
dst = append(dst, byte(exp/100)+'0', byte(exp/10)%10+'0', byte(exp%10)+'0')
}
return dst
case 'f': // %f: -ddddddd.ddddd
// sign
if neg {
dst = append(dst, '-')
}
// integer, padded with zeros as needed.
if dp > 0 {
m := min(nd, dp)
for _, c := range s[:m] {
dst = append(dst, c)
}
for range dp - m {
dst = append(dst, '0')
}
} else {
dst = append(dst, '0')
}
// fraction
if prec > 0 {
dst = append(dst, '.')
lz := min(prec, max(0, -dp)) // leading zeros
m := min(prec-lz, max(0, nd-dp)) // middle digits
tz := max(0, prec-lz-m) // trailing zeros
for range lz {
dst = append(dst, '0')
}
off := dp + lz
for i := range m {
dst = append(dst, s[off+i])
}
for range tz {
dst = append(dst, '0')
}
}
return dst
}
// unknown format
return append(dst, '%', fmt)
}
// roundShortest rounds d (= mant * 2^exp) to the shortest number of digits
// that will let the original floating point value be precisely reconstructed.
//
// d is rounded in place. mant and exp are the binary mantissa and exponent
// that d was built from (d = mant << (exp - flt.mantbits)), and flt supplies
// the mantissa width and exponent bias of the floating-point format.
func roundShortest(d *decimal, mant uint64, exp int, flt *floatInfo) {
// If mantissa is zero, the number is zero; stop now.
if mant == 0 {
d.nd = 0
return
}
// Compute upper and lower such that any decimal number
// between upper and lower (possibly inclusive)
// will round to the original floating point number.
// We may see at once that the number is already shortest.
//
// Suppose d is not denormal, so that 2^exp <= d < 10^dp.
// The closest shorter number is at least 10^(dp-nd) away.
// The lower/upper bounds computed below are at distance
// at most 2^(exp-mantbits).
//
// So the number is already shortest if 10^(dp-nd) > 2^(exp-mantbits),
// or equivalently log2(10)*(dp-nd) > exp-mantbits.
// It is true if 332/100*(dp-nd) >= exp-mantbits (log2(10) > 3.32).
minexp := flt.bias + 1 // minimum possible exponent
if exp > minexp && 332*(d.dp-d.nd) >= 100*(exp-int(flt.mantbits)) {
// The number is already shortest.
return
}
// d = mant << (exp - mantbits)
// Next highest floating point number is mant+1 << exp-mantbits.
// Our upper bound is halfway between, mant*2+1 << exp-mantbits-1.
upper := new(decimal)
upper.Assign(mant*2 + 1)
upper.Shift(exp - int(flt.mantbits) - 1)
// d = mant << (exp - mantbits)
// Next lowest floating point number is mant-1 << exp-mantbits,
// unless mant-1 drops the significant bit and exp is not the minimum exp,
// in which case the next lowest is mant*2-1 << exp-mantbits-1.
// Either way, call it mantlo << explo-mantbits.
// Our lower bound is halfway between, mantlo*2+1 << explo-mantbits-1.
var mantlo uint64
var explo int
if mant > 1<<flt.mantbits || exp == minexp {
mantlo = mant - 1
explo = exp
} else {
mantlo = mant*2 - 1
explo = exp - 1
}
lower := new(decimal)
lower.Assign(mantlo*2 + 1)
lower.Shift(explo - int(flt.mantbits) - 1)
// The upper and lower bounds are possible outputs only if
// the original mantissa is even, so that IEEE round-to-even
// would round to the original mantissa and not the neighbors.
inclusive := mant%2 == 0
// As we walk the digits we want to know whether rounding up would fall
// within the upper bound. This is tracked by upperdelta:
//
// If upperdelta == 0, the digits of d and upper are the same so far.
//
// If upperdelta == 1, we saw a difference of 1 between d and upper on a
// previous digit and subsequently only 9s for d and 0s for upper.
// (Thus rounding up may fall outside the bound, if it is exclusive.)
//
// If upperdelta == 2, then the difference is greater than 1
// and we know that rounding up falls within the bound.
var upperdelta uint8
// Now we can figure out the minimum number of digits required.
// Walk along until d has distinguished itself from upper and lower.
for ui := 0; ; ui++ {
// lower, d, and upper may have the decimal points at different
// places. In this case upper is the longest, so we iterate from
// ui==0 and start li and mi at (possibly) -1.
mi := ui - upper.dp + d.dp
if mi >= d.nd {
// All digits of d have been examined; d is left unchanged.
break
}
li := ui - upper.dp + lower.dp
l := byte('0') // lower digit
if li >= 0 && li < lower.nd {
l = lower.d[li]
}
m := byte('0') // middle digit
if mi >= 0 {
m = d.d[mi]
}
u := byte('0') // upper digit
if ui < upper.nd {
u = upper.d[ui]
}
// Okay to round down (truncate) if lower has a different digit
// or if lower is inclusive and is exactly the result of rounding
// down (i.e., and we have reached the final digit of lower).
okdown := l != m || inclusive && li+1 == lower.nd
switch {
case upperdelta == 0 && m+1 < u:
// Example:
// m = 12345xxx
// u = 12347xxx
upperdelta = 2
case upperdelta == 0 && m != u:
// Example:
// m = 12345xxx
// u = 12346xxx
upperdelta = 1
case upperdelta == 1 && (m != '9' || u != '0'):
// Example:
// m = 1234598x
// u = 1234600x
upperdelta = 2
}
// Okay to round up if upper has a different digit and either upper
// is inclusive or upper is bigger than the result of rounding up.
okup := upperdelta > 0 && (inclusive || upperdelta > 1 || ui+1 < upper.nd)
// If it's okay to do either, then round to the nearest one.
// If it's okay to do only one, do it.
switch {
case okdown && okup:
d.Round(mi + 1)
return
case okdown:
d.RoundDown(mi + 1)
return
case okup:
d.RoundUp(mi + 1)
return
}
}
}
// decimalSlice is a run of decimal digit bytes together with a digit
// count and a decimal point position. fmtE and fmtF append its digits
// to their output unchanged.
type decimalSlice struct {
// d holds the digit bytes; the formatters read d[0:nd].
d []byte
// nd is the number of digits in use (nd == 0 is formatted as zero).
// dp is the decimal point position: fmtF emits the first dp digits as
// the integer part, and fmtE prints dp-1 as the decimal exponent.
nd, dp int
}
// fmtE appends d to dst in exponent form, -d.ddddde±dd, with prec digits
// after the decimal point. fmt is the exponent character to emit and neg
// selects a leading minus sign. The extended slice is returned.
func fmtE(dst []byte, neg bool, d decimalSlice, prec int, fmt byte) []byte {
	if neg {
		dst = append(dst, '-')
	}

	// Leading digit; an empty digit string is printed as 0.
	lead := byte('0')
	if d.nd != 0 {
		lead = d.d[0]
	}
	dst = append(dst, lead)

	// Fraction: copy whatever digits exist, then pad with zeros to prec places.
	if prec > 0 {
		dst = append(dst, '.')
		written := 1
		if avail := min(d.nd, prec+1); avail > written {
			dst = append(dst, d.d[written:avail]...)
			written = avail
		}
		for written <= prec {
			dst = append(dst, '0')
			written++
		}
	}

	// Exponent marker and sign. Zero always has exponent 0.
	dst = append(dst, fmt)
	e := 0
	if d.nd != 0 {
		e = d.dp - 1
	}
	sign := byte('+')
	if e < 0 {
		sign = '-'
		e = -e
	}
	dst = append(dst, sign)

	// Exponent magnitude: at least two digits, three when needed.
	switch {
	case e >= 100:
		dst = append(dst, byte(e/100)+'0', byte(e/10)%10+'0', byte(e%10)+'0')
	case e >= 10:
		dst = append(dst, byte(e/10)+'0', byte(e%10)+'0')
	default:
		dst = append(dst, '0', byte(e)+'0')
	}
	return dst
}
// fmtF appends d to dst in plain decimal form, -ddddddd.ddddd, with
// exactly prec digits after the decimal point, and returns the extended
// slice. neg selects a leading minus sign.
func fmtF(dst []byte, neg bool, d decimalSlice, prec int) []byte {
	if neg {
		dst = append(dst, '-')
	}

	// Integer part: a lone 0 when the decimal point sits at or before the
	// first digit, otherwise the leading digits padded out with zeros.
	if d.dp <= 0 {
		dst = append(dst, '0')
	} else {
		n := min(d.nd, d.dp)
		dst = append(dst, d.d[:n]...)
		for pad := d.dp - n; pad > 0; pad-- {
			dst = append(dst, '0')
		}
	}

	// Fraction: positions outside the stored digits read as 0.
	if prec > 0 {
		dst = append(dst, '.')
		for i := range prec {
			digit := byte('0')
			if idx := d.dp + i; idx >= 0 && idx < d.nd {
				digit = d.d[idx]
			}
			dst = append(dst, digit)
		}
	}
	return dst
}
// %b: -ddddddddp±ddd
func fmtB(dst []byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte {
// sign
func fmtB(dst []byte, neg bool, mant uint64, exp int) []byte {
if neg {
dst = append(dst, '-')
}
// mantissa
dst = AppendUint(dst, mant, 10)
// p
dst = append(dst, 'p')
// ±exponent
exp -= int(flt.mantbits)
if exp >= 0 {
dst = append(dst, '+')
}
dst = AppendInt(dst, int64(exp), 10)
return dst
}
// %x: -0x1.yyyyyyyyp±ddd or -0x0p+0. (y is hex digit, d is decimal digit)
func fmtX(dst []byte, prec int, fmt byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte {
func fmtX(dst []byte, prec int, fmt byte, neg bool, mant uint64, exp, mantBits int) []byte {
if mant == 0 {
exp = 0
}
// Shift digits so leading 1 (if any) is at bit 1<<60.
mant <<= 60 - flt.mantbits
// TODO: Is this the right way to handle subnormals?
mant <<= 60 - mantBits
for mant != 0 && mant&(1<<60) == 0 {
mant <<= 1
exp--

View file

@ -136,6 +136,7 @@ var ftoatests = []ftoaTest{
{fdiv(5e-304, 1e20), 'g', -1, "5e-324"}, // avoid constant arithmetic
{fdiv(-5e-304, 1e20), 'g', -1, "-5e-324"}, // avoid constant arithmetic
{fdiv(5e-304, 1e20), 'e', -1, "5e-324"},
{32, 'g', -1, "32"},
{32, 'g', 0, "3e+01"},

View file

@ -1,349 +0,0 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strconv
// Binary to decimal conversion using the Dragonbox algorithm by Junekey Jeon.
//
// Fixed precision format is not supported by the Dragonbox algorithm
// so we continue to use Ryū-printf for this purpose.
// See https://github.com/jk-jeon/dragonbox/issues/38 for more details.
//
// For binary to decimal rounding, uses round to nearest, tie to even.
// For decimal to binary rounding, assumes round to nearest, tie to even.
//
// The original paper by Junekey Jeon can be found at:
// https://github.com/jk-jeon/dragonbox/blob/d5dc40ae6a3f1a4559cda816738df2d6255b4e24/other_files/Dragonbox.pdf
//
// The reference implementation in C++ by Junekey Jeon can be found at:
// https://github.com/jk-jeon/dragonbox/blob/6c7c925b571d54486b9ffae8d9d18a822801cbda/subproject/simple/include/simple_dragonbox.h
// dboxFtoa computes the decimal significand and exponent
// from the binary significand mant and exponent exp using the
// Dragonbox algorithm and stores the resulting decimal digits in d.
// A bitSize of 32 selects the float32 routine; any other value
// selects the float64 routine.
func dboxFtoa(d *decimalSlice, mant uint64, exp int, denorm bool, bitSize int) {
	switch bitSize {
	case 32:
		dboxFtoa32(d, uint32(mant), exp, denorm)
	default:
		dboxFtoa64(d, mant, exp, denorm)
	}
}
// dboxFtoa64 is the float64 case of dboxFtoa: it computes decimal digits
// for the binary significand mant and exponent exp and stores them in d
// through dboxDigits. The algorithm, page and section numbers in the
// comments below refer to the Dragonbox paper cited at the top of this file.
func dboxFtoa64(d *decimalSlice, mant uint64, exp int, denorm bool) {
// Special case: the significand is exactly 1<<float64MantBits and the
// input is not flagged as denorm.
if mant == 1<<float64MantBits && !denorm {
// Algorithm 5.6 (page 24).
k0 := -mulLog10_2MinusLog10_4Over3(exp)
φ, β := dboxPow64(k0, exp)
xi, zi := dboxRange64(φ, β)
if exp != 2 && exp != 3 {
xi++
}
// If a multiple of 10 lies in [xi, zi], emit it with its
// trailing zeros trimmed.
q := zi / 10
if xi <= q*10 {
q, zeros := trimZeros(q)
dboxDigits(d, q, -k0+1+zeros)
return
}
// Otherwise start from the rounded-up candidate and adjust it.
// NOTE(review): exp == -77 appears to be the one exponent where
// the candidate can be an exact tie, resolved toward even; confirm
// against the paper.
yru := dboxRoundUp64(φ, β)
if exp == -77 && yru%2 != 0 {
yru--
} else if yru < xi {
yru++
}
dboxDigits(d, yru, -k0)
return
}
// κ = 2 for float64 (section 5.1.3)
const (
κ = 2
p10κ = 100 // 10**κ
p10κ1 = p10κ * 10 // 10**(κ+1)
)
// Algorithm 5.2 (page 15).
k0 := -mulLog10_2(exp)
φ, β := dboxPow64(κ+k0, exp)
zi, exact := dboxMulPow64(uint64(mant*2+1)<<β, φ)
// Split zi into quotient s and remainder r modulo 10**(κ+1).
s, r := zi/p10κ1, uint32(zi%p10κ1)
δi := dboxDelta64(φ, β)
if r < δi {
// NOTE(review): mant%2 == 0 presumably means the interval endpoints
// are included, because ties round to even; confirm against the paper.
if r != 0 || !exact || mant%2 == 0 {
s, zeros := trimZeros(s)
dboxDigits(d, s, -k0+1+zeros)
return
}
// Otherwise step s down by one and carry a full 10**(κ+1)
// into r before falling through to Algorithm 5.4.
s--
r = p10κ * 10
} else if r == δi {
parity, exact := dboxParity64(uint64(mant*2-1), φ, β)
if parity || (exact && mant%2 == 0) {
s, zeros := trimZeros(s)
dboxDigits(d, s, -k0+1+zeros)
return
}
}
// Algorithm 5.4 (page 18).
D := r + p10κ/2 - δi/2
t, ρ := D/p10κ, D%p10κ
yru := 10*s + uint64(t)
if ρ == 0 {
// D is an exact multiple of 10**κ: use the parity of the exact
// product to decide whether the candidate must be decremented.
parity, exact := dboxParity64(mant*2, φ, β)
if parity != ((D-p10κ/2)%2 != 0) || exact && yru%2 != 0 {
yru--
}
}
dboxDigits(d, yru, -k0)
}
// dboxFtoa32 is the float32 case of dboxFtoa. It is almost identical to
// dboxFtoa64 and is kept as a separate copy to minimize runtime overhead.
//
// It computes decimal digits for the binary significand mant and exponent
// exp and stores them in d through dboxDigits. The algorithm, page and
// section numbers below refer to the Dragonbox paper cited at the top of
// this file.
func dboxFtoa32(d *decimalSlice, mant uint32, exp int, denorm bool) {
	// Special case: the significand is exactly 1<<float32MantBits and the
	// input is not flagged as denorm.
	if mant == 1<<float32MantBits && !denorm {
		// Algorithm 5.6 (page 24).
		k0 := -mulLog10_2MinusLog10_4Over3(exp)
		φ, β := dboxPow32(k0, exp)
		xi, zi := dboxRange32(φ, β)
		if exp != 2 && exp != 3 {
			xi++
		}
		// If a multiple of 10 lies in [xi, zi], emit it with its
		// trailing zeros trimmed.
		q := zi / 10
		if xi <= q*10 {
			q, zeros := trimZeros(uint64(q))
			dboxDigits(d, q, -k0+1+zeros)
			return
		}
		yru := dboxRoundUp32(φ, β)
		// The shorter-interval tie exponent depends on the format:
		// it is -35 for binary32. (-77 is the binary64 value and must
		// not be used here.) At that exponent an odd candidate is an
		// exact tie and is rounded down to even.
		if exp == -35 && yru%2 != 0 {
			yru--
		} else if yru < xi {
			yru++
		}
		dboxDigits(d, uint64(yru), -k0)
		return
	}

	// κ = 1 for float32 (section 5.1.3)
	const (
		κ     = 1
		p10κ  = 10        // 10**κ
		p10κ1 = p10κ * 10 // 10**(κ+1)
	)

	// Algorithm 5.2 (page 15).
	k0 := -mulLog10_2(exp)
	φ, β := dboxPow32(κ+k0, exp)
	zi, exact := dboxMulPow32(uint32(mant*2+1)<<β, φ)
	// Split zi into quotient s and remainder r modulo 10**(κ+1).
	s, r := zi/p10κ1, uint32(zi%p10κ1)
	δi := dboxDelta32(φ, β)
	if r < δi {
		if r != 0 || !exact || mant%2 == 0 {
			s, zeros := trimZeros(uint64(s))
			dboxDigits(d, s, -k0+1+zeros)
			return
		}
		// Otherwise step s down by one and carry a full 10**(κ+1)
		// into r before falling through to Algorithm 5.4.
		s--
		r = p10κ * 10
	} else if r == δi {
		parity, exact := dboxParity32(uint32(mant*2-1), φ, β)
		if parity || (exact && mant%2 == 0) {
			s, zeros := trimZeros(uint64(s))
			dboxDigits(d, s, -k0+1+zeros)
			return
		}
	}

	// Algorithm 5.4 (page 18).
	D := r + p10κ/2 - δi/2
	t, ρ := D/p10κ, D%p10κ
	yru := 10*s + uint32(t)
	if ρ == 0 {
		// D is an exact multiple of 10**κ: use the parity of the exact
		// product to decide whether the candidate must be decremented.
		parity, exact := dboxParity32(mant*2, φ, β)
		if parity != ((D-p10κ/2)%2 != 0) || exact && yru%2 != 0 {
			yru--
		}
	}
	dboxDigits(d, uint64(yru), -k0)
}
// dboxDigits writes the decimal digits of mant into the tail of d.d,
// narrows d.d to exactly those digits, and places the decimal point
// exp positions to the right of the last digit.
func dboxDigits(d *decimalSlice, mant uint64, exp int) {
	start := formatBase10(d.d, mant)
	digits := d.d[start:]
	d.d = digits
	d.nd = len(digits)
	d.dp = len(digits) + exp
}
// uadd128 returns u + n as a 128-bit value, propagating the carry
// out of the low word into the high word.
func uadd128(u uint128, n uint64) uint128 {
	lo := u.Lo + n
	hi := u.Hi
	if lo < u.Lo {
		// The low word wrapped around, so carry into the high word.
		hi++
	}
	return uint128{Hi: hi, Lo: lo}
}
// umul64 returns the full 64-bit product of the 32-bit values x and y.
func umul64(x, y uint32) uint64 {
	wx, wy := uint64(x), uint64(y)
	return wx * wy
}
// umul96Upper64 returns the upper 64 bits (out of 96 bits) of x * y.
func umul96Upper64(x uint32, y uint64) uint64 {
	// Multiply x by each 32-bit half of y, then fold the top half of the
	// low partial product into the high partial product.
	hiProd := umul64(x, uint32(y>>32))
	loProd := umul64(x, uint32(y))
	return hiProd + (loProd >> 32)
}
// umul96Lower64 returns the lower 64 bits (out of 96 bits) of x * y.
func umul96Lower64(x uint32, y uint64) uint64 {
	wide := uint64(x)
	return wide * y
}
// umul128Upper64 returns the upper 64 bits (out of 128 bits) of x * y,
// computed from the four 32x32-bit partial products.
func umul128Upper64(x, y uint64) uint64 {
	xh, xl := uint32(x>>32), uint32(x)
	yh, yl := uint32(y>>32), uint32(y)

	hh := umul64(xh, yh)
	lh := umul64(xl, yh)
	hl := umul64(xh, yl)
	ll := umul64(xl, yl)

	// mid collects everything that lands in bits 32..63 of the product;
	// its top half is the carry into the upper 64 bits.
	mid := (ll >> 32) + uint64(uint32(hl)) + uint64(uint32(lh))
	return hh + (mid >> 32) + (hl >> 32) + (lh >> 32)
}
// umul192Upper128 returns the upper 128 bits (out of 192 bits) of x * y:
// the full product of x with y.Hi plus the high word of x times y.Lo.
func umul192Upper128(x uint64, y uint128) uint128 {
	return uadd128(umul128(x, y.Hi), umul128Upper64(x, y.Lo))
}
// umul192Lower128 returns the lower 128 bits (out of 192 bits) of x * y.
func umul192Lower128(x uint64, y uint128) uint128 {
	lowProd := umul128(x, y.Lo)
	// Only the low 64 bits of x*y.Hi can reach the result's high word.
	return uint128{Hi: x*y.Hi + lowProd.Hi, Lo: lowProd.Lo}
}
// dboxMulPow64 computes x^(i), y^(i), z^(i)
// from the precomputed value of φ̃k for float64
// and also checks if x^(f), y^(f), z^(f) == 0 (section 5.2.1).
// intPart is the high word of the upper 128 bits of u*phi;
// isInt reports whether the low word of those bits is zero.
func dboxMulPow64(u uint64, phi uint128) (intPart uint64, isInt bool) {
	prod := umul192Upper128(u, phi)
	return prod.Hi, prod.Lo == 0
}
// dboxMulPow32 computes x^(i), y^(i), z^(i)
// from the precomputed value of φ̃k for float32
// and also checks if x^(f), y^(f), z^(f) == 0 (section 5.2.1).
// intPart is the high 32 bits of the upper 64 bits of u*phi;
// isInt reports whether the low 32 bits of those bits are zero.
func dboxMulPow32(u uint32, phi uint64) (intPart uint32, isInt bool) {
	prod := umul96Upper64(u, phi)
	hi, lo := uint32(prod>>32), uint32(prod)
	return hi, lo == 0
}
// dboxParity64 computes only the parity of x^(i), y^(i), z^(i)
// from the precomputed value of φ̃k for float64
// and also checks if x^(f), y^(f), z^(f) = 0 (section 5.2.1).
func dboxParity64(mant2 uint64, phi uint128, beta int) (parity bool, isInt bool) {
	prod := umul192Lower128(mant2, phi)
	// The parity bit sits beta bits below the top of the high word;
	// everything below it, taken 64 bits wide, is the fractional part.
	parityBit := (prod.Hi >> (64 - beta)) & 1
	frac := (prod.Hi << beta) | (prod.Lo >> (64 - beta))
	return parityBit != 0, frac == 0
}
// dboxParity32 computes only the parity of x^(i), y^(i), z^(i)
// from the precomputed value of φ̃k for float32
// and also checks if x^(f), y^(f), z^(f) = 0 (section 5.2.1).
func dboxParity32(mant2 uint32, phi uint64, beta int) (parity bool, isInt bool) {
	prod := umul96Lower64(mant2, phi)
	parityBit := (prod >> (64 - beta)) & 1
	frac := uint32(prod >> (32 - beta))
	return parityBit != 0, frac == 0
}
// dboxDelta64 returns δ^(i) from the precomputed value of φ̃k for float64:
// the high word of φ shifted right by 64-1-β, truncated to 32 bits.
func dboxDelta64(φ uint128, β int) uint32 {
	shift := 64 - 1 - β
	return uint32(φ.Hi >> shift)
}
// dboxDelta32 returns δ^(i) from the precomputed value of φ̃k for float32:
// φ shifted right by 64-1-β, truncated to 32 bits.
func dboxDelta32(φ uint64, β int) uint32 {
	shift := 64 - 1 - β
	return uint32(φ >> shift)
}
// mulLog10_2MinusLog10_4Over3 computes
// ⌊e*log10(2)-log10(4/3)⌋ = ⌊log10(2^e)-log10(4/3)⌋ (section 6.3)
// using integer fixed-point arithmetic.
// e should be in the range [-2985, 2936].
func mulLog10_2MinusLog10_4Over3(e int) int {
	const (
		slope  = 631305 // multiplier for e, scaled by 2^shift
		offset = 261663 // amount subtracted, scaled by 2^shift
		shift  = 21
	)
	return (e*slope - offset) >> shift
}
const (
floatMantBits64 = 52 // p = 52 for float64.
floatMantBits32 = 23 // p = 23 for float32.
)
// dboxRange64 returns the left and right float64 endpoints,
// both derived from the high word of φ and scaled by β.
func dboxRange64(φ uint128, β int) (left, right uint64) {
	shift := 64 - float64MantBits - 1 - β
	lo := φ.Hi - (φ.Hi >> (float64MantBits + 2))
	hi := φ.Hi + (φ.Hi >> (float64MantBits + 1))
	return lo >> shift, hi >> shift
}
// dboxRange32 returns the left and right float32 endpoints,
// both derived from φ and scaled by β.
func dboxRange32(φ uint64, β int) (left, right uint32) {
	shift := 64 - floatMantBits32 - 1 - β
	lo := φ - (φ >> (floatMantBits32 + 2))
	hi := φ + (φ >> (floatMantBits32 + 1))
	return uint32(lo >> shift), uint32(hi >> shift)
}
// dboxRoundUp64 computes the round up of y (i.e., y^(ru)):
// it keeps one extra bit below the result, adds one, and halves.
func dboxRoundUp64(phi uint128, beta int) uint64 {
	shift := 128/2 - floatMantBits64 - 2 - beta
	doubled := phi.Hi>>shift + 1
	return doubled / 2
}
// dboxRoundUp32 computes the round up of y (i.e., y^(ru)):
// it keeps one extra bit below the result, adds one, and halves.
func dboxRoundUp32(phi uint64, beta int) uint32 {
	shift := 64 - floatMantBits32 - 2 - beta
	doubled := uint32(phi>>shift + 1)
	return doubled / 2
}
// dboxPow64 gets the precomputed value of φ̃̃k for float64.
// It looks up 10**k with pow10, adds one to the low word when k lies
// outside [0, 55], and returns β = e + e1 - 1, where e1 is the binary
// exponent reported by pow10.
func dboxPow64(k, e int) (φ uint128, β int) {
	pow, e1, _ := pow10(k)
	if k < 0 || k > 55 {
		pow.Lo++
	}
	return pow, e + e1 - 1
}
// dboxPow32 gets the precomputed value of φ̃̃k for float32.
// It uses only the high word of pow10(k), adds one to it when k lies
// outside [0, 27], and returns exp = e + e1 - 1, where e1 is the binary
// exponent reported by pow10.
func dboxPow32(k, e int) (mant uint64, exp int) {
	pow, e1, _ := pow10(k)
	hi := pow.Hi
	if k < 0 || k > 27 {
		hi++
	}
	return hi, e + e1 - 1
}

View file

@ -1,184 +0,0 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strconv
import "math/bits"
var uint64pow10 = [...]uint64{
1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
}
// fixedFtoa formats a number of decimal digits of mant*(2^exp) into d,
// where mant > 0 and 1 ≤ digits ≤ 18.
// If fmt == 'f', digits is a conservative overestimate, and the final
// number of digits is prec past the decimal point.
//
// On return d.dp holds the decimal point position. If the rounded value
// is non-zero, d.d[:d.nd] holds its digits with trailing zeros trimmed;
// otherwise d.d and d.nd are left as the caller set them.
// fixedFtoa panics if digits > 18.
func fixedFtoa(d *decimalSlice, mant uint64, exp, digits, prec int, fmt byte) {
// The strategy here is to multiply (mant * 2^exp) by a power of 10
// to make the resulting integer be the number of digits we want.
//
// Adams proved in the Ryu paper that 128-bit precision in the
// power-of-10 constant is sufficient to produce correctly
// rounded output for all float64s, up to 18 digits.
// https://dl.acm.org/doi/10.1145/3192366.3192369
//
// TODO(rsc): The paper is not focused on, nor terribly clear about,
// this fact in this context, and the proof seems too complicated.
// Post a shorter, more direct proof and link to it here.
if digits > 18 {
panic("fixedFtoa called with digits > 18")
}
// Shift mantissa to have 64 bits,
// so that the 192-bit product below will
// have at least 63 bits in its top word.
b := 64 - bits.Len64(mant)
mant <<= b
exp -= b
// We have f = mant * 2^exp ≥ 2^(63+exp)
// and we want to multiply it by some 10^p
// to make it have the number of digits plus one rounding bit:
//
// 2 * 10^(digits-1) ≤ f * 10^p < ~2 * 10^digits
//
// The lower bound is required, but the upper bound is approximate:
// we must not have too few digits, but we can round away extra ones.
//
// f * 10^p ≥ 2 * 10^(digits-1)
// 10^p ≥ 2 * 10^(digits-1) / f [dividing by f]
// p ≥ (log₁₀ 2) + (digits-1) - log₁₀ f [taking log₁₀]
// p ≥ (log₁₀ 2) + (digits-1) - log₁₀ (mant * 2^exp) [expanding f]
// p ≥ (log₁₀ 2) + (digits-1) - (log₁₀ 2) * (64 + exp) [mant < 2⁶⁴]
// p ≥ (digits - 1) - (log₁₀ 2) * (63 + exp) [refactoring]
//
// Once we have p, we can compute the scaled value:
//
// dm * 2^de = mant * 2^exp * 10^p
// = mant * 2^exp * pow/2^128 * 2^exp2.
// = (mant * pow/2^128) * 2^(exp+exp2).
p := (digits - 1) - mulLog10_2(63+exp)
pow, exp2, ok := pow10(p)
if !ok {
// This never happens due to the range of float32/float64 exponent
panic("fixedFtoa: pow10 out of range")
}
if -22 <= p && p < 0 {
// Special case: Let q=-p. q is in [1,22]. We are dividing by 10^q
// and the mantissa may be a multiple of 5^q (5^22 < 2^53),
// in which case the division must be computed exactly and
// recorded as exact for correct rounding. Our normal computation is:
//
// dm = floor(mant * floor(10^p * 2^s))
//
// for some scaling shift s. To make this an exact division,
// it suffices to change the inner floor to a ceil:
//
// dm = floor(mant * ceil(10^p * 2^s))
//
// In the range of values we are using, the floor and ceil
// cancel each other out and the high 64 bits of the product
// come out exactly right.
// (This is the same trick compilers use for division by constants.
// See Hacker's Delight, 2nd ed., Chapter 10.)
pow.Lo++
}
dm, lo1, lo0 := umul192(mant, pow)
de := exp + exp2
// Check whether any bits have been truncated from dm.
// If so, set dt != 0. If not, leave dt == 0 (meaning dm is exact).
var dt uint
switch {
default:
// Most powers of 10 use a truncated constant,
// meaning the result is also truncated.
dt = 1
case 0 <= p && p <= 55:
// Small positive powers of 10 (up to 10⁵⁵) can be represented
// precisely in a 128-bit mantissa (5⁵⁵ ≤ 2¹²⁸), so the only truncation
// comes from discarding the low bits of the 192-bit product.
//
// TODO(rsc): The new proof mentioned above should also
// prove that we can't have lo1 == 0 and lo0 != 0.
// After proving that, drop computation and use of lo0 here.
dt = bool2uint(lo1|lo0 != 0)
case -22 <= p && p < 0 && divisiblePow5(mant, -p):
// If the original mantissa was a multiple of 5^(-p)
// (p is negative here, so -p is the q of the comment above),
// the result is exact. (See comment above for pow.Lo++.)
dt = 0
}
// The value we want to format is dm * 2^de, where de < 0.
// Multiply by 2^de by shifting, but leave one extra bit for rounding.
// After the shift, the "integer part" of dm is dm>>1,
// the "rounding bit" (the first fractional bit) is dm&1,
// and the "truncated bit" (have any bits been discarded?) is dt.
shift := -de - 1
dt |= bool2uint(dm&(1<<shift-1) != 0)
dm >>= shift
// Set decimal point in eventual formatted digits,
// so we can update it as we adjust the digits.
d.dp = digits - p
// Trim excess digit if any, updating truncation and decimal point.
// The << 1 is leaving room for the rounding bit.
// (The local name max shadows the builtin max inside this function.)
max := uint64pow10[digits] << 1
if dm >= max {
var r uint
dm, r = dm/10, uint(dm%10)
dt |= bool2uint(r != 0)
d.dp++
}
// If this is %.*f we may have overestimated the digits needed.
// Now that we know where the decimal point is,
// trim to the actual number of digits, which is d.dp+prec.
if fmt == 'f' && digits != d.dp+prec {
for digits > d.dp+prec {
var r uint
dm, r = dm/10, uint(dm%10)
dt |= bool2uint(r != 0)
digits--
}
// Dropping those digits can create a new leftmost
// non-zero digit, like if we are formatting %.1f and
// convert 0.09 -> 0.1. Detect and adjust for that.
if digits <= 0 {
digits = 1
d.dp++
}
max = uint64pow10[digits] << 1
}
// Round and shift away rounding bit.
// We want to round up when
// (a) the fractional part is > 0.5 (dm&1 != 0 and dt == 1)
// (b) or the fractional part is ≥ 0.5 and the integer part is odd
// (dm&1 != 0 and dm&2 != 0).
// The bitwise expression encodes that logic.
dm += uint64(uint(dm) & (dt | uint(dm)>>1) & 1)
dm >>= 1
if dm == max>>1 {
// 999... rolled over to 1000...
dm = uint64pow10[digits-1]
d.dp++
}
// Format digits into d.
if dm != 0 {
// dm has exactly digits digits here, so formatBase10 must fill
// the whole d.d[:digits] window and report offset 0.
if formatBase10(d.d[:digits], dm) != 0 {
panic("formatBase10")
}
d.nd = digits
// Drop trailing zeros; dm != 0 guarantees a non-zero digit stops the loop.
for d.d[d.nd-1] == '0' {
d.nd--
}
}
}

View file

@ -6,21 +6,8 @@ package strconv_test
import . "internal/strconv"
type uint128 = Uint128
const (
pow10Min = Pow10Min
pow10Max = Pow10Max
)
var (
mulLog10_2 = MulLog10_2
mulLog2_10 = MulLog2_10
log2Pow10 = Log2Pow10
log10Pow2 = Log10Pow2
parseFloatPrefix = ParseFloatPrefix
pow10 = Pow10
umul128 = Umul128
umul192 = Umul192
div5Tab = Div5Tab
divisiblePow5 = DivisiblePow5
trimZeros = TrimZeros
)

View file

@ -15,8 +15,9 @@ func FormatUint(i uint64, base int) string {
return small(int(i))
}
var a [24]byte
j := formatBase10(a[:], i)
return string(a[j:])
nd := numDigits(i)
formatBase10(a[:nd], i)
return string(a[:nd])
}
_, s := formatBits(nil, i, base, false, false)
return s
@ -35,12 +36,13 @@ func FormatInt(i int64, base int) string {
if i < 0 {
u = -u
}
j := formatBase10(a[:], u)
nd := numDigits(u)
formatBase10(a[1:1+nd], u)
if i < 0 {
j--
a[j] = '-'
a[0] = '-'
return string(a[:1+nd])
}
return string(a[j:])
return string(a[1 : 1+nd])
}
_, s := formatBits(nil, uint64(i), base, i < 0, false)
return s
@ -70,8 +72,9 @@ func AppendUint(dst []byte, i uint64, base int) []byte {
return append(dst, small(int(i))...)
}
var a [24]byte
j := formatBase10(a[:], i)
return append(dst, a[j:]...)
nd := numDigits(i)
formatBase10(a[:nd], i)
return append(dst, a[:nd]...)
}
dst, _ = formatBits(dst, i, base, false, true)
return dst
@ -164,8 +167,6 @@ const smalls = "00010203040506070809" +
"80818283848586878889" +
"90919293949596979899"
const host64bit = ^uint(0)>>32 != 0
// small returns the string for an i with 0 <= i < nSmalls.
func small(i int) string {
if i < 10 {
@ -179,59 +180,52 @@ func small(i int) string {
// It is only for use by package runtime.
// Other packages should use AppendUint.
func RuntimeFormatBase10(a []byte, u uint64) int {
return formatBase10(a, u)
}
// formatBase10 formats the decimal representation of u into the tail of a
// and returns the offset of the first byte written to a. That is, after
//
// i := formatBase10(a, u)
//
// the decimal representation is in a[i:].
func formatBase10(a []byte, u uint64) int {
// Split into 9-digit chunks that fit in uint32s
// and convert each chunk using uint32 math instead of uint64 math.
// The obvious way to write the outer loop is "for u >= 1e9", but most numbers are small,
// so the setup for the comparison u >= 1e9 is usually pure overhead.
// Instead, we approximate it by u>>29 != 0, which is usually faster and good enough.
i := len(a)
for (host64bit && u>>29 != 0) || (!host64bit && uint32(u)>>29|uint32(u>>32) != 0) {
var lo uint32
u, lo = u/1e9, uint32(u%1e9)
// Convert 9 digits.
for range 4 {
var dd uint32
lo, dd = lo/100, (lo%100)*2
i -= 2
a[i+0], a[i+1] = smalls[dd+0], smalls[dd+1]
}
i--
a[i] = smalls[lo*2+1]
// If we'd been using u >= 1e9 then we would be guaranteed that u/1e9 > 0,
// but since we used u>>29 != 0, u/1e9 might be 0, so we might be done.
// (If u is now 0, then at the start we had 2²⁹ ≤ u < 10⁹, so it was still correct
// to write 9 digits; we have not accidentally written any leading zeros.)
if u == 0 {
return i
}
}
// Convert final chunk, at most 8 digits.
lo := uint32(u)
for lo >= 100 {
var dd uint32
lo, dd = lo/100, (lo%100)*2
i -= 2
a[i+0], a[i+1] = smalls[dd+0], smalls[dd+1]
}
i--
dd := lo * 2
a[i] = smalls[dd+1]
if lo >= 10 {
i--
a[i] = smalls[dd+0]
}
// Note: numDigits requires an argument ≥ 1.
// The |1 changes 0 to 1 without adding an extra digit
// to any other value.
i := len(a) - numDigits(u|1)
formatBase10(a[i:], u)
return i
}
// formatBase10 formats the decimal representation of u into a,
// filling a from its last byte backward.
// The caller is responsible for ensuring that a is big enough to hold u.
// If a is too big, leading zeros will be filled in as needed.
func formatBase10(a []byte, u uint64) {
	end := len(a)

	// Emit eight digits (four pairs) at a time while at least eight
	// positions remain.
	for ; end >= 8; end -= 8 {
		chunk := uint32(u % 1e8)
		u /= 1e8
		hi4, lo4 := chunk/1e4, chunk%1e4
		p0 := (lo4 % 100) * 2
		p1 := (lo4 / 100) * 2
		p2 := (hi4 % 100) * 2
		p3 := (hi4 / 100) * 2
		a[end-1], a[end-2] = smalls[p0+1], smalls[p0]
		a[end-3], a[end-4] = smalls[p1+1], smalls[p1]
		a[end-5], a[end-6] = smalls[p2+1], smalls[p2]
		a[end-7], a[end-8] = smalls[p3+1], smalls[p3]
	}

	// Fewer than eight positions remain; continue in 32-bit arithmetic.
	rest := uint32(u)
	if end >= 4 {
		// Four digits (two pairs).
		lo4 := rest % 1e4
		rest /= 1e4
		p0 := (lo4 % 100) * 2
		p1 := (lo4 / 100) * 2
		a[end-1], a[end-2] = smalls[p0+1], smalls[p0]
		a[end-3], a[end-4] = smalls[p1+1], smalls[p1]
		end -= 4
	}
	if end >= 2 {
		// Two digits (one pair).
		p0 := (rest % 1e2) * 2
		rest /= 1e2
		a[end-1], a[end-2] = smalls[p0+1], smalls[p0]
		end -= 2
	}
	if end > 0 {
		// One position left: the leading digit.
		a[0] = byte('0' + rest)
	}
}

View file

@ -86,6 +86,15 @@ func TestItoa(t *testing.T) {
}
}
if test.base == 10 && test.in >= 0 {
buf := make([]byte, 32)
i := RuntimeFormatBase10(buf[:], uint64(test.in))
s := string(buf[i:])
if s != test.out {
t.Errorf("RuntimeFormatBase10(%d) = %q, want %q", test.in, s, test.out)
}
}
if test.base == 10 && int64(int(test.in)) == test.in {
s := Itoa(int(test.in))
if s != test.out {
@ -131,7 +140,14 @@ func TestUitoa(t *testing.T) {
t.Errorf("AppendUint(%q, %v, %v) = %q want %v",
"abc", test.in, test.base, x, test.out)
}
if test.base == 10 {
buf := make([]byte, 32)
i := RuntimeFormatBase10(buf[:], test.in)
s := string(buf[i:])
if s != test.out {
t.Errorf("RuntimeFormatBase10(%d) = %q, want %q", test.in, s, test.out)
}
}
}
}

View file

@ -1,179 +0,0 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strconv
import "math/bits"
// A uint128 is a 128-bit uint.
// The fields are exported to make them visible to package strconv_test.
type uint128 struct {
Hi uint64 // high 64 bits
Lo uint64 // low 64 bits
}
// umul128 returns the 128-bit product x*y, with the high and low
// 64-bit halves in Hi and Lo.
func umul128(x, y uint64) uint128 {
	var prod uint128
	prod.Hi, prod.Lo = bits.Mul64(x, y)
	return prod
}
// umul192 returns the 192-bit product x*y in three uint64s,
// most significant word first.
func umul192(x uint64, y uint128) (hi, mid, lo uint64) {
	// Two 64x64 partial products: x*y.Lo covers the low two words and
	// x*y.Hi the high two; they overlap in the middle word.
	lowHi, lowLo := bits.Mul64(x, y.Lo)
	highHi, highLo := bits.Mul64(x, y.Hi)
	sum, carry := bits.Add64(lowHi, highLo, 0)
	return highHi + carry, sum, lowLo
}
// pow10 returns the 128-bit mantissa and binary exponent of 10**e.
// That is, 10^e = mant/2^128 * 2**exp.
// If e is outside [pow10Min, pow10Max], pow10 returns zero values
// and ok=false.
func pow10(e int) (mant uint128, exp int, ok bool) {
	if pow10Min <= e && e <= pow10Max {
		return pow10Tab[e-pow10Min], 1 + mulLog2_10(e), true
	}
	return uint128{}, 0, false
}
// mulLog10_2 returns math.Floor(x * log(2)/log(10)) for an integer x in
// the range -1600 <= x && x <= +1600.
//
// Restricting the range allows the result to be computed with a single
// integer multiply and shift instead of floating point arithmetic.
// Correctness is verified by unit tests.
func mulLog10_2(x int) int {
	const (
		ratio = 78913 // log(2)/log(10) ≈ 0.30102999566 ≈ 78913 / 2^18
		shift = 18
	)
	return (x * ratio) >> shift
}
// mulLog2_10 returns math.Floor(x * log(10)/log(2)) for an integer x in
// the range -500 <= x && x <= +500.
//
// Restricting the range allows the result to be computed with a single
// integer multiply and shift instead of floating point arithmetic.
// Correctness is verified by unit tests.
func mulLog2_10(x int) int {
	const (
		ratio = 108853 // log(10)/log(2) ≈ 3.32192809489 ≈ 108853 / 2^15
		shift = 15
	)
	return (x * ratio) >> shift
}
// bool2uint converts b to an integer: 1 for true, 0 for false.
func bool2uint(b bool) uint {
	var v uint
	if b {
		v = 1
	}
	return v
}
// Exact Division and Remainder Checking
//
// An exact division x/c (exact means x%c == 0)
// can be implemented by x*m where m is the multiplicative inverse of c (m*c == 1).
//
// Since c is also the multiplicative inverse of m, x*m is lossless,
// and all the exact multiples of c map to all of [0, maxUint64/c].
// The non-multiples are forced to map to larger values.
// This also gives a quick test for whether x is an exact multiple of c:
// compute the exact division and check whether it's at most maxUint64/c:
// x%c == 0 => x*m <= maxUint64/c.
//
// Only odd c have multiplicative inverses mod powers of two.
// To do an exact divide x / (c<<s) we can use (x/c)>>s instead.
// And to check for remainder, we need to check that those low s
// bits are all zero before we shift them away. We can merge that
// with the <= for the exact odd remainder check by rotating the
// shifted bits into the high part instead:
// x%(c<<s) == 0 => bits.RotateLeft64(x*m, -s) <= maxUint64/c.
//
// The compiler does this transformation automatically in general,
// but we apply it here by hand in a few ways that the compiler can't help with.
//
// For a more detailed explanation, see
// Henry S. Warren, Jr., Hacker's Delight, 2nd ed., sections 10-16 and 10-17.
// divisiblePow5 reports whether x is divisible by 5^p.
// It returns false for p not in [1, 22],
// because we only care about float64 mantissas, and 5^23 > 2^53.
func divisiblePow5(x uint64, p int) bool {
	if p < 1 || p > 22 {
		return false
	}
	// entry[0] is the multiplicative inverse of 5^p and entry[1] is
	// maxUint64/5^p; x is a multiple exactly when x*inverse fits under
	// that bound.
	entry := div5Tab[p-1]
	return x*entry[0] <= entry[1]
}
const maxUint64 = 1<<64 - 1
// div5Tab[p-1] is the multiplicative inverse of 5^p and maxUint64/5^p.
var div5Tab = [22][2]uint64{
{0xcccccccccccccccd, maxUint64 / 5},
{0x8f5c28f5c28f5c29, maxUint64 / 5 / 5},
{0x1cac083126e978d5, maxUint64 / 5 / 5 / 5},
{0xd288ce703afb7e91, maxUint64 / 5 / 5 / 5 / 5},
{0x5d4e8fb00bcbe61d, maxUint64 / 5 / 5 / 5 / 5 / 5},
{0x790fb65668c26139, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5},
{0xe5032477ae8d46a5, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0xc767074b22e90e21, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0x8e47ce423a2e9c6d, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0x4fa7f60d3ed61f49, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0x0fee64690c913975, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0x3662e0e1cf503eb1, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0xa47a2cf9f6433fbd, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0x54186f653140a659, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0x7738164770402145, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0xe4a4d1417cd9a041, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0xc75429d9e5c5200d, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0xc1773b91fac10669, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0x26b172506559ce15, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0xd489e3a9addec2d1, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0x90e860bb892c8d5d, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
{0x502e79bf1b6f4f79, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
}
// trimZeros trims trailing zeros from x.
// It finds the largest p such that x % 10^p == 0
// and then returns x / 10^p, p.
// For x == 0, which every power of ten divides, it returns 0, 0.
//
// This is here for reference and tested, because it is an optimization
// used by other ftoa algorithms, but in our implementations it has
// never been benchmarked to be faster than trimming zeros after
// formatting into decimal bytes.
func trimZeros(x uint64) (uint64, int) {
	const (
		// maxU64 has the same value as the package-level maxUint64;
		// spelling it locally keeps these compile-time constants
		// self-contained.
		maxU64 = 1<<64 - 1

		// div1eNm is the multiplicative inverse of 5^N modulo 2^64 and
		// div1eNle is the largest quotient an exact multiple of 10^N
		// can produce.
		div1e8m  = 0xc767074b22e90e21
		div1e8le = maxU64 / 100000000
		div1e4m  = 0xd288ce703afb7e91
		div1e4le = maxU64 / 10000
		div1e2m  = 0x8f5c28f5c28f5c29
		div1e2le = maxU64 / 100
		div1e1m  = 0xcccccccccccccccd
		div1e1le = maxU64 / 10
	)

	// _ = assert[x - y] asserts at compile time that x == y.
	// Assert that the multiplicative inverses are correct
	// by checking that (div1eNm * 5^N) % 1<<64 == 1.
	var assert [1]struct{}
	_ = assert[(div1e8m*5*5*5*5*5*5*5*5)%(1<<64)-1]
	_ = assert[(div1e4m*5*5*5*5)%(1<<64)-1]
	_ = assert[(div1e2m*5*5)%(1<<64)-1]
	_ = assert[(div1e1m*5)%(1<<64)-1]

	// Zero passes every divisibility test below, so the 10^8 loop would
	// never terminate. There is nothing to trim; return immediately.
	if x == 0 {
		return 0, 0
	}

	// Cut 8 zeros, then 4, then 2, then 1.
	// Multiplying by the inverse of 5^N and rotating right by N divides
	// by 10^N exactly when x is a multiple of 10^N; any other x yields
	// a value above the div1eNle bound.
	p := 0
	for d := bits.RotateLeft64(x*div1e8m, -8); d <= div1e8le; d = bits.RotateLeft64(x*div1e8m, -8) {
		x = d
		p += 8
	}
	if d := bits.RotateLeft64(x*div1e4m, -4); d <= div1e4le {
		x = d
		p += 4
	}
	if d := bits.RotateLeft64(x*div1e2m, -2); d <= div1e2le {
		x = d
		p += 2
	}
	if d := bits.RotateLeft64(x*div1e1m, -1); d <= div1e1le {
		x = d
		p += 1
	}
	return x, p
}

View file

@ -5,161 +5,26 @@
package strconv_test
import (
. "internal/strconv"
"math"
"testing"
)
var pow10Tests = []struct {
exp10 int
mant uint128
exp2 int
ok bool
}{
{-349, uint128{0, 0}, 0, false},
{-348, uint128{0xFA8FD5A0081C0288, 0x1732C869CD60E453}, -1156, true},
{0, uint128{0x8000000000000000, 0x0000000000000000}, 1, true},
{347, uint128{0xD13EB46469447567, 0x4B7195F2D2D1A9FB}, 1153, true},
{348, uint128{0, 0}, 0, false},
}
// TestPow10 checks pow10 against the boundary cases in pow10Tests and
// then checks every exponent in [pow10Min, pow10Max] approximately
// against math.Pow.
func TestPow10(t *testing.T) {
	for _, tt := range pow10Tests {
		mant, exp2, ok := pow10(tt.exp10)
		// Compare ok too, so the out-of-range table entries actually
		// verify that pow10 reports failure.
		if mant != tt.mant || exp2 != tt.exp2 || ok != tt.ok {
			t.Errorf("pow10(%d) = %#016x, %#016x, %d, %v want %#016x,%#016x, %d, %v",
				tt.exp10, mant.Hi, mant.Lo, exp2, ok,
				tt.mant.Hi, tt.mant.Lo, tt.exp2, tt.ok)
		}
	}

	for p := pow10Min; p <= pow10Max; p++ {
		mant, exp2, ok := pow10(p)
		if !ok {
			t.Errorf("pow10(%d) not ok", p)
			continue
		}
		// Note: -64 instead of -128 because we only used mant.Hi, not all of mant.
		have := math.Ldexp(float64(mant.Hi), exp2-64)
		want := math.Pow(10, float64(p))
		if math.Abs(have-want)/want > 0.00001 {
			t.Errorf("pow10(%d) = %#016x%016x/2^128 * 2^%d = %g want ~%g", p, mant.Hi, mant.Lo, exp2, have, want)
		}
	}
}
// u128 builds a uint128 from its high and low 64-bit words.
func u128(hi, lo uint64) uint128 {
	var v uint128
	v.Hi = hi
	v.Lo = lo
	return v
}
var umul192Tests = []struct {
x uint64
y uint128
hi uint64
mid uint64
lo uint64
}{
{0, u128(0, 0), 0, 0, 0},
{^uint64(0), u128(^uint64(0), ^uint64(0)), ^uint64(1), ^uint64(0), 1},
}
// TestUmul192 checks Umul192 against each entry of umul192Tests.
func TestUmul192(t *testing.T) {
	for i := range umul192Tests {
		tc := &umul192Tests[i]
		gotHi, gotMid, gotLo := Umul192(tc.x, tc.y)
		if gotHi == tc.hi && gotMid == tc.mid && gotLo == tc.lo {
			continue
		}
		t.Errorf("umul192(%#x, {%#x,%#x}) = %#x, %#x, %#x, want %#x, %#x, %#x",
			tc.x, tc.y.Hi, tc.y.Lo, gotHi, gotMid, gotLo, tc.hi, tc.mid, tc.lo)
	}
}
// TestLog10Pow2 compares the integer routine under test with
// ⌊x * ln 2 / ln 10⌋ computed in floating point, for every x in [-1600, 1600].
// NOTE(review): this span interleaves the old (TestMulLog10_2 / mulLog10_2)
// and new (TestLog10Pow2 / log10Pow2) lines of a diff — confirm which side
// is intended before compiling.
func TestMulLog10_2(t *testing.T) {
func TestLog10Pow2(t *testing.T) {
for x := -1600; x <= +1600; x++ {
iMath := mulLog10_2(x)
iMath := log10Pow2(x)
// Floating-point reference value for the same floor.
fMath := int(math.Floor(float64(x) * math.Ln2 / math.Ln10))
if iMath != fMath {
t.Errorf("mulLog10_2(%d) failed: %d vs %d\n", x, iMath, fMath)
t.Errorf("log10Pow2(%d) = %d, want %d\n", x, iMath, fMath)
}
}
}
// TestLog2Pow10 compares the integer routine under test with
// ⌊x * ln 10 / ln 2⌋ computed in floating point, for every x in [-500, 500].
// NOTE(review): this span interleaves the old (TestMulLog2_10 / mulLog2_10)
// and new (TestLog2Pow10 / log2Pow10) lines of a diff — confirm which side
// is intended before compiling.
func TestMulLog2_10(t *testing.T) {
func TestLog2Pow10(t *testing.T) {
for x := -500; x <= +500; x++ {
iMath := mulLog2_10(x)
iMath := log2Pow10(x)
// Floating-point reference value for the same floor.
fMath := int(math.Floor(float64(x) * math.Ln10 / math.Ln2))
if iMath != fMath {
t.Errorf("mulLog2_10(%d) failed: %d vs %d\n", x, iMath, fMath)
}
}
}
// pow5 returns 5**p as a uint64, computed by repeated multiplication.
// For p <= 0 the loop does not run and the result is 1.
// The multiplication wraps silently if 5**p does not fit in 64 bits.
func pow5(p int) uint64 {
	result := uint64(1)
	for i := 0; i < p; i++ {
		result *= 5
	}
	return result
}
// TestDivisiblePow5 checks divisiblePow5(x, p) for p = 1..22 using values
// around 5**p: 1, 5**p-1, 5**p+1, and 5**(p-1) must be rejected, while
// 0, 5**p, and 2*5**p must be accepted.
func TestDivisiblePow5(t *testing.T) {
	for p := 1; p <= 22; p++ {
		x := pow5(p)
		if divisiblePow5(1, p) {
			t.Errorf("divisiblePow5(1, %d) = true, want, false", p)
		}
		if divisiblePow5(x-1, p) {
			t.Errorf("divisiblePow5(%d, %d) = true, want false", x-1, p)
		}
		if divisiblePow5(x+1, p) {
			// Report the value that was actually tested (x+1, not x-1).
			t.Errorf("divisiblePow5(%d, %d) = true, want false", x+1, p)
		}
		if divisiblePow5(x/5, p) {
			t.Errorf("divisiblePow5(%d, %d) = true, want false", x/5, p)
		}
		if !divisiblePow5(0, p) {
			t.Errorf("divisiblePow5(0, %d) = false, want true", p)
		}
		if !divisiblePow5(x, p) {
			t.Errorf("divisiblePow5(%d, %d) = false, want true", x, p)
		}
		// 2*x > x guards against a wrapped product.
		if 2*x > x && !divisiblePow5(2*x, p) {
			t.Errorf("divisiblePow5(%d, %d) = false, want true", 2*x, p)
		}
	}
}
// TestDiv5Tab validates both columns of div5Tab for p = 1..22:
// column 0 must be the multiplicative inverse of 5**p modulo 2**64,
// and column 1 must be ⌊(2**64 - 1) / 5**p⌋.
func TestDiv5Tab(t *testing.T) {
	for p := 1; p <= 22; p++ {
		m := div5Tab[p-1][0]
		le := div5Tab[p-1][1]

		// See comment in math.go on div5Tab.
		// m needs to be multiplicative inverse of pow5(p).
		// The failure messages name div5Tab, the table actually being checked.
		if m*pow5(p) != 1 {
			t.Errorf("div5Tab[%d-1][0] = %#x, but %#x * (5**%d) = %d, want 1", p, m, m, p, m*pow5(p))
		}

		// le needs to be ⌊(1<<64 - 1) / 5^p⌋.
		want := (1<<64 - 1) / pow5(p)
		if le != want {
			t.Errorf("div5Tab[%d-1][1] = %#x, want %#x", p, le, want)
		}
	}
}
// TestTrimZeros checks trimZeros on each seed value multiplied by successive
// powers of ten: after p multiplications by 10 it expects trimZeros to return
// the original seed together with the count p.
func TestTrimZeros(t *testing.T) {
for _, x := range []uint64{1, 2, 3, 4, 101, 123} {
want := x
for p := range 20 {
haveX, haveP := trimZeros(x)
if haveX != want || haveP != p {
t.Errorf("trimZeros(%d) = %d, %d, want %d, %d", x, haveX, haveP, want, p)
}
// Stop before x *= 10 could overflow uint64.
if x >= (1<<64-1)/10 {
break
}
x *= 10
// NOTE(review): the next line looks like residue from the TestLog2Pow10
// hunk of the diff (iMath and fMath are not declared here) — confirm.
t.Errorf("log2Pow10(%d) = %d, want %d\n", x, iMath, fMath)
}
}
}

View file

@ -55,7 +55,19 @@ func main() {
}
d := new(big.Int).Div(r.Num(), r.Denom())
hi, lo := new(big.Int).DivMod(d, b1p64, new(big.Int))
fmt.Fprintf(&out, "\t{%#016x, %#016x}, // 1e%d * 2**%d\n", hi.Uint64(), lo.Uint64(), e, be)
uhi := hi.Uint64()
ulo := lo.Uint64()
if !r.IsInt() {
ulo++
if ulo == 0 {
uhi++
}
}
if ulo != 0 {
uhi++
ulo = -ulo
}
fmt.Fprintf(&out, "\t{%#016x, %#016x}, // 1e%d * 2**%d\n", uhi, ulo, e, be)
}
fmt.Fprintf(&out, "}\n")
@ -86,6 +98,6 @@ const (
// pow10Tab holds 128-bit mantissas of powers of 10.
// The values are scaled so the high bit is always set; there is no "implicit leading 1 bit".
var pow10Tab = [...]uint128{
// The values are scaled so the high bit is always set.
var pow10Tab = [...]pmHiLo{
`

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,290 @@
// Copyright 2026 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Floating point binary↔decimal conversion by fast unrounded scaling.
// See “Floating-Point Printing and Parsing Can Be Simple And Fast”,
// https://research.swtch.com/fp
package strconv
import (
"math/bits"
"unsafe"
)
// bool2 maps a boolean onto the integer type T:
// it yields 1 when b is true and 0 when b is false.
func bool2[T ~int | ~uint32 | ~uint64](b bool) T {
	var n T
	if b {
		n = 1
	}
	return n
}
// pack64 assembles the float64 f = m * 2**e from its parts.
//
// The caller supplies a mantissa of at most 53 bits in the low bits of m and
// an exponent e that is in range for that mantissa. Bit 63 of m is passed
// through to the result unchanged (the overflow path keeps only that bit).
//
// If bit 52 of m is clear, the bits of m are used as the result as-is and e
// is not consulted. If e is too large for the exponent field, pack64 returns
// the bit pattern with an all-ones exponent and ErrRange.
func pack64(m uint64, e int) (float64, error) {
	const (
		topBit    = uint64(1) << 52 // highest bit of a 53-bit mantissa
		signBit   = uint64(1) << 63
		expOffset = 1075  // added to e to form the exponent field
		expAllOne = 0x7FF // exponent field with every bit set
	)
	switch {
	case m&topBit == 0:
		return float64frombits(m), nil
	case e >= expAllOne-expOffset:
		return float64frombits(m&signBit | expAllOne<<52), ErrRange
	}
	return float64frombits(m&^topBit | uint64(expOffset+e)<<52), nil
}
// pack32 assembles the float32 f = m * 2**e from its parts.
//
// The caller supplies a mantissa of at most 24 bits in the low bits of m and
// an exponent e that is in range for that mantissa. Bit 31 of m is passed
// through to the result unchanged (the overflow path keeps only that bit).
//
// If bit 23 of m is clear, the bits of m are used as the result as-is and e
// is not consulted. If e is too large for the exponent field, pack32 returns
// the bit pattern with an all-ones exponent and ErrRange.
func pack32(m uint32, e int) (float32, error) {
	const (
		topBit    = uint32(1) << 23 // highest bit of a 24-bit mantissa
		signBit   = uint32(1) << 31
		expOffset = 150  // added to e to form the exponent field
		expAllOne = 0xFF // exponent field with every bit set
	)
	switch {
	case m&topBit == 0:
		return float32frombits(m), nil
	case e >= expAllOne-expOffset:
		return float32frombits(m&signBit | expAllOne<<23), ErrRange
	}
	return float32frombits(m&^topBit | uint32(expOffset+e)<<23), nil
}
// An unrounded holds a value that has not yet been rounded to an integer.
// The methods below treat the low two bits as the fraction and the
// remaining high bits as the integer part: each one adds a bias and then
// discards the low two bits.
type unrounded uint64

// floor returns u rounded down.
func (u unrounded) floor() uint64 {
	return uint64(u >> 2)
}

// roundHalfDown returns u rounded to nearest, with an exact half rounding down.
func (u unrounded) roundHalfDown() uint64 {
	return uint64((u + 1) >> 2)
}

// round returns u rounded to nearest, with an exact half rounding to even.
func (u unrounded) round() uint64 {
	odd := (u >> 2) & 1 // low bit of the integer part
	return uint64((u + 1 + odd) >> 2)
}

// roundHalfUp returns u rounded to nearest, with an exact half rounding up.
func (u unrounded) roundHalfUp() uint64 {
	return uint64((u + 2) >> 2)
}

// ceil returns u rounded up.
func (u unrounded) ceil() uint64 {
	return uint64((u + 3) >> 2)
}

// nudge returns u moved by δ units of the unrounded encoding.
// δ may be negative; the addition wraps modulo 2**64.
func (u unrounded) nudge(δ int) unrounded {
	return unrounded(δ) + u
}
// div returns u divided by d, still in unrounded form.
// Bit 0 of the result is set if it was already set in u or if the
// division left a nonzero remainder, so discarded precision stays recorded.
func (u unrounded) div(d uint64) unrounded {
	n := uint64(u)
	quo, rem := n/d, n%d
	sticky := u&1 | bool2[unrounded](rem != 0)
	return unrounded(quo) | sticky
}
// rsh returns u shifted right by s bits, still in unrounded form.
// Bit 0 of the result is set if it was already set in u or if any of the
// s bits shifted out were nonzero.
func (u unrounded) rsh(s int) unrounded {
	dropped := u & (1<<s - 1) // the bits that the shift discards
	return u>>s | u&1 | bool2[unrounded](dropped != 0)
}
// log10Pow2(x) returns ⌊log₁₀ 2**x⌋ = ⌊x * log₁₀ 2⌋,
// using an 18-bit fixed-point approximation of log₁₀ 2.
func log10Pow2(x int) int {
	const (
		fracBits = 18
		log10Of2 = 78913 // log₁₀ 2 ≈ 0.30102999566 ≈ 78913 / 2^18
	)
	return (x * log10Of2) >> fracBits
}
// log2Pow10(x) returns ⌊log₂ 10**x⌋ = ⌊x * log₂ 10⌋,
// using a 15-bit fixed-point approximation of log₂ 10.
func log2Pow10(x int) int {
	const (
		fracBits = 15
		log2Of10 = 108853 // log₂ 10 ≈ 3.32192809489 ≈ 108853 / 2^15
	)
	return (x * log2Of10) >> fracBits
}
// uint64pow10 is a lookup table of powers of ten:
// uint64pow10[x] is 10**x for 0 ≤ x ≤ 19.
var uint64pow10 = [...]uint64{
	0:  1,
	1:  1e1,
	2:  1e2,
	3:  1e3,
	4:  1e4,
	5:  1e5,
	6:  1e6,
	7:  1e7,
	8:  1e8,
	9:  1e9,
	10: 1e10,
	11: 1e11,
	12: 1e12,
	13: 1e13,
	14: 1e14,
	15: 1e15,
	16: 1e16,
	17: 1e17,
	18: 1e18,
	19: 1e19,
}
// fixedWidthFloat returns the n-digit decimal form of f = m * 2**e as d * 10**p.
// n can be at most 18.
//
// When fmt is 'f', n is only a conservative estimate of the digit count:
// further low digits are divided away until the internal scaling power is
// no greater than prec.
func fixedWidthFloat(m uint64, e, n, prec int, fmt byte) (d uint64, p int) {
	// Pick the power of ten to scale by, then scale m.
	p = n - 1 - log10Pow2(e+63)
	var sc scaler
	prescale(&sc, e, p, log2Pow10(p))
	val := uscale(m, &sc)

	// If the scaled value reaches 10**n, drop one digit.
	if limit := unmin(uint64pow10[n]); val >= limit {
		val = val.div(10)
		p--
	}

	// For 'f' formatting, discard digits until p matches prec.
	for fmt == 'f' && p > prec {
		val = val.div(10)
		p--
	}
	return val.round(), -p
}
// parseFloat64 rounds d * 10**p to the nearest float64 f.
// d can have at most 19 digits.
// sign is OR'ed into the result bits unchanged.
// It returns ErrRange if the result rounds to infinity.
func parseFloat64(d uint64, p int, sign uint64) (float64, error) {
	width := bits.Len64(d)
	pad := 64 - width // shift that left-justifies d in 64 bits
	lp := log2Pow10(p)
	e := min(1074, 53-width-lp)

	var sc scaler
	prescale(&sc, e-pad, p, lp)
	if sc.s >= 64 {
		// The shift is out of range for uscale; the result is a signed zero.
		return float64frombits(sign), nil
	}
	u := uscale(d<<pad, &sc)

	// This block is branch-free code for:
	//	if u.round() >= 1<<53 {
	//		u = u.rsh(1)
	//		e = e - 1
	//	}
	carry := bool2[int](u >= unmin(1<<53))
	u = u>>carry | u&1
	e -= carry
	return pack64(sign|u.round(), -e)
}
// parseFloat32 rounds d * 10**p to the nearest float32 f.
// d can have at most 19 digits.
// sign is OR'ed into the result bits unchanged.
// It returns ErrRange if the result rounds to infinity.
func parseFloat32(d uint64, p int, sign uint32) (float32, error) {
	width := bits.Len64(d)
	pad := 64 - width // shift that left-justifies d in 64 bits
	lp := log2Pow10(p)
	e := min(149, 24-width-lp)

	var sc scaler
	prescale(&sc, e-pad, p, lp)
	if sc.s >= 64 {
		// The shift is out of range for uscale; the result is a signed zero.
		return float32frombits(sign), nil
	}
	u := uscale(d<<pad, &sc)

	// This block is branch-free code for:
	//	if u.round() >= 1<<24 {
	//		u = u.rsh(1)
	//		e = e - 1
	//	}
	carry := bool2[int](u >= unmin(1<<24))
	u = u>>carry | u&1
	e -= carry
	return pack32(sign|uint32(u.round()), -e)
}
// unmin returns the minimum unrounded that rounds to x,
// computed as 4*x - 2 (two units below the unrounded encoding of x itself).
func unmin(x uint64) unrounded {
	return unrounded(4*x - 2)
}
// shortFloat computes the shortest formatting of f,
// using as few digits as possible that will still round trip
// back to the original float.
//
// The type parameter F only selects the mantissa width and minimum exponent.
// The result is a digit string d and a decimal exponent p.
// NOTE(review): m appears to be the mantissa left-justified to 64 bits
// (the code compares m with 1<<63 and shifts by z = 63 - mantBits),
// with f = m * 2**e — confirm against the caller.
func shortFloat[F float32 | float64](m uint64, e int) (d uint64, p int) {
var mantBits, minExp int // parameterized constants
switch 8 * unsafe.Sizeof(F(0)) {
case 32:
mantBits = float32MantBits
minExp = float32MinExp
case 64:
mantBits = float64MantBits
minExp = float64MinExp
}
// Note: these cases could be factored a little more,
// but in the first two branches, z is a constant,
// allowing the compiler to greatly simplify the code.
var min, max uint64
var odd int
z := 63 - mantBits
if m == 1<<63 && e > minExp {
// m is exactly the top bit and e is above minExp:
// the lower bound sits only a quarter step below m, and p comes from skewed.
p = -skewed(e + z)
min = m - 1<<(z-2) // min = m - 1/4 * 2**(e+z)
max = m + 1<<(z-1) // max = m + 1/2 * 2**(e+z)
odd = int(m>>z) & 1
} else if e >= minExp {
// e is at or above minExp: bounds are half a step either side of m.
p = -log10Pow2(e + z)
min = m - 1<<(z-1) // min = m - 1/2 * 2**(e+z)
max = m + 1<<(z-1) // max = m + 1/2 * 2**(e+z)
odd = int(m>>z) & 1
} else {
// e is below minExp: widen z by the shortfall before computing the bounds.
z = z + (minExp - e)
p = -log10Pow2(e + z)
min = m - 1<<(z-1) // min = m - 1/2 * 2**(e+z)
max = m + 1<<(z-1) // max = m + 1/2 * 2**(e+z)
odd = int(m>>z) & 1
}
var pre scaler
prescale(&pre, e, p, log2Pow10(p))
// Scale both bounds to decimal. When odd is 1 the bounds are nudged inward
// by one unrounded unit before taking ceil/floor.
dmin := uscale(min, &pre).nudge(+odd).ceil()
dmax := uscale(max, &pre).nudge(-odd).floor()
// If a multiple of 10 lies in [dmin, dmax], one fewer digit suffices.
if d = dmax / 10; d*10 >= dmin {
return d, -(p - 1)
}
// If the interval holds more than one integer, use the rounded scaling of m
// itself; otherwise dmin is the answer.
if d = dmin; d < dmax {
d = uscale(m, &pre).round()
}
return d, -p
}
// skewed computes the skewed footprint of m * 2**e,
// which is ⌊log₁₀ 3/4 * 2**e⌋ = ⌊e*(log₁₀ 2)-(log₁₀ 4/3)⌋.
// Both terms are evaluated in fixed point with 21 fractional bits.
func skewed(e int) int {
	const (
		fracBits = 21
		perBit   = 631305 // fixed-point multiplier for e (the log₁₀ 2 term)
		offset   = 261663 // fixed-point constant subtracted (the log₁₀ 4/3 term)
	)
	return (e*perBit - offset) >> fracBits
}
// A pmHiLo represents the 128-bit quantity hi<<64 - lo.
// Note that lo is subtracted, not added.
type pmHiLo struct {
	hi, lo uint64
}
// A scaler holds derived scaling constants for a given e, p pair:
// the two words of the power-of-ten multiplier and a shift count s.
type scaler struct {
	// Note: using pm pmHiLo here nudges uscale just over the inlining boundary. Don't.
	pmHi, pmLo uint64
	s          int
}
// prescale fills *pre with the scaling constants for e, p:
// the pow10Tab entry for p and the shift count s = -(e + lp + 3).
// lp must be log2Pow10(p).
//
// prescale does not validate pre.s. The caller must either avoid e, p pairs
// that give pre.s < 0 or pre.s >= 64, or handle those cases before passing
// the result to uscale. In practice, pre.s < 0 would indicate a buggy caller,
// and pre.s >= 64 can only happen for parsing, where the call sites pick it off.
func prescale(pre *scaler, e, p, lp int) {
	entry := &pow10Tab[p-pow10Min]
	pre.pmHi, pre.pmLo = entry.hi, entry.lo
	pre.s = -(e + lp + 3)
}
// uscale returns unround(x * 2**e * 10**p).
// The caller should pass &pre for prescale(&pre, e, p, log2Pow10(p))
// and should have left-justified x so its high bit is set.
// The caller is also responsible for checking that c.s < 64.
// For formatting, that's always true.
// For parsing, the caller needs to pick it off early and return a signed 0.
//
// Bit 0 of the result is a sticky bit: it is set when precision was discarded.
func uscale(x uint64, c *scaler) unrounded {
// 128-bit product of x and the upper multiplier word.
hi, mid := bits.Mul64(x, c.pmHi)
s := c.s & 63 // make shifts cheaper
// Fast path: some of the low s bits of hi are nonzero, so the shift discards
// nonzero bits; the sticky bit is set without consulting pmLo.
if hi>>s<<s != hi {
return unrounded(hi>>s | 1)
}
// Slow path: account for the lower multiplier word.
// NOTE(review): the subtraction presumably reflects the multiplier being
// stored as hi<<64 - lo (see pmHiLo) — confirm.
mid2, _ := bits.Mul64(x, c.pmLo)
// Borrow from hi when mid - mid2 would underflow.
hi -= bool2[uint64](mid < mid2)
// The sticky bit is set when the (wrapped) difference mid-mid2 exceeds 1.
return unrounded(hi>>s | bool2[uint64](mid-mid2 > 1))
}
// setDigits fills s[:nd] with the nd digits described by d, p
// (via formatBase10) and reports two positions:
// dp = nd + p, and nzd, the length of s[:nd] once trailing '0' bytes
// have been dropped.
// If nd > len(s), nothing is written and the result is (0, nd).
func setDigits(s []byte, d uint64, p, nd int) (dp, nzd int) {
	// Note: nd <= len(s) is guaranteed by caller,
	// but comparing explicitly here lets the compiler know,
	// so that it can remove the bounds check in the loop.
	// (The slice s[:nd] not panicking only establishes nd <= cap(s).)
	if nd > len(s) {
		return 0, nd
	}
	formatBase10(s[:nd], d)
	dp = nd + p
	// Walk back over trailing zeros.
	for nd > 0 && s[nd-1] == '0' {
		nd--
	}
	return dp, nd
}
// numDigits returns the number of decimal digits in d.
// It requires d ≥ 1.
//
// The bit length of d gives an estimate that is either exact or one too
// small; a single comparison against the power-of-ten table corrects it.
func numDigits(d uint64) int {
	guess := log10Pow2(bits.Len64(d))
	bump := bool2[int](d >= uint64pow10[guess])
	return guess + bump
}