From 113eb42efca8e14355f57c89cd38d31616728a27 Mon Sep 17 00:00:00 2001 From: Taichi Maeda Date: Thu, 20 Nov 2025 23:56:29 +0000 Subject: [PATCH] strconv: replace Ryu ftoa with Dragonbox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dragonbox is a faster ftoa algorithm that provides the same guarantees as Ryu: round-trip conversion, shortest length, and correct rounding. Dragonbox only supports shortest-precision conversion, so we continue to use Ryu-printf for fixed precision. The new implementation has been fuzz-tested against the current Ryu implementation in addition to the existing test suite. Benchmarks show at least ~15-20% performance improvement. The following shows the relevant output from benchstat. Full benchmark results and plots are available at: https://github.com/taichimaeda/dragonbox-bench/ goos: darwin goarch: arm64 pkg: strconv cpu: Apple M1 │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ FormatFloat/Decimal-8 32.71n ± 14% 31.89n ± 12% ~ (p=0.165 n=10) FormatFloat/Float-8 45.54n ± 1% 42.48n ± 0% -6.70% (p=0.000 n=10) FormatFloat/Exp-8 50.06n ± 0% 32.27n ± 1% -35.54% (p=0.000 n=10) FormatFloat/NegExp-8 47.15n ± 1% 31.33n ± 0% -33.56% (p=0.000 n=10) FormatFloat/LongExp-8 46.15n ± 1% 43.66n ± 0% -5.38% (p=0.000 n=10) FormatFloat/Big-8 50.02n ± 0% 39.36n ± 0% -21.31% (p=0.000 n=10) FormatFloat/BinaryExp-8 27.89n ± 0% 27.88n ± 1% ~ (p=0.798 n=10) FormatFloat/32Integer-8 31.41n ± 0% 23.00n ± 3% -26.79% (p=0.000 n=10) FormatFloat/32ExactFraction-8 44.93n ± 1% 29.91n ± 0% -33.43% (p=0.000 n=10) FormatFloat/32Point-8 43.22n ± 1% 33.82n ± 0% -21.74% (p=0.000 n=10) FormatFloat/32Exp-8 45.91n ± 0% 25.48n ± 0% -44.50% (p=0.000 n=10) FormatFloat/32NegExp-8 44.66n ± 0% 25.12n ± 0% -43.76% (p=0.000 n=10) FormatFloat/32Shortest-8 37.96n ± 0% 27.83n ± 1% -26.68% (p=0.000 n=10) FormatFloat/Slowpath64-8 47.74n ± 2% 45.85n ± 0% -3.96% (p=0.000 n=10) FormatFloat/SlowpathDenormal64-8 42.78n ± 1% 41.46n ± 0% -3.07% (p=0.000 n=10) FormatFloat/ShorterIntervalCase32-8 25.49n ± 2% FormatFloat/ShorterIntervalCase64-8 27.72n ± 1% geomean 41.95n 31.89n -22.11% Fixes #74886 Co-authored-by: Junekey Jeon Change-Id: I923f7259c9cecd0896b2340a43d1041cc2ed7787 GitHub-Last-Rev: fd735db0b1e3fab5fbad4d8b75c8e29247069d94 GitHub-Pull-Request: golang/go#75195 Reviewed-on: https://go-review.googlesource.com/c/go/+/700075 Reviewed-by: Russ Cox Reviewed-by: Alan Donovan TryBot-Bypass: Russ Cox --- src/internal/strconv/ftoa.go | 6 +- src/internal/strconv/ftoa_test.go | 4 + src/internal/strconv/ftoadbox.go | 349 ++++++++++++++++++++++++++++++ 3 files changed, 357 insertions(+), 2 deletions(-) create mode 100644 src/internal/strconv/ftoadbox.go diff --git a/src/internal/strconv/ftoa.go b/src/internal/strconv/ftoa.go index 64be29e23ef..c8c98c13804 100644 --- a/src/internal/strconv/ftoa.go +++ b/src/internal/strconv/ftoa.go @@ -86,6 +86,7 @@ func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte { neg := bits>>(flt.expbits+flt.mantbits) != 0 exp := int(bits>>flt.mantbits) & (1<> 32) + yl := uint32(y) + + xyh := umul64(x, yh) + xyl := umul64(x, yl) + + return xyh + (xyl >> 32) +} + +// umul96Lower64 returns the lower 64 bits (out of 96 bits) of x * y. +func umul96Lower64(x uint32, y uint64) uint64 { + return uint64(uint64(x) * y) +} + +// umul128Upper64 returns the upper 64 bits (out of 128 bits) of x * y. +func umul128Upper64(x, y uint64) uint64 { + a := uint32(x >> 32) + b := uint32(x) + c := uint32(y >> 32) + d := uint32(y) + + ac := umul64(a, c) + bc := umul64(b, c) + ad := umul64(a, d) + bd := umul64(b, d) + + intermediate := (bd >> 32) + uint64(uint32(ad)) + uint64(uint32(bc)) + + return ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32) +} + +// umul192Upper128 returns the upper 128 bits (out of 192 bits) of x * y. +func umul192Upper128(x uint64, y uint128) uint128 { + r := umul128(x, y.Hi) + t := umul128Upper64(x, y.Lo) + return uadd128(r, t) +} + +// umul192Lower128 returns the lower 128 bits (out of 192 bits) of x * y. +func umul192Lower128(x uint64, y uint128) uint128 { + high := x * y.Hi + highLow := umul128(x, y.Lo) + return uint128{uint64(high + highLow.Hi), highLow.Lo} +} + +// dboxMulPow64 computes x^(i), y^(i), z^(i) +// from the precomputed value of φ̃k for float64 +// and also checks if x^(f), y^(f), z^(f) == 0 (section 5.2.1). +func dboxMulPow64(u uint64, phi uint128) (intPart uint64, isInt bool) { + r := umul192Upper128(u, phi) + intPart = r.Hi + isInt = r.Lo == 0 + return +} + +// dboxMulPow32 computes x^(i), y^(i), z^(i) +// from the precomputed value of φ̃k for float32 +// and also checks if x^(f), y^(f), z^(f) == 0 (section 5.2.1). +func dboxMulPow32(u uint32, phi uint64) (intPart uint32, isInt bool) { + r := umul96Upper64(u, phi) + intPart = uint32(r >> 32) + isInt = uint32(r) == 0 + return +} + +// dboxParity64 computes only the parity of x^(i), y^(i), z^(i) +// from the precomputed value of φ̃k for float64 +// and also checks if x^(f), y^(f), z^(f) = 0 (section 5.2.1). +func dboxParity64(mant2 uint64, phi uint128, beta int) (parity bool, isInt bool) { + r := umul192Lower128(mant2, phi) + parity = ((r.Hi >> (64 - beta)) & 1) != 0 + isInt = ((uint64(r.Hi << beta)) | (r.Lo >> (64 - beta))) == 0 + return +} + +// dboxParity32 computes only the parity of x^(i), y^(i), z^(i) +// from the precomputed value of φ̃k for float32 +// and also checks if x^(f), y^(f), z^(f) = 0 (section 5.2.1). +func dboxParity32(mant2 uint32, phi uint64, beta int) (parity bool, isInt bool) { + r := umul96Lower64(mant2, phi) + parity = ((r >> (64 - beta)) & 1) != 0 + isInt = uint32(r>>(32-beta)) == 0 + return +} + +// dboxDelta64 returns δ^(i) from the precomputed value of φ̃k for float64. +func dboxDelta64(φ uint128, β int) uint32 { + return uint32(φ.Hi >> (64 - 1 - β)) +} + +// dboxDelta32 returns δ^(i) from the precomputed value of φ̃k for float32. +func dboxDelta32(φ uint64, β int) uint32 { + return uint32(φ >> (64 - 1 - β)) +} + +// mulLog10_2MinusLog10_4Over3 computes +// ⌊e*log10(2)-log10(4/3)⌋ = ⌊log10(2^e)-log10(4/3)⌋ (section 6.3). +func mulLog10_2MinusLog10_4Over3(e int) int { + // e should be in the range [-2985, 2936]. + return (e*631305 - 261663) >> 21 +} + +const ( + floatMantBits64 = 52 // p = 52 for float64. + floatMantBits32 = 23 // p = 23 for float32. +) + +// dboxRange64 returns the left and right float64 endpoints. +func dboxRange64(φ uint128, β int) (left, right uint64) { + left = (φ.Hi - (φ.Hi >> (float64MantBits + 2))) >> (64 - float64MantBits - 1 - β) + right = (φ.Hi + (φ.Hi >> (float64MantBits + 1))) >> (64 - float64MantBits - 1 - β) + return left, right +} + +// dboxRange32 returns the left and right float32 endpoints. +func dboxRange32(φ uint64, β int) (left, right uint32) { + left = uint32((φ - (φ >> (floatMantBits32 + 2))) >> (64 - floatMantBits32 - 1 - β)) + right = uint32((φ + (φ >> (floatMantBits32 + 1))) >> (64 - floatMantBits32 - 1 - β)) + return left, right +} + +// dboxRoundUp64 computes the round up of y (i.e., y^(ru)). +func dboxRoundUp64(phi uint128, beta int) uint64 { + return (phi.Hi>>(128/2-floatMantBits64-2-beta) + 1) / 2 +} + +// dboxRoundUp32 computes the round up of y (i.e., y^(ru)). +func dboxRoundUp32(phi uint64, beta int) uint32 { + return uint32(phi>>(64-floatMantBits32-2-beta)+1) / 2 +} + +// dboxPow64 gets the precomputed value of φ̃̃k for float64. +func dboxPow64(k, e int) (φ uint128, β int) { + φ, e1, _ := pow10(k) + if k < 0 || k > 55 { + φ.Lo++ + } + β = e + e1 - 1 + return φ, β +} + +// dboxPow32 gets the precomputed value of φ̃̃k for float32. +func dboxPow32(k, e int) (mant uint64, exp int) { + m, e1, _ := pow10(k) + if k < 0 || k > 27 { + m.Hi++ + } + exp = e + e1 - 1 + return m.Hi, exp +}