internal/strconv: use fast unrounded scaling for floating-point

Change floating-point parsing and printing to use fast unrounded scaling,
as presented in “Floating-Point Printing and Parsing Can Be Simple And Fast”,
https://research.swtch.com/fp

This CL deletes almost 900 lines of code while making printing much faster.
Parsing is simpler but about the same speed.

benchmark \ host                           local   linux-arm64            s7   linux-amd64 s7:GOARCH=386     linux-386
                                         vs base       vs base       vs base       vs base       vs base       vs base
Atof64Decimal                                  ~        -0.47%        +1.67%        -2.70%        +1.21%        +2.70%
Atof64Float                                    ~        +2.48%             ~             ~             ~             ~
Atof64FloatExp                                 ~        +1.15%             ~             ~        +1.83%        +3.87%
Atof64Big                                      ~        +4.74%             ~             ~        +4.17%        +7.30%
Atof64RandomBits                         -27.95%             ~       -10.11%       -10.54%             ~             ~
Atof64RandomFloats                             ~        +0.75%             ~             ~             ~             ~
Atof64RandomLongFloats                    -6.24%        -4.17%        -9.60%        -8.55%        -2.62%        -4.52%
Atof32Decimal                                  ~        +3.24%        +3.76%        +3.89%             ~             ~
Atof32Float                                    ~        +2.77%             ~             ~             ~             ~
Atof32FloatExp                                 ~        +4.02%             ~             ~        +2.56%        +6.55%
Atof32Random                             -11.97%       -10.38%       -14.53%       -12.41%        -5.65%        -3.64%
Atof32RandomLong                         -22.77%       -22.45%       -20.00%       -26.52%       -13.36%       -13.84%
AppendFloat/Decimal                      +10.82%        +2.66%        -3.00%             ~             ~             ~
AppendFloat/Float                        -14.26%        -2.49%       -21.84%       -10.52%        -4.70%        -4.84%
AppendFloat/Exp                           +7.69%        +6.01%        -4.15%             ~        -1.04%             ~
AppendFloat/NegExp                        +8.95%        +5.75%        -4.73%             ~             ~             ~
AppendFloat/LongExp                      -33.48%       -29.01%       -31.61%       -34.75%       -23.30%       -21.21%
AppendFloat/Big                          -16.91%       -27.85%       -32.91%       -30.01%       -18.67%        -8.51%
AppendFloat/BinaryExp                    -19.88%        -8.47%        -9.40%       -18.08%        -9.40%        -9.33%
AppendFloat/32Integer                    +29.68%       -11.91%       -12.31%       -15.43%        +5.49%       +14.83%
AppendFloat/32ExactFraction              -14.62%        -7.34%       -14.28%       -13.78%        +6.46%       +15.68%
AppendFloat/32Point                            ~       -16.51%       -30.84%       -22.16%             ~       +11.04%
AppendFloat/32Exp                         -7.44%        -7.72%        -8.47%       -12.78%        +6.13%       +18.65%
AppendFloat/32NegExp                      -6.36%        -7.58%       -10.05%       -13.23%        +7.69%       +18.18%
AppendFloat/32Shortest                   -17.45%       -18.23%       -18.03%       -19.29%        +2.68%       +11.41%
AppendFloat/32Fixed8Hard                 -13.57%       -14.52%       -14.91%       -16.55%       -20.28%       -23.51%
AppendFloat/32Fixed9Hard                 -16.06%       -16.69%       -11.75%       -19.42%        -5.12%             ~
AppendFloat/64Fixed1                      -3.25%        -9.70%        -8.67%       -14.11%       -13.08%       -14.23%
AppendFloat/64Fixed2                      -1.77%        -9.77%        -9.40%       -12.25%       -12.69%       -13.32%
AppendFloat/64Fixed2.5                    -3.46%        -6.21%       -12.38%       -10.86%       -10.47%       -11.31%
AppendFloat/64Fixed3                           ~        -9.39%       -11.13%       -14.39%       -14.50%       -11.16%
AppendFloat/64Fixed4                           ~       -11.91%       -20.62%       -13.40%       -19.78%       -22.41%
AppendFloat/64Fixed5Hard                  -6.45%        -7.36%       -13.88%       -12.42%       -12.31%       -12.92%
AppendFloat/64Fixed12                    -26.39%       -23.15%       -29.45%       -28.11%       -24.63%       -27.61%
AppendFloat/64Fixed16                          ~       -15.85%       -21.24%       -19.76%       -24.24%       -26.14%
AppendFloat/64Fixed12Hard                -16.25%       -12.77%       -18.74%       -19.20%       -17.08%       -18.95%
AppendFloat/64Fixed17Hard                -16.81%        -9.80%       -12.77%       -17.19%        -2.75%        +6.06%
AppendFloat/64Fixed18Hard                      ~        -0.76%             ~             ~             ~       -26.49%
AppendFloat/64FixedF1                    +16.15%       -12.93%       -18.60%       -18.24%        +1.57%             ~
AppendFloat/64FixedF2                    -16.83%        -9.77%       -12.09%       -18.43%       -13.44%       -15.23%
AppendFloat/64FixedF3                          ~        -5.68%        -9.65%       -15.14%        -8.87%       -11.83%
AppendFloat/Slowpath64                   -33.56%       -28.32%       -32.45%       -33.42%       -22.77%       -18.76%
AppendFloat/SlowpathDenormal64           -31.53%       -25.45%       -32.60%       -25.27%       -13.36%        -6.95%
AppendFloat/ShorterIntervalCase32        -19.52%       -14.41%       -13.89%       -17.03%             ~       +12.21%
AppendFloat/ShorterIntervalCase64        +14.00%       +14.94%        +4.06%             ~        +9.43%        +8.07%
AppendUint                               -33.66%       -13.19%       -11.52%       -13.39%       -13.68%        -9.04%
AppendUintVarlen/digits=1                      ~        -4.96%             ~             ~             ~       +13.97%
AppendUintVarlen/digits=2                +10.01%        +2.45%             ~             ~             ~       +11.23%
AppendUintVarlen/digits=3                 -5.10%        +0.53%        +2.32%        +3.74%       +18.05%       +61.14%
AppendUintVarlen/digits=4                      ~       +14.32%             ~        +6.86%       +22.09%       +61.28%
AppendUintVarlen/digits=5                -18.17%        +0.62%        +1.13%             ~       +13.94%       +48.42%
AppendUintVarlen/digits=6                 -8.74%        +7.58%        +2.47%        +7.86%       +17.45%       +50.58%
AppendUintVarlen/digits=7                -27.30%        -2.17%        -1.61%             ~        +8.31%       +37.41%
AppendUintVarlen/digits=8                -19.80%       +13.49%             ~             ~       +19.81%       +55.28%
AppendUintVarlen/digits=9                -28.86%        +3.29%             ~        -7.81%       +10.42%       +39.20%
AppendUintVarlen/digits=10               -33.46%        -8.00%       -12.57%       -19.07%        -8.59%        +7.48%
AppendUintVarlen/digits=11               -37.91%        -8.32%       -11.85%       -16.89%       -10.14%             ~
AppendUintVarlen/digits=12               -28.93%        -7.26%       -14.27%       -23.26%       -12.07%             ~
AppendUintVarlen/digits=13               -33.20%        -8.87%       -13.43%       -21.56%        -8.39%             ~
AppendUintVarlen/digits=14               -33.20%        -9.49%       -13.50%       -21.92%       -10.43%             ~
AppendUintVarlen/digits=15               -36.90%        -9.16%        -8.48%       -16.95%       -10.62%             ~
AppendUintVarlen/digits=16               -36.20%        -8.06%       -13.58%       -20.92%        -6.67%        +3.52%
AppendUintVarlen/digits=17               -36.15%        -7.47%       -14.12%       -21.53%        -6.00%        +3.88%
AppendUintVarlen/digits=18               -35.85%        -7.56%       -14.12%       -19.66%        -9.16%             ~
AppendUintVarlen/digits=19               -43.45%       -17.14%       -20.38%       -28.29%       -25.25%       -16.47%
AppendUintVarlen/digits=20               -40.70%       -13.60%       -18.66%       -24.18%       -24.69%       -17.33%

Change-Id: I4eed57cfbf398b5d5327efd749e13610e17153e9
Reviewed-on: https://go-review.googlesource.com/c/go/+/743860
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Neal Patel <neal@golang.org>
Reviewed-by: Neal Patel <nealpatel@google.com>
2026-06-27 03:11:23 +00:00 · 2026-01-30 12:09:42 -05:00 · 2026-01-30 12:09:42 -05:00 · 71300e8011
commit 71300e8011
parent fd7a0e680d
18 changed files with 1365 additions and 2216 deletions
--- a/src/cmd/compile/internal/test/inl_test.go
+++ b/src/cmd/compile/internal/test/inl_test.go
@ -243,6 +243,10 @@ func TestIntendedInlining(t *testing.T) {
 		"path/filepath": {
 			"scanChunk",
 		},
+		"internal/strconv": {
+			"prescale",
+			"uscale",
+		},
 	}

 	if runtime.GOARCH != "386" && runtime.GOARCH != "loong64" && runtime.GOARCH != "mips64" && runtime.GOARCH != "mips64le" && runtime.GOARCH != "riscv64" {
--- a/src/internal/strconv/atof.go
+++ b/src/internal/strconv/atof.go
@ -4,6 +4,17 @@

 package strconv

+type floatInfo struct {
+	mantbits uint
+	expbits  uint
+	bias     int
+}
+
+var (
+	float32info = floatInfo{float32MantBits, float32ExpBits, float32Bias}
+	float64info = floatInfo{float64MantBits, float64ExpBits, float64Bias}
+)
+
 // decimal to binary floating point conversion.
 // Algorithm:
 //   1) Store input in multiprecision decimal.
@ -565,45 +576,54 @@ func atof32(s string) (f float32, n int, err error) {
 		return float32(val), n, nil
 	}

-	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
+	d, p, neg, trunc, hex, n, ok := readFloat(s)
 	if !ok {
 		return 0, n, ErrSyntax
 	}

 	if hex {
-		f, err := atofHex(s[:n], &float32info, mantissa, exp, neg, trunc)
+		f, err := atofHex(s[:n], &float32info, d, p, neg, trunc)
 		return float32(f), n, err
 	}

 	if optimize {
-		// Try pure floating-point arithmetic conversion, and if that fails,
-		// the Eisel-Lemire algorithm.
+		sign := bool2[uint32](neg) << 31
+		if d == 0 {
+			return float32frombits(sign | 0), n, nil
+		}
+		if p > 40 { // overflow to ±Inf
+			return float32frombits(sign | 0xff<<23), n, ErrRange
+		}
+		if p < -70 { // underflow to ±0
+			return float32frombits(sign | 0), n, nil
+		}
 		if !trunc {
-			if f, ok := atof32exact(mantissa, exp, neg); ok {
+			// Exact rounding with single multiplication or division.
+			if f, ok := atof32exact(d, p, neg); ok {
 				return f, n, nil
 			}
 		}
-		f, ok := eiselLemire32(mantissa, exp, neg)
-		if ok {
-			if !trunc {
-				return f, n, nil
-			}
-			// Even if the mantissa was truncated, we may
-			// have found the correct result. Confirm by
-			// converting the upper mantissa bound.
-			fUp, ok := eiselLemire32(mantissa+1, exp, neg)
-			if ok && f == fUp {
-				return f, n, nil
-			}
+		// Use fast unrounded scaling.
+		// The only possible err is ErrRange, when the result overflows to ±Inf.
+		f, err := parseFloat32(d, p, sign)
+		if !trunc {
+			return f, n, err
+		}
+		// If additional digits were truncated from d
+		// but d+1 converts to the same value,
+		// then the additional digits don't matter.
+		f1, _ := parseFloat32(d+1, p, sign)
+		if f == f1 {
+			return f, n, err
 		}
 	}

 	// Slow fallback.
-	var d decimal
-	if !d.set(s[:n]) {
+	var dec decimal
+	if !dec.set(s[:n]) {
 		return 0, n, ErrSyntax
 	}
-	b, ovf := d.floatBits(&float32info)
+	b, ovf := dec.floatBits(&float32info)
 	f = float32frombits(uint32(b))
 	if ovf {
 		err = ErrRange
@ -616,45 +636,52 @@ func atof64(s string) (f float64, n int, err error) {
 		return val, n, nil
 	}

-	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
+	d, p, neg, trunc, hex, n, ok := readFloat(s)
 	if !ok {
 		return 0, n, ErrSyntax
 	}
-
 	if hex {
-		f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc)
+		f, err := atofHex(s[:n], &float64info, d, p, neg, trunc)
 		return f, n, err
 	}
-
 	if optimize {
-		// Try pure floating-point arithmetic conversion, and if that fails,
-		// the Eisel-Lemire algorithm.
+		sign := bool2[uint64](neg) << 63
+		if d == 0 {
+			return float64frombits(sign | 0), n, nil
+		}
+		if p > 310 { // overflow to ±Inf
+			return float64frombits(sign | 0x7ff<<52), n, ErrRange
+		}
+		if p < -345 { // underflow to ±0
+			return float64frombits(sign | 0), n, nil
+		}
 		if !trunc {
-			if f, ok := atof64exact(mantissa, exp, neg); ok {
+			// Exact rounding with single multiplication or division.
+			if f, ok := atof64exact(d, p, neg); ok {
 				return f, n, nil
 			}
 		}
-		f, ok := eiselLemire64(mantissa, exp, neg)
-		if ok {
-			if !trunc {
-				return f, n, nil
-			}
-			// Even if the mantissa was truncated, we may
-			// have found the correct result. Confirm by
-			// converting the upper mantissa bound.
-			fUp, ok := eiselLemire64(mantissa+1, exp, neg)
-			if ok && f == fUp {
-				return f, n, nil
-			}
+		// Use fast unrounded scaling.
+		// The only possible err is ErrRange, when the result overflows to ±Inf.
+		f, err := parseFloat64(d, p, sign)
+		if !trunc {
+			return f, n, err
+		}
+		// If additional digits were truncated from d
+		// but d+1 converts to the same value,
+		// then the additional digits don't matter.
+		f1, _ := parseFloat64(d+1, p, sign)
+		if f == f1 {
+			return f, n, err
 		}
 	}

 	// Slow fallback.
-	var d decimal
-	if !d.set(s[:n]) {
+	var dec decimal
+	if !dec.set(s[:n]) {
 		return 0, n, ErrSyntax
 	}
-	b, ovf := d.floatBits(&float64info)
+	b, ovf := dec.floatBits(&float64info)
 	f = float64frombits(b)
 	if ovf {
 		err = ErrRange
--- a/src/internal/strconv/atof_test.go
+++ b/src/internal/strconv/atof_test.go
@ -196,6 +196,9 @@ var atoftests = []atofTest{
 	// way too small
 	{"1e-350", "0", nil},
 	{"1e-400000", "0", nil},
+	{"1e-345", "0", nil}, // picked off in atof64
+	{"1e-343", "0", nil}, // large c.s in parseFloat64
+	{"9.999999999999999999e-343", "0", nil},

 	// Near denormals and denormals.
 	{"0x2.00000000000000p-1010", "1.8227805048890994e-304", nil}, // 0x00e0000000000000
@ -420,6 +423,11 @@ var atof32tests = []atofTest{
 	{"0x0.0000008p-125", "0", nil},             // rounded down
 	{"0x0.0000007p-125", "0", nil},             // rounded down

+	{"1e-70", "0", nil}, // picked off in atof32
+	{"1e-65", "0", nil}, // picked off in atof32
+	{"1e-64", "0", nil}, // large c.s in parseFloat32
+	{"9.999999999999999999e-64", "0", nil},
+
 	// 2^92 = 8388608p+69 = 4951760157141521099596496896 (4.9517602e27)
 	// is an exact power of two that needs 8 decimal digits to be correctly
 	// parsed back.
--- a/src/internal/strconv/atofeisel.go
+++ b/src/internal/strconv/atofeisel.go
@ -1,166 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strconv
-
-// This file implements the Eisel-Lemire ParseFloat algorithm, published in
-// 2020 and discussed extensively at
-// https://nigeltao.github.io/blog/2020/eisel-lemire.html
-//
-// The original C++ implementation is at
-// https://github.com/lemire/fast_double_parser/blob/644bef4306059d3be01a04e77d3cc84b379c596f/include/fast_double_parser.h#L840
-//
-// This Go re-implementation closely follows the C re-implementation at
-// https://github.com/google/wuffs/blob/ba3818cb6b473a2ed0b38ecfc07dbbd3a97e8ae7/internal/cgen/base/floatconv-submodule-code.c#L990
-//
-// Additional testing (on over several million test strings) is done by
-// https://github.com/nigeltao/parse-number-fxx-test-data/blob/5280dcfccf6d0b02a65ae282dad0b6d9de50e039/script/test-go-strconv.go
-
-import (
-	"math/bits"
-)
-
-func eiselLemire64(man uint64, exp10 int, neg bool) (f float64, ok bool) {
-	// The terse comments in this function body refer to sections of the
-	// https://nigeltao.github.io/blog/2020/eisel-lemire.html blog post.
-
-	// Exp10 Range.
-	if man == 0 {
-		if neg {
-			f = float64frombits(0x8000000000000000) // Negative zero.
-		}
-		return f, true
-	}
-	pow, exp2, ok := pow10(exp10)
-	if !ok {
-		return 0, false
-	}
-
-	// Normalization.
-	clz := bits.LeadingZeros64(man)
-	man <<= uint(clz)
-	retExp2 := uint64(exp2+63-float64Bias) - uint64(clz)
-
-	// Multiplication.
-	xHi, xLo := bits.Mul64(man, pow.Hi)
-
-	// Wider Approximation.
-	if xHi&0x1FF == 0x1FF && xLo+man < man {
-		yHi, yLo := bits.Mul64(man, pow.Lo)
-		mergedHi, mergedLo := xHi, xLo+yHi
-		if mergedLo < xLo {
-			mergedHi++
-		}
-		if mergedHi&0x1FF == 0x1FF && mergedLo+1 == 0 && yLo+man < man {
-			return 0, false
-		}
-		xHi, xLo = mergedHi, mergedLo
-	}
-
-	// Shifting to 54 Bits.
-	msb := xHi >> 63
-	retMantissa := xHi >> (msb + 9)
-	retExp2 -= 1 ^ msb
-
-	// Half-way Ambiguity.
-	if xLo == 0 && xHi&0x1FF == 0 && retMantissa&3 == 1 {
-		return 0, false
-	}
-
-	// From 54 to 53 Bits.
-	retMantissa += retMantissa & 1
-	retMantissa >>= 1
-	if retMantissa>>53 > 0 {
-		retMantissa >>= 1
-		retExp2 += 1
-	}
-	// retExp2 is a uint64. Zero or underflow means that we're in subnormal
-	// float64 space. 0x7FF or above means that we're in Inf/NaN float64 space.
-	//
-	// The if block is equivalent to (but has fewer branches than):
-	//   if retExp2 <= 0 || retExp2 >= 0x7FF { etc }
-	if retExp2-1 >= 0x7FF-1 {
-		return 0, false
-	}
-	retBits := retExp2<<float64MantBits | retMantissa&(1<<float64MantBits-1)
-	if neg {
-		retBits |= 0x8000000000000000
-	}
-	return float64frombits(retBits), true
-}
-
-func eiselLemire32(man uint64, exp10 int, neg bool) (f float32, ok bool) {
-	// The terse comments in this function body refer to sections of the
-	// https://nigeltao.github.io/blog/2020/eisel-lemire.html blog post.
-	//
-	// That blog post discusses the float64 flavor (11 exponent bits with a
-	// -1023 bias, 52 mantissa bits) of the algorithm, but the same approach
-	// applies to the float32 flavor (8 exponent bits with a -127 bias, 23
-	// mantissa bits). The computation here happens with 64-bit values (e.g.
-	// man, xHi, retMantissa) before finally converting to a 32-bit float.
-
-	// Exp10 Range.
-	if man == 0 {
-		if neg {
-			f = float32frombits(0x80000000) // Negative zero.
-		}
-		return f, true
-	}
-	pow, exp2, ok := pow10(exp10)
-	if !ok {
-		return 0, false
-	}
-
-	// Normalization.
-	clz := bits.LeadingZeros64(man)
-	man <<= uint(clz)
-	retExp2 := uint64(exp2+63-float32Bias) - uint64(clz)
-
-	// Multiplication.
-	xHi, xLo := bits.Mul64(man, pow.Hi)
-
-	// Wider Approximation.
-	if xHi&0x3FFFFFFFFF == 0x3FFFFFFFFF && xLo+man < man {
-		yHi, yLo := bits.Mul64(man, pow.Lo)
-		mergedHi, mergedLo := xHi, xLo+yHi
-		if mergedLo < xLo {
-			mergedHi++
-		}
-		if mergedHi&0x3FFFFFFFFF == 0x3FFFFFFFFF && mergedLo+1 == 0 && yLo+man < man {
-			return 0, false
-		}
-		xHi, xLo = mergedHi, mergedLo
-	}
-
-	// Shifting to 54 Bits (and for float32, it's shifting to 25 bits).
-	msb := xHi >> 63
-	retMantissa := xHi >> (msb + 38)
-	retExp2 -= 1 ^ msb
-
-	// Half-way Ambiguity.
-	if xLo == 0 && xHi&0x3FFFFFFFFF == 0 && retMantissa&3 == 1 {
-		return 0, false
-	}
-
-	// From 54 to 53 Bits (and for float32, it's from 25 to 24 bits).
-	retMantissa += retMantissa & 1
-	retMantissa >>= 1
-	if retMantissa>>24 > 0 {
-		retMantissa >>= 1
-		retExp2 += 1
-	}
-	// retExp2 is a uint64. Zero or underflow means that we're in subnormal
-	// float32 space. 0xFF or above means that we're in Inf/NaN float32 space.
-	//
-	// The if block is equivalent to (but has fewer branches than):
-	//   if retExp2 <= 0 || retExp2 >= 0xFF { etc }
-	if retExp2-1 >= 0xFF-1 {
-		return 0, false
-	}
-	retBits := retExp2<<float32MantBits | retMantissa&(1<<float32MantBits-1)
-	if neg {
-		retBits |= 0x80000000
-	}
-	return float32frombits(uint32(retBits)), true
-}
--- a/src/internal/strconv/atoi.go
+++ b/src/internal/strconv/atoi.go
@ -90,6 +90,7 @@ func ParseUint(s string, base int, bitSize int) (uint64, error) {

 	// Cutoff is the smallest number such that cutoff*base > maxUint64.
 	// Use compile-time constants for common cases.
+	const maxUint64 = 1<<64 - 1
 	var cutoff uint64
 	switch base {
 	case 10:
--- a/src/internal/strconv/export_test.go
+++ b/src/internal/strconv/export_test.go
@ -4,23 +4,10 @@

 package strconv

-type Uint128 = uint128
-
-const (
-	Pow10Min = pow10Min
-	Pow10Max = pow10Max
-)
-
 var (
-	MulLog10_2       = mulLog10_2
-	MulLog2_10       = mulLog2_10
+	Log10Pow2        = log10Pow2
+	Log2Pow10        = log2Pow10
 	ParseFloatPrefix = parseFloatPrefix
-	Pow10            = pow10
-	Umul128          = umul128
-	Umul192          = umul192
-	Div5Tab          = div5Tab
-	DivisiblePow5    = divisiblePow5
-	TrimZeros        = trimZeros
 )

 func NewDecimal(i uint64) *decimal {
--- a/src/internal/strconv/ftoa.go
+++ b/src/internal/strconv/ftoa.go
@ -10,29 +10,26 @@

 package strconv

+import (
+	"math/bits"
+	"unsafe"
+)
+
 const (
 	lowerhex = "0123456789abcdef"
 	upperhex = "0123456789ABCDEF"
 )

-type floatInfo struct {
-	mantbits uint
-	expbits  uint
-	bias     int
-}
-
 const (
 	float32MantBits = 23
 	float32ExpBits  = 8
 	float32Bias     = -127
+	float32MinExp   = -189
+
 	float64MantBits = 52
 	float64ExpBits  = 11
 	float64Bias     = -1023
-)
-
-var (
-	float32info = floatInfo{float32MantBits, float32ExpBits, float32Bias}
-	float64info = floatInfo{float64MantBits, float64ExpBits, float64Bias}
+	float64MinExp   = -1085
 )

 // FormatFloat converts the floating-point number f to a string,
@ -60,179 +57,162 @@ var (
 // The exponent is written as a decimal integer;
 // for all formats other than 'b', it will be at least two digits.
 func FormatFloat(f float64, fmt byte, prec, bitSize int) string {
-	return string(genericFtoa(make([]byte, 0, max(prec+4, 24)), f, fmt, prec, bitSize))
+	if bitSize == 32 {
+		return string(ftoa(make([]byte, 0, max(prec+4, 24)), float32(f), fmt, prec))
+	}
+	if bitSize == 64 {
+		return string(ftoa(make([]byte, 0, max(prec+4, 24)), f, fmt, prec))
+	}
+	panic("strconv: illegal FormatFloat bitSize")
 }

 // AppendFloat appends the string form of the floating-point number f,
 // as generated by [FormatFloat], to dst and returns the extended buffer.
 func AppendFloat(dst []byte, f float64, fmt byte, prec, bitSize int) []byte {
-	return genericFtoa(dst, f, fmt, prec, bitSize)
+	if bitSize == 32 {
+		return ftoa(dst, float32(f), fmt, prec)
+	}
+	if bitSize == 64 {
+		return ftoa(dst, f, fmt, prec)
+	}
+	panic("strconv: illegal AppendFloat bitSize")
 }

-func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte {
-	var bits uint64
-	var flt *floatInfo
-	switch bitSize {
+func ftoa[F float32 | float64](dst []byte, val F, fmt byte, prec int) []byte {
+	var b uint64
+	var expBits, mantBits, bias int // parameterized constants
+	switch 8 * unsafe.Sizeof(val) {
 	case 32:
-		bits = uint64(float32bits(float32(val)))
-		flt = &float32info
+		b = uint64(float32bits(float32(val)))
+		expBits = float32ExpBits
+		mantBits = float32MantBits
+		bias = float32Bias
 	case 64:
-		bits = float64bits(val)
-		flt = &float64info
-	default:
-		panic("strconv: illegal AppendFloat/FormatFloat bitSize")
+		b = float64bits(float64(val))
+		expBits = float64ExpBits
+		mantBits = float64MantBits
+		bias = float64Bias
 	}

-	neg := bits>>(flt.expbits+flt.mantbits) != 0
-	exp := int(bits>>flt.mantbits) & (1<<flt.expbits - 1)
-	mant := bits & (uint64(1)<<flt.mantbits - 1)
-	denorm := false
-
-	switch exp {
-	case 1<<flt.expbits - 1:
-		// Inf, NaN
-		var s string
-		switch {
-		case mant != 0:
-			s = "NaN"
-		case neg:
-			s = "-Inf"
-		default:
-			s = "+Inf"
+	neg := b>>(expBits+mantBits) != 0
+	exp := int(b>>mantBits) & (1<<expBits - 1)
+	mant := b & (1<<mantBits - 1)
+	if exp == 1<<expBits-1 {
+		if mant != 0 {
+			return append(dst, "NaN"...)
 		}
-		return append(dst, s...)
-
-	case 0:
-		// denormalized
-		exp++
-		denorm = true
-
-	default:
-		// add implicit top bit
-		mant |= uint64(1) << flt.mantbits
+		if neg {
+			return append(dst, "-Inf"...)
+		}
+		return append(dst, "+Inf"...)
 	}
-	exp += flt.bias
+	if exp == 0 {
+		exp++
+	} else {
+		mant |= 1 << mantBits
+	}
+	exp += bias

 	// Pick off easy binary, hex formats.
 	if fmt == 'b' {
-		return fmtB(dst, neg, mant, exp, flt)
+		return fmtB(dst, neg, mant, exp-mantBits)
 	}
 	if fmt == 'x' || fmt == 'X' {
-		return fmtX(dst, prec, fmt, neg, mant, exp, flt)
+		return fmtX(dst, prec, fmt, neg, mant, exp, mantBits)
 	}

-	if !optimize {
-		return bigFtoa(dst, prec, fmt, neg, mant, exp, flt)
+	// Pick off zero.
+	if mant == 0 {
+		return fmtEFG(dst, neg, nil, 0, 0, prec, fmt, prec < 0)
 	}

 	// Negative precision means "only as much as needed to be exact."
-	shortest := prec < 0
-	var digs decimalSlice
-	if mant == 0 {
-		return formatDigits(dst, shortest, neg, digs, prec, fmt)
-	}
-	if shortest {
-		// Use the Dragonbox algorithm.
+	if prec < 0 {
+		// Use fast unrounded scaling.
 		var buf [32]byte
-		digs.d = buf[:]
-		dboxFtoa(&digs, mant, exp-int(flt.mantbits), denorm, bitSize)
+		s := 64 - bits.Len64(mant)
+		m := mant << s
+		e := exp - s
+		d, p := shortFloat[F](m, e-mantBits)
+		dp, nd := setDigits(buf[:], d, p, numDigits(d))
 		// Precision for shortest representation mode.
 		switch fmt {
 		case 'e', 'E':
-			prec = max(digs.nd-1, 0)
+			prec = max(nd-1, 0)
 		case 'f':
-			prec = max(digs.nd-digs.dp, 0)
+			prec = max(nd-dp, 0)
 		case 'g', 'G':
-			prec = digs.nd
+			prec = nd
 		}
-		return formatDigits(dst, shortest, neg, digs, prec, fmt)
+		return fmtEFG(dst, neg, buf[:], dp, nd, prec, fmt, true)
 	}

-	// Fixed number of digits.
-	digits := prec
-	switch fmt {
-	case 'f':
-		// %f precision specifies digits after the decimal point.
-		// Estimate an upper bound on the total number of digits needed.
-		// ftoaFixed will shorten as needed according to prec.
-		if exp >= 0 {
-			digits = 1 + mulLog10_2(1+exp) + prec
-		} else {
-			digits = 1 + prec - mulLog10_2(-exp)
-		}
-	case 'e', 'E':
-		digits++
-	case 'g', 'G':
-		if prec == 0 {
-			prec = 1
-		}
-		digits = prec
-	default:
-		// Invalid mode.
-		digits = 1
-	}
-	if digits <= 18 {
-		// digits <= 0 happens for %f on very small numbers
-		// and means that we're guaranteed to print all zeros.
-		if digits > 0 {
-			var buf [24]byte
-			digs.d = buf[:]
-			fixedFtoa(&digs, mant, exp-int(flt.mantbits), digits, prec, fmt)
-		}
-		return formatDigits(dst, false, neg, digs, prec, fmt)
-	}
-
-	return bigFtoa(dst, prec, fmt, neg, mant, exp, flt)
-}
-
-// bigFtoa uses multiprecision computations to format a float.
-func bigFtoa(dst []byte, prec int, fmt byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte {
-	d := new(decimal)
-	d.Assign(mant)
-	d.Shift(exp - int(flt.mantbits))
-	var digs decimalSlice
-	shortest := prec < 0
-	if shortest {
-		roundShortest(d, mant, exp, flt)
-		digs = decimalSlice{d: d.d[:], nd: d.nd, dp: d.dp}
-		// Precision for shortest representation mode.
+	if optimize {
+		// Fixed number of digits.
+		digits := prec
 		switch fmt {
-		case 'e', 'E':
-			prec = digs.nd - 1
 		case 'f':
-			prec = max(digs.nd-digs.dp, 0)
-		case 'g', 'G':
-			prec = digs.nd
-		}
-	} else {
-		// Round appropriately.
-		switch fmt {
+			// %f precision specifies digits after the decimal point.
+			// Estimate an upper bound on the total number of digits needed.
+			// ftoaFixed will shorten as needed according to prec.
+			if exp >= 0 {
+				digits = 1 + log10Pow2(1+exp) + prec
+			} else {
+				digits = 1 + prec - log10Pow2(-exp)
+			}
 		case 'e', 'E':
-			d.Round(prec + 1)
-		case 'f':
-			d.Round(d.dp + prec)
+			digits++
 		case 'g', 'G':
 			if prec == 0 {
 				prec = 1
 			}
-			d.Round(prec)
+			digits = prec
+		default:
+			// Invalid mode.
+			digits = 1
+		}
+		if digits <= 18 {
+			// digits <= 0 happens for %f on very small numbers
+			// and means that we're guaranteed to print all zeros.
+			var buf [24]byte
+			var dp, nd int
+			if digits > 0 {
+				s := 64 - bits.Len64(mant)
+				m := mant << s
+				e := exp - s
+				d, p := fixedWidthFloat(m, e-mantBits, digits, prec, fmt)
+				if d != 0 {
+					dp, nd = setDigits(buf[:], d, p, numDigits(d))
+				}
+			}
+			return fmtEFG(dst, neg, buf[:], dp, nd, prec, fmt, false)
 		}
-		digs = decimalSlice{d: d.d[:], nd: d.nd, dp: d.dp}
 	}
-	return formatDigits(dst, shortest, neg, digs, prec, fmt)
-}

-func formatDigits(dst []byte, shortest bool, neg bool, digs decimalSlice, prec int, fmt byte) []byte {
+	// Slow bignum case. Only for non-shortest results.
+	d := new(decimal)
+	d.Assign(mant)
+	d.Shift(exp - mantBits)
 	switch fmt {
 	case 'e', 'E':
-		return fmtE(dst, neg, digs, prec, fmt)
+		d.Round(prec + 1)
 	case 'f':
-		return fmtF(dst, neg, digs, prec)
+		d.Round(d.dp + prec)
 	case 'g', 'G':
+		if prec == 0 {
+			prec = 1
+		}
+		d.Round(prec)
+	}
+	return fmtEFG(dst, neg, d.d[:], d.dp, d.nd, prec, fmt, false)
+}
+
+func fmtEFG(dst []byte, neg bool, s []byte, dp, nd, prec int, fmt byte, shortest bool) []byte {
+	if fmt == 'g' || fmt == 'G' {
 		// trailing fractional zeros in 'e' form will be trimmed.
 		eprec := prec
-		if eprec > digs.nd && digs.nd >= digs.dp {
-			eprec = digs.nd
+		if eprec > nd && nd >= dp {
+			eprec = nd
 		}
 		// %e is used if the exponent from the conversion
 		// is less than -4 or greater than or equal to the precision.
@ -240,286 +220,141 @@ func formatDigits(dst []byte, shortest bool, neg bool, digs decimalSlice, prec i
 		if shortest {
 			eprec = 6
 		}
-		exp := digs.dp - 1
+		exp := dp - 1
 		if exp < -4 || exp >= eprec {
-			if prec > digs.nd {
-				prec = digs.nd
+			if prec > nd {
+				prec = nd
 			}
-			return fmtE(dst, neg, digs, prec-1, fmt+'e'-'g')
+			prec--
+			fmt = fmt + 'e' - 'g'
+		} else {
+			if prec > dp {
+				prec = nd
+			}
+			prec = max(prec-dp, 0)
+			fmt = 'f'
 		}
-		if prec > digs.dp {
-			prec = digs.nd
+	}
+
+	switch fmt {
+	case 'e', 'E': // %e: -d.ddddde±dd
+		// sign
+		if neg {
+			dst = append(dst, '-')
 		}
-		return fmtF(dst, neg, digs, max(prec-digs.dp, 0))
+
+		// first digit
+		ch := byte('0')
+		if nd != 0 {
+			ch = s[0]
+		}
+		dst = append(dst, ch)
+
+		// .moredigits
+		if prec > 0 {
+			dst = append(dst, '.')
+			i := 1
+			m := min(nd, prec+1)
+			if i < m {
+				dst = append(dst, s[i:m]...)
+				i = m
+			}
+			for range prec + 1 - i {
+				dst = append(dst, '0')
+			}
+		}
+
+		// e±
+		dst = append(dst, fmt)
+		exp := dp - 1
+		if nd == 0 { // special case: 0 has exponent 0
+			exp = 0
+		}
+		if exp < 0 {
+			ch = '-'
+			exp = -exp
+		} else {
+			ch = '+'
+		}
+		dst = append(dst, ch)
+
+		// dd or ddd
+		switch {
+		case exp < 10:
+			dst = append(dst, '0', byte(exp)+'0')
+		case exp < 100:
+			dst = append(dst, byte(exp/10)+'0', byte(exp%10)+'0')
+		default:
+			dst = append(dst, byte(exp/100)+'0', byte(exp/10)%10+'0', byte(exp%10)+'0')
+		}
+		return dst
+
+	case 'f': // %f: -ddddddd.ddddd
+		// sign
+		if neg {
+			dst = append(dst, '-')
+		}
+
+		// integer, padded with zeros as needed.
+		if dp > 0 {
+			m := min(nd, dp)
+			for _, c := range s[:m] {
+				dst = append(dst, c)
+			}
+			for range dp - m {
+				dst = append(dst, '0')
+			}
+		} else {
+			dst = append(dst, '0')
+		}
+
+		// fraction
+		if prec > 0 {
+			dst = append(dst, '.')
+			lz := min(prec, max(0, -dp))     // leading zeros
+			m := min(prec-lz, max(0, nd-dp)) // middle digits
+			tz := max(0, prec-lz-m)          // trailing zeros
+			for range lz {
+				dst = append(dst, '0')
+			}
+			off := dp + lz
+			for i := range m {
+				dst = append(dst, s[off+i])
+			}
+			for range tz {
+				dst = append(dst, '0')
+			}
+		}
+		return dst
 	}

 	// unknown format
 	return append(dst, '%', fmt)
 }

-// roundShortest rounds d (= mant * 2^exp) to the shortest number of digits
-// that will let the original floating point value be precisely reconstructed.
-func roundShortest(d *decimal, mant uint64, exp int, flt *floatInfo) {
-	// If mantissa is zero, the number is zero; stop now.
-	if mant == 0 {
-		d.nd = 0
-		return
-	}
-
-	// Compute upper and lower such that any decimal number
-	// between upper and lower (possibly inclusive)
-	// will round to the original floating point number.
-
-	// We may see at once that the number is already shortest.
-	//
-	// Suppose d is not denormal, so that 2^exp <= d < 10^dp.
-	// The closest shorter number is at least 10^(dp-nd) away.
-	// The lower/upper bounds computed below are at distance
-	// at most 2^(exp-mantbits).
-	//
-	// So the number is already shortest if 10^(dp-nd) > 2^(exp-mantbits),
-	// or equivalently log2(10)*(dp-nd) > exp-mantbits.
-	// It is true if 332/100*(dp-nd) >= exp-mantbits (log2(10) > 3.32).
-	minexp := flt.bias + 1 // minimum possible exponent
-	if exp > minexp && 332*(d.dp-d.nd) >= 100*(exp-int(flt.mantbits)) {
-		// The number is already shortest.
-		return
-	}
-
-	// d = mant << (exp - mantbits)
-	// Next highest floating point number is mant+1 << exp-mantbits.
-	// Our upper bound is halfway between, mant*2+1 << exp-mantbits-1.
-	upper := new(decimal)
-	upper.Assign(mant*2 + 1)
-	upper.Shift(exp - int(flt.mantbits) - 1)
-
-	// d = mant << (exp - mantbits)
-	// Next lowest floating point number is mant-1 << exp-mantbits,
-	// unless mant-1 drops the significant bit and exp is not the minimum exp,
-	// in which case the next lowest is mant*2-1 << exp-mantbits-1.
-	// Either way, call it mantlo << explo-mantbits.
-	// Our lower bound is halfway between, mantlo*2+1 << explo-mantbits-1.
-	var mantlo uint64
-	var explo int
-	if mant > 1<<flt.mantbits || exp == minexp {
-		mantlo = mant - 1
-		explo = exp
-	} else {
-		mantlo = mant*2 - 1
-		explo = exp - 1
-	}
-	lower := new(decimal)
-	lower.Assign(mantlo*2 + 1)
-	lower.Shift(explo - int(flt.mantbits) - 1)
-
-	// The upper and lower bounds are possible outputs only if
-	// the original mantissa is even, so that IEEE round-to-even
-	// would round to the original mantissa and not the neighbors.
-	inclusive := mant%2 == 0
-
-	// As we walk the digits we want to know whether rounding up would fall
-	// within the upper bound. This is tracked by upperdelta:
-	//
-	// If upperdelta == 0, the digits of d and upper are the same so far.
-	//
-	// If upperdelta == 1, we saw a difference of 1 between d and upper on a
-	// previous digit and subsequently only 9s for d and 0s for upper.
-	// (Thus rounding up may fall outside the bound, if it is exclusive.)
-	//
-	// If upperdelta == 2, then the difference is greater than 1
-	// and we know that rounding up falls within the bound.
-	var upperdelta uint8
-
-	// Now we can figure out the minimum number of digits required.
-	// Walk along until d has distinguished itself from upper and lower.
-	for ui := 0; ; ui++ {
-		// lower, d, and upper may have the decimal points at different
-		// places. In this case upper is the longest, so we iterate from
-		// ui==0 and start li and mi at (possibly) -1.
-		mi := ui - upper.dp + d.dp
-		if mi >= d.nd {
-			break
-		}
-		li := ui - upper.dp + lower.dp
-		l := byte('0') // lower digit
-		if li >= 0 && li < lower.nd {
-			l = lower.d[li]
-		}
-		m := byte('0') // middle digit
-		if mi >= 0 {
-			m = d.d[mi]
-		}
-		u := byte('0') // upper digit
-		if ui < upper.nd {
-			u = upper.d[ui]
-		}
-
-		// Okay to round down (truncate) if lower has a different digit
-		// or if lower is inclusive and is exactly the result of rounding
-		// down (i.e., and we have reached the final digit of lower).
-		okdown := l != m || inclusive && li+1 == lower.nd
-
-		switch {
-		case upperdelta == 0 && m+1 < u:
-			// Example:
-			// m = 12345xxx
-			// u = 12347xxx
-			upperdelta = 2
-		case upperdelta == 0 && m != u:
-			// Example:
-			// m = 12345xxx
-			// u = 12346xxx
-			upperdelta = 1
-		case upperdelta == 1 && (m != '9' || u != '0'):
-			// Example:
-			// m = 1234598x
-			// u = 1234600x
-			upperdelta = 2
-		}
-		// Okay to round up if upper has a different digit and either upper
-		// is inclusive or upper is bigger than the result of rounding up.
-		okup := upperdelta > 0 && (inclusive || upperdelta > 1 || ui+1 < upper.nd)
-
-		// If it's okay to do either, then round to the nearest one.
-		// If it's okay to do only one, do it.
-		switch {
-		case okdown && okup:
-			d.Round(mi + 1)
-			return
-		case okdown:
-			d.RoundDown(mi + 1)
-			return
-		case okup:
-			d.RoundUp(mi + 1)
-			return
-		}
-	}
-}
-
-type decimalSlice struct {
-	d      []byte
-	nd, dp int
-}
-
-// %e: -d.ddddde±dd
-func fmtE(dst []byte, neg bool, d decimalSlice, prec int, fmt byte) []byte {
-	// sign
-	if neg {
-		dst = append(dst, '-')
-	}
-
-	// first digit
-	ch := byte('0')
-	if d.nd != 0 {
-		ch = d.d[0]
-	}
-	dst = append(dst, ch)
-
-	// .moredigits
-	if prec > 0 {
-		dst = append(dst, '.')
-		i := 1
-		m := min(d.nd, prec+1)
-		if i < m {
-			dst = append(dst, d.d[i:m]...)
-			i = m
-		}
-		for ; i <= prec; i++ {
-			dst = append(dst, '0')
-		}
-	}
-
-	// e±
-	dst = append(dst, fmt)
-	exp := d.dp - 1
-	if d.nd == 0 { // special case: 0 has exponent 0
-		exp = 0
-	}
-	if exp < 0 {
-		ch = '-'
-		exp = -exp
-	} else {
-		ch = '+'
-	}
-	dst = append(dst, ch)
-
-	// dd or ddd
-	switch {
-	case exp < 10:
-		dst = append(dst, '0', byte(exp)+'0')
-	case exp < 100:
-		dst = append(dst, byte(exp/10)+'0', byte(exp%10)+'0')
-	default:
-		dst = append(dst, byte(exp/100)+'0', byte(exp/10)%10+'0', byte(exp%10)+'0')
-	}
-
-	return dst
-}
-
-// %f: -ddddddd.ddddd
-func fmtF(dst []byte, neg bool, d decimalSlice, prec int) []byte {
-	// sign
-	if neg {
-		dst = append(dst, '-')
-	}
-
-	// integer, padded with zeros as needed.
-	if d.dp > 0 {
-		m := min(d.nd, d.dp)
-		dst = append(dst, d.d[:m]...)
-		for ; m < d.dp; m++ {
-			dst = append(dst, '0')
-		}
-	} else {
-		dst = append(dst, '0')
-	}
-
-	// fraction
-	if prec > 0 {
-		dst = append(dst, '.')
-		for i := 0; i < prec; i++ {
-			ch := byte('0')
-			if j := d.dp + i; 0 <= j && j < d.nd {
-				ch = d.d[j]
-			}
-			dst = append(dst, ch)
-		}
-	}
-
-	return dst
-}
-
 // %b: -ddddddddp±ddd
-func fmtB(dst []byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte {
-	// sign
+func fmtB(dst []byte, neg bool, mant uint64, exp int) []byte {
 	if neg {
 		dst = append(dst, '-')
 	}
-
-	// mantissa
 	dst = AppendUint(dst, mant, 10)
-
-	// p
 	dst = append(dst, 'p')
-
-	// ±exponent
-	exp -= int(flt.mantbits)
 	if exp >= 0 {
 		dst = append(dst, '+')
 	}
 	dst = AppendInt(dst, int64(exp), 10)
-
 	return dst
 }

 // %x: -0x1.yyyyyyyyp±ddd or -0x0p+0. (y is hex digit, d is decimal digit)
-func fmtX(dst []byte, prec int, fmt byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte {
+func fmtX(dst []byte, prec int, fmt byte, neg bool, mant uint64, exp, mantBits int) []byte {
 	if mant == 0 {
 		exp = 0
 	}

 	// Shift digits so leading 1 (if any) is at bit 1<<60.
-	mant <<= 60 - flt.mantbits
+	// TODO: Is this the right way to handle subnormals?
+	mant <<= 60 - mantBits
 	for mant != 0 && mant&(1<<60) == 0 {
 		mant <<= 1
 		exp--
--- a/src/internal/strconv/ftoa_test.go
+++ b/src/internal/strconv/ftoa_test.go
@ -136,6 +136,7 @@ var ftoatests = []ftoaTest{

 	{fdiv(5e-304, 1e20), 'g', -1, "5e-324"},   // avoid constant arithmetic
 	{fdiv(-5e-304, 1e20), 'g', -1, "-5e-324"}, // avoid constant arithmetic
+	{fdiv(5e-304, 1e20), 'e', -1, "5e-324"},

 	{32, 'g', -1, "32"},
 	{32, 'g', 0, "3e+01"},
--- a/src/internal/strconv/ftoadbox.go
+++ b/src/internal/strconv/ftoadbox.go
@ -1,349 +0,0 @@
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strconv
-
-// Binary to decimal conversion using the Dragonbox algorithm by Junekey Jeon.
-//
-// Fixed precision format is not supported by the Dragonbox algorithm
-// so we continue to use Ryū-printf for this purpose.
-// See https://github.com/jk-jeon/dragonbox/issues/38 for more details.
-//
-// For binary to decimal rounding, uses round to nearest, tie to even.
-// For decimal to binary rounding, assumes round to nearest, tie to even.
-//
-// The original paper by Junekey Jeon can be found at:
-// https://github.com/jk-jeon/dragonbox/blob/d5dc40ae6a3f1a4559cda816738df2d6255b4e24/other_files/Dragonbox.pdf
-//
-// The reference implementation in C++ by Junekey Jeon can be found at:
-// https://github.com/jk-jeon/dragonbox/blob/6c7c925b571d54486b9ffae8d9d18a822801cbda/subproject/simple/include/simple_dragonbox.h
-
-// dragonboxFtoa computes the decimal significand and exponent
-// from the binary significand and exponent using the Dragonbox algorithm
-// and formats the decimal floating point number in d.
-func dboxFtoa(d *decimalSlice, mant uint64, exp int, denorm bool, bitSize int) {
-	if bitSize == 32 {
-		dboxFtoa32(d, uint32(mant), exp, denorm)
-		return
-	}
-	dboxFtoa64(d, mant, exp, denorm)
-}
-
-func dboxFtoa64(d *decimalSlice, mant uint64, exp int, denorm bool) {
-	if mant == 1<<float64MantBits && !denorm {
-		// Algorithm 5.6 (page 24).
-		k0 := -mulLog10_2MinusLog10_4Over3(exp)
-		φ, β := dboxPow64(k0, exp)
-		xi, zi := dboxRange64(φ, β)
-		if exp != 2 && exp != 3 {
-			xi++
-		}
-		q := zi / 10
-		if xi <= q*10 {
-			q, zeros := trimZeros(q)
-			dboxDigits(d, q, -k0+1+zeros)
-			return
-		}
-		yru := dboxRoundUp64(φ, β)
-		if exp == -77 && yru%2 != 0 {
-			yru--
-		} else if yru < xi {
-			yru++
-		}
-		dboxDigits(d, yru, -k0)
-		return
-	}
-
-	// κ = 2 for float64 (section 5.1.3)
-	const (
-		κ     = 2
-		p10κ  = 100       // 10**κ
-		p10κ1 = p10κ * 10 // 10**(κ+1)
-	)
-
-	// Algorithm 5.2 (page 15).
-	k0 := -mulLog10_2(exp)
-	φ, β := dboxPow64(κ+k0, exp)
-	zi, exact := dboxMulPow64(uint64(mant*2+1)<<β, φ)
-	s, r := zi/p10κ1, uint32(zi%p10κ1)
-	δi := dboxDelta64(φ, β)
-
-	if r < δi {
-		if r != 0 || !exact || mant%2 == 0 {
-			s, zeros := trimZeros(s)
-			dboxDigits(d, s, -k0+1+zeros)
-			return
-		}
-		s--
-		r = p10κ * 10
-	} else if r == δi {
-		parity, exact := dboxParity64(uint64(mant*2-1), φ, β)
-		if parity || (exact && mant%2 == 0) {
-			s, zeros := trimZeros(s)
-			dboxDigits(d, s, -k0+1+zeros)
-			return
-		}
-	}
-
-	// Algorithm 5.4 (page 18).
-	D := r + p10κ/2 - δi/2
-	t, ρ := D/p10κ, D%p10κ
-	yru := 10*s + uint64(t)
-	if ρ == 0 {
-		parity, exact := dboxParity64(mant*2, φ, β)
-		if parity != ((D-p10κ/2)%2 != 0) || exact && yru%2 != 0 {
-			yru--
-		}
-	}
-	dboxDigits(d, yru, -k0)
-}
-
-// Almost identical to dragonboxFtoa64.
-// This is kept as a separate copy to minimize runtime overhead.
-func dboxFtoa32(d *decimalSlice, mant uint32, exp int, denorm bool) {
-	if mant == 1<<float32MantBits && !denorm {
-		// Algorithm 5.6 (page 24).
-		k0 := -mulLog10_2MinusLog10_4Over3(exp)
-		φ, β := dboxPow32(k0, exp)
-		xi, zi := dboxRange32(φ, β)
-		if exp != 2 && exp != 3 {
-			xi++
-		}
-		q := zi / 10
-		if xi <= q*10 {
-			q, zeros := trimZeros(uint64(q))
-			dboxDigits(d, q, -k0+1+zeros)
-			return
-		}
-		yru := dboxRoundUp32(φ, β)
-		if exp == -77 && yru%2 != 0 {
-			yru--
-		} else if yru < xi {
-			yru++
-		}
-		dboxDigits(d, uint64(yru), -k0)
-		return
-	}
-
-	// κ = 1 for float32 (section 5.1.3)
-	const (
-		κ     = 1
-		p10κ  = 10
-		p10κ1 = p10κ * 10
-	)
-
-	// Algorithm 5.2 (page 15).
-	k0 := -mulLog10_2(exp)
-	φ, β := dboxPow32(κ+k0, exp)
-	zi, exact := dboxMulPow32(uint32(mant*2+1)<<β, φ)
-	s, r := zi/p10κ1, uint32(zi%p10κ1)
-	δi := dboxDelta32(φ, β)
-
-	if r < δi {
-		if r != 0 || !exact || mant%2 == 0 {
-			s, zeros := trimZeros(uint64(s))
-			dboxDigits(d, s, -k0+1+zeros)
-			return
-		}
-		s--
-		r = p10κ * 10
-	} else if r == δi {
-		parity, exact := dboxParity32(uint32(mant*2-1), φ, β)
-		if parity || (exact && mant%2 == 0) {
-			s, zeros := trimZeros(uint64(s))
-			dboxDigits(d, s, -k0+1+zeros)
-			return
-		}
-	}
-
-	// Algorithm 5.4 (page 18).
-	D := r + p10κ/2 - δi/2
-	t, ρ := D/p10κ, D%p10κ
-	yru := 10*s + uint32(t)
-	if ρ == 0 {
-		parity, exact := dboxParity32(mant*2, φ, β)
-		if parity != ((D-p10κ/2)%2 != 0) || exact && yru%2 != 0 {
-			yru--
-		}
-	}
-	dboxDigits(d, uint64(yru), -k0)
-}
-
-// dboxDigits emits decimal digits of mant in d for float64
-// and adjusts the decimal point based on exp.
-func dboxDigits(d *decimalSlice, mant uint64, exp int) {
-	i := formatBase10(d.d, mant)
-	d.d = d.d[i:]
-	d.nd = len(d.d)
-	d.dp = d.nd + exp
-}
-
-// uadd128 returns the full 128 bits of u + n.
-func uadd128(u uint128, n uint64) uint128 {
-	sum := uint64(u.Lo + n)
-	// Check if lo is wrapped around.
-	if sum < u.Lo {
-		u.Hi++
-	}
-	u.Lo = sum
-	return u
-}
-
-// umul64 returns the full 64 bits of x * y.
-func umul64(x, y uint32) uint64 {
-	return uint64(x) * uint64(y)
-}
-
-// umul96Upper64 returns the upper 64 bits (out of 96 bits) of x * y.
-func umul96Upper64(x uint32, y uint64) uint64 {
-	yh := uint32(y >> 32)
-	yl := uint32(y)
-
-	xyh := umul64(x, yh)
-	xyl := umul64(x, yl)
-
-	return xyh + (xyl >> 32)
-}
-
-// umul96Lower64 returns the lower 64 bits (out of 96 bits) of x * y.
-func umul96Lower64(x uint32, y uint64) uint64 {
-	return uint64(uint64(x) * y)
-}
-
-// umul128Upper64 returns the upper 64 bits (out of 128 bits) of x * y.
-func umul128Upper64(x, y uint64) uint64 {
-	a := uint32(x >> 32)
-	b := uint32(x)
-	c := uint32(y >> 32)
-	d := uint32(y)
-
-	ac := umul64(a, c)
-	bc := umul64(b, c)
-	ad := umul64(a, d)
-	bd := umul64(b, d)
-
-	intermediate := (bd >> 32) + uint64(uint32(ad)) + uint64(uint32(bc))
-
-	return ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32)
-}
-
-// umul192Upper128 returns the upper 128 bits (out of 192 bits) of x * y.
-func umul192Upper128(x uint64, y uint128) uint128 {
-	r := umul128(x, y.Hi)
-	t := umul128Upper64(x, y.Lo)
-	return uadd128(r, t)
-}
-
-// umul192Lower128 returns the lower 128 bits (out of 192 bits) of x * y.
-func umul192Lower128(x uint64, y uint128) uint128 {
-	high := x * y.Hi
-	highLow := umul128(x, y.Lo)
-	return uint128{uint64(high + highLow.Hi), highLow.Lo}
-}
-
-// dboxMulPow64 computes x^(i), y^(i), z^(i)
-// from the precomputed value of φ̃k for float64
-// and also checks if x^(f), y^(f), z^(f) == 0 (section 5.2.1).
-func dboxMulPow64(u uint64, phi uint128) (intPart uint64, isInt bool) {
-	r := umul192Upper128(u, phi)
-	intPart = r.Hi
-	isInt = r.Lo == 0
-	return
-}
-
-// dboxMulPow32 computes x^(i), y^(i), z^(i)
-// from the precomputed value of φ̃k for float32
-// and also checks if x^(f), y^(f), z^(f) == 0 (section 5.2.1).
-func dboxMulPow32(u uint32, phi uint64) (intPart uint32, isInt bool) {
-	r := umul96Upper64(u, phi)
-	intPart = uint32(r >> 32)
-	isInt = uint32(r) == 0
-	return
-}
-
-// dboxParity64 computes only the parity of x^(i), y^(i), z^(i)
-// from the precomputed value of φ̃k for float64
-// and also checks if x^(f), y^(f), z^(f) = 0 (section 5.2.1).
-func dboxParity64(mant2 uint64, phi uint128, beta int) (parity bool, isInt bool) {
-	r := umul192Lower128(mant2, phi)
-	parity = ((r.Hi >> (64 - beta)) & 1) != 0
-	isInt = ((uint64(r.Hi << beta)) | (r.Lo >> (64 - beta))) == 0
-	return
-}
-
-// dboxParity32 computes only the parity of x^(i), y^(i), z^(i)
-// from the precomputed value of φ̃k for float32
-// and also checks if x^(f), y^(f), z^(f) = 0 (section 5.2.1).
-func dboxParity32(mant2 uint32, phi uint64, beta int) (parity bool, isInt bool) {
-	r := umul96Lower64(mant2, phi)
-	parity = ((r >> (64 - beta)) & 1) != 0
-	isInt = uint32(r>>(32-beta)) == 0
-	return
-}
-
-// dboxDelta64 returns δ^(i) from the precomputed value of φ̃k for float64.
-func dboxDelta64(φ uint128, β int) uint32 {
-	return uint32(φ.Hi >> (64 - 1 - β))
-}
-
-// dboxDelta32 returns δ^(i) from the precomputed value of φ̃k for float32.
-func dboxDelta32(φ uint64, β int) uint32 {
-	return uint32(φ >> (64 - 1 - β))
-}
-
-// mulLog10_2MinusLog10_4Over3 computes
-// ⌊e*log10(2)-log10(4/3)⌋ = ⌊log10(2^e)-log10(4/3)⌋ (section 6.3).
-func mulLog10_2MinusLog10_4Over3(e int) int {
-	// e should be in the range [-2985, 2936].
-	return (e*631305 - 261663) >> 21
-}
-
-const (
-	floatMantBits64 = 52 // p = 52 for float64.
-	floatMantBits32 = 23 // p = 23 for float32.
-)
-
-// dboxRange64 returns the left and right float64 endpoints.
-func dboxRange64(φ uint128, β int) (left, right uint64) {
-	left = (φ.Hi - (φ.Hi >> (float64MantBits + 2))) >> (64 - float64MantBits - 1 - β)
-	right = (φ.Hi + (φ.Hi >> (float64MantBits + 1))) >> (64 - float64MantBits - 1 - β)
-	return left, right
-}
-
-// dboxRange32 returns the left and right float32 endpoints.
-func dboxRange32(φ uint64, β int) (left, right uint32) {
-	left = uint32((φ - (φ >> (floatMantBits32 + 2))) >> (64 - floatMantBits32 - 1 - β))
-	right = uint32((φ + (φ >> (floatMantBits32 + 1))) >> (64 - floatMantBits32 - 1 - β))
-	return left, right
-}
-
-// dboxRoundUp64 computes the round up of y (i.e., y^(ru)).
-func dboxRoundUp64(phi uint128, beta int) uint64 {
-	return (phi.Hi>>(128/2-floatMantBits64-2-beta) + 1) / 2
-}
-
-// dboxRoundUp32 computes the round up of y (i.e., y^(ru)).
-func dboxRoundUp32(phi uint64, beta int) uint32 {
-	return uint32(phi>>(64-floatMantBits32-2-beta)+1) / 2
-}
-
-// dboxPow64 gets the precomputed value of φ̃̃k for float64.
-func dboxPow64(k, e int) (φ uint128, β int) {
-	φ, e1, _ := pow10(k)
-	if k < 0 || k > 55 {
-		φ.Lo++
-	}
-	β = e + e1 - 1
-	return φ, β
-}
-
-// dboxPow32 gets the precomputed value of φ̃̃k for float32.
-func dboxPow32(k, e int) (mant uint64, exp int) {
-	m, e1, _ := pow10(k)
-	if k < 0 || k > 27 {
-		m.Hi++
-	}
-	exp = e + e1 - 1
-	return m.Hi, exp
-}
--- a/src/internal/strconv/ftoafixed.go
+++ b/src/internal/strconv/ftoafixed.go
@ -1,184 +0,0 @@
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strconv
-
-import "math/bits"
-
-var uint64pow10 = [...]uint64{
-	1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
-	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
-}
-
-// fixedFtoa formats a number of decimal digits of mant*(2^exp) into d,
-// where mant > 0 and 1 ≤ digits ≤ 18.
-// If fmt == 'f', digits is a conservative overestimate, and the final
-// number of digits is prec past the decimal point.
-func fixedFtoa(d *decimalSlice, mant uint64, exp, digits, prec int, fmt byte) {
-	// The strategy here is to multiply (mant * 2^exp) by a power of 10
-	// to make the resulting integer be the number of digits we want.
-	//
-	// Adams proved in the Ryu paper that 128-bit precision in the
-	// power-of-10 constant is sufficient to produce correctly
-	// rounded output for all float64s, up to 18 digits.
-	// https://dl.acm.org/doi/10.1145/3192366.3192369
-	//
-	// TODO(rsc): The paper is not focused on, nor terribly clear about,
-	// this fact in this context, and the proof seems too complicated.
-	// Post a shorter, more direct proof and link to it here.
-
-	if digits > 18 {
-		panic("fixedFtoa called with digits > 18")
-	}
-
-	// Shift mantissa to have 64 bits,
-	// so that the 192-bit product below will
-	// have at least 63 bits in its top word.
-	b := 64 - bits.Len64(mant)
-	mant <<= b
-	exp -= b
-
-	// We have f = mant * 2^exp ≥ 2^(63+exp)
-	// and we want to multiply it by some 10^p
-	// to make it have the number of digits plus one rounding bit:
-	//
-	//	2 * 10^(digits-1) ≤ f * 10^p < ~2 * 10^digits
-	//
-	// The lower bound is required, but the upper bound is approximate:
-	// we must not have too few digits, but we can round away extra ones.
-	//
-	//	f * 10^p ≥ 2 * 10^(digits-1)
-	//	10^p ≥ 2 * 10^(digits-1) / f                         [dividing by f]
-	//	p ≥ (log₁₀ 2) + (digits-1) - log₁₀ f                 [taking log₁₀]
-	//	p ≥ (log₁₀ 2) + (digits-1) - log₁₀ (mant * 2^exp)    [expanding f]
-	//	p ≥ (log₁₀ 2) + (digits-1) - (log₁₀ 2) * (64 + exp)  [mant < 2⁶⁴]
-	//	p ≥ (digits - 1) - (log₁₀ 2) * (63 + exp)            [refactoring]
-	//
-	// Once we have p, we can compute the scaled value:
-	//
-	//	dm * 2^de = mant * 2^exp * 10^p
-	//	          = mant * 2^exp * pow/2^128 * 2^exp2.
-	//	          = (mant * pow/2^128) * 2^(exp+exp2).
-	p := (digits - 1) - mulLog10_2(63+exp)
-	pow, exp2, ok := pow10(p)
-	if !ok {
-		// This never happens due to the range of float32/float64 exponent
-		panic("fixedFtoa: pow10 out of range")
-	}
-	if -22 <= p && p < 0 {
-		// Special case: Let q=-p. q is in [1,22]. We are dividing by 10^q
-		// and the mantissa may be a multiple of 5^q (5^22 < 2^53),
-		// in which case the division must be computed exactly and
-		// recorded as exact for correct rounding. Our normal computation is:
-		//
-		//	dm = floor(mant * floor(10^p * 2^s))
-		//
-		// for some scaling shift s. To make this an exact division,
-		// it suffices to change the inner floor to a ceil:
-		//
-		//	dm = floor(mant * ceil(10^p * 2^s))
-		//
-		// In the range of values we are using, the floor and ceil
-		// cancel each other out and the high 64 bits of the product
-		// come out exactly right.
-		// (This is the same trick compilers use for division by constants.
-		// See Hacker's Delight, 2nd ed., Chapter 10.)
-		pow.Lo++
-	}
-	dm, lo1, lo0 := umul192(mant, pow)
-	de := exp + exp2
-
-	// Check whether any bits have been truncated from dm.
-	// If so, set dt != 0. If not, leave dt == 0 (meaning dm is exact).
-	var dt uint
-	switch {
-	default:
-		// Most powers of 10 use a truncated constant,
-		// meaning the result is also truncated.
-		dt = 1
-	case 0 <= p && p <= 55:
-		// Small positive powers of 10 (up to 10⁵⁵) can be represented
-		// precisely in a 128-bit mantissa (5⁵⁵ ≤ 2¹²⁸), so the only truncation
-		// comes from discarding the low bits of the 192-bit product.
-		//
-		// TODO(rsc): The new proof mentioned above should also
-		// prove that we can't have lo1 == 0 and lo0 != 0.
-		// After proving that, drop computation and use of lo0 here.
-		dt = bool2uint(lo1|lo0 != 0)
-	case -22 <= p && p < 0 && divisiblePow5(mant, -p):
-		// If the original mantissa was a multiple of 5^p,
-		// the result is exact. (See comment above for pow.Lo++.)
-		dt = 0
-	}
-
-	// The value we want to format is dm * 2^de, where de < 0.
-	// Multiply by 2^de by shifting, but leave one extra bit for rounding.
-	// After the shift, the "integer part" of dm is dm>>1,
-	// the "rounding bit" (the first fractional bit) is dm&1,
-	// and the "truncated bit" (have any bits been discarded?) is dt.
-	shift := -de - 1
-	dt |= bool2uint(dm&(1<<shift-1) != 0)
-	dm >>= shift
-
-	// Set decimal point in eventual formatted digits,
-	// so we can update it as we adjust the digits.
-	d.dp = digits - p
-
-	// Trim excess digit if any, updating truncation and decimal point.
-	// The << 1 is leaving room for the rounding bit.
-	max := uint64pow10[digits] << 1
-	if dm >= max {
-		var r uint
-		dm, r = dm/10, uint(dm%10)
-		dt |= bool2uint(r != 0)
-		d.dp++
-	}
-
-	// If this is %.*f we may have overestimated the digits needed.
-	// Now that we know where the decimal point is,
-	// trim to the actual number of digits, which is d.dp+prec.
-	if fmt == 'f' && digits != d.dp+prec {
-		for digits > d.dp+prec {
-			var r uint
-			dm, r = dm/10, uint(dm%10)
-			dt |= bool2uint(r != 0)
-			digits--
-		}
-
-		// Dropping those digits can create a new leftmost
-		// non-zero digit, like if we are formatting %.1f and
-		// convert 0.09 -> 0.1. Detect and adjust for that.
-		if digits <= 0 {
-			digits = 1
-			d.dp++
-		}
-
-		max = uint64pow10[digits] << 1
-	}
-
-	// Round and shift away rounding bit.
-	// We want to round up when
-	// (a) the fractional part is > 0.5 (dm&1 != 0 and dt == 1)
-	// (b) or the fractional part is ≥ 0.5 and the integer part is odd
-	//     (dm&1 != 0 and dm&2 != 0).
-	// The bitwise expression encodes that logic.
-	dm += uint64(uint(dm) & (dt | uint(dm)>>1) & 1)
-	dm >>= 1
-	if dm == max>>1 {
-		// 999... rolled over to 1000...
-		dm = uint64pow10[digits-1]
-		d.dp++
-	}
-
-	// Format digits into d.
-	if dm != 0 {
-		if formatBase10(d.d[:digits], dm) != 0 {
-			panic("formatBase10")
-		}
-		d.nd = digits
-		for d.d[d.nd-1] == '0' {
-			d.nd--
-		}
-	}
-}
--- a/src/internal/strconv/import_test.go
+++ b/src/internal/strconv/import_test.go
@ -6,21 +6,8 @@ package strconv_test

 import . "internal/strconv"

-type uint128 = Uint128
-
-const (
-	pow10Min = Pow10Min
-	pow10Max = Pow10Max
-)
-
 var (
-	mulLog10_2       = MulLog10_2
-	mulLog2_10       = MulLog2_10
+	log2Pow10        = Log2Pow10
+	log10Pow2        = Log10Pow2
 	parseFloatPrefix = ParseFloatPrefix
-	pow10            = Pow10
-	umul128          = Umul128
-	umul192          = Umul192
-	div5Tab          = Div5Tab
-	divisiblePow5    = DivisiblePow5
-	trimZeros        = TrimZeros
 )
--- a/src/internal/strconv/itoa.go
+++ b/src/internal/strconv/itoa.go
@ -15,8 +15,9 @@ func FormatUint(i uint64, base int) string {
 			return small(int(i))
 		}
 		var a [24]byte
-		j := formatBase10(a[:], i)
-		return string(a[j:])
+		nd := numDigits(i)
+		formatBase10(a[:nd], i)
+		return string(a[:nd])
 	}
 	_, s := formatBits(nil, i, base, false, false)
 	return s
@ -35,12 +36,13 @@ func FormatInt(i int64, base int) string {
 		if i < 0 {
 			u = -u
 		}
-		j := formatBase10(a[:], u)
+		nd := numDigits(u)
+		formatBase10(a[1:1+nd], u)
 		if i < 0 {
-			j--
-			a[j] = '-'
+			a[0] = '-'
+			return string(a[:1+nd])
 		}
-		return string(a[j:])
+		return string(a[1 : 1+nd])
 	}
 	_, s := formatBits(nil, uint64(i), base, i < 0, false)
 	return s
@ -70,8 +72,9 @@ func AppendUint(dst []byte, i uint64, base int) []byte {
 			return append(dst, small(int(i))...)
 		}
 		var a [24]byte
-		j := formatBase10(a[:], i)
-		return append(dst, a[j:]...)
+		nd := numDigits(i)
+		formatBase10(a[:nd], i)
+		return append(dst, a[:nd]...)
 	}
 	dst, _ = formatBits(dst, i, base, false, true)
 	return dst
@ -164,8 +167,6 @@ const smalls = "00010203040506070809" +
 	"80818283848586878889" +
 	"90919293949596979899"

-const host64bit = ^uint(0)>>32 != 0
-
 // small returns the string for an i with 0 <= i < nSmalls.
 func small(i int) string {
 	if i < 10 {
@ -179,59 +180,52 @@ func small(i int) string {
 // It is only for use by package runtime.
 // Other packages should use AppendUint.
 func RuntimeFormatBase10(a []byte, u uint64) int {
-	return formatBase10(a, u)
-}
-
-// formatBase10 formats the decimal representation of u into the tail of a
-// and returns the offset of the first byte written to a. That is, after
-//
-//	i := formatBase10(a, u)
-//
-// the decimal representation is in a[i:].
-func formatBase10(a []byte, u uint64) int {
-	// Split into 9-digit chunks that fit in uint32s
-	// and convert each chunk using uint32 math instead of uint64 math.
-	// The obvious way to write the outer loop is "for u >= 1e9", but most numbers are small,
-	// so the setup for the comparison u >= 1e9 is usually pure overhead.
-	// Instead, we approximate it by u>>29 != 0, which is usually faster and good enough.
-	i := len(a)
-	for (host64bit && u>>29 != 0) || (!host64bit && uint32(u)>>29|uint32(u>>32) != 0) {
-		var lo uint32
-		u, lo = u/1e9, uint32(u%1e9)
-
-		// Convert 9 digits.
-		for range 4 {
-			var dd uint32
-			lo, dd = lo/100, (lo%100)*2
-			i -= 2
-			a[i+0], a[i+1] = smalls[dd+0], smalls[dd+1]
-		}
-		i--
-		a[i] = smalls[lo*2+1]
-
-		// If we'd been using u >= 1e9 then we would be guaranteed that u/1e9 > 0,
-		// but since we used u>>29 != 0, u/1e9 might be 0, so we might be done.
-		// (If u is now 0, then at the start we had 2²⁹ ≤ u < 10⁹, so it was still correct
-		// to write 9 digits; we have not accidentally written any leading zeros.)
-		if u == 0 {
-			return i
-		}
-	}
-
-	// Convert final chunk, at most 8 digits.
-	lo := uint32(u)
-	for lo >= 100 {
-		var dd uint32
-		lo, dd = lo/100, (lo%100)*2
-		i -= 2
-		a[i+0], a[i+1] = smalls[dd+0], smalls[dd+1]
-	}
-	i--
-	dd := lo * 2
-	a[i] = smalls[dd+1]
-	if lo >= 10 {
-		i--
-		a[i] = smalls[dd+0]
-	}
+	// Note: numDigits requires an argument ≥ 1, so u == 0 is mapped to 1.
+	// The |1 only changes even values (u becomes u+1), and an even u
+	// is never 99…9, so no other value gains an extra digit.
+	i := len(a) - numDigits(u|1)
+	formatBase10(a[i:], u)
 	return i
 }
+
+// formatBase10 writes the decimal digits of u into a, filling all len(a) bytes.
+// The caller must ensure a is long enough to hold u; no length check is made here.
+// If a is longer than needed, the unused leading bytes are set to '0'.
+func formatBase10(a []byte, u uint64) {
+	nd := len(a) // number of bytes of a still to be filled, counted from the front
+	for nd >= 8 {
+		// Format the low 8 digits of u as 4 pairs, using uint32 math on the chunk.
+		x3210 := uint32(u % 1e8)
+		u /= 1e8
+		x32, x10 := x3210/1e4, x3210%1e4
+		x1, x0 := (x10/100)*2, (x10%100)*2
+		x3, x2 := (x32/100)*2, (x32%100)*2
+		a[nd-1], a[nd-2] = smalls[x0+1], smalls[x0] // smalls[2k], smalls[2k+1] are the two digits of k
+		a[nd-3], a[nd-4] = smalls[x1+1], smalls[x1]
+		a[nd-5], a[nd-6] = smalls[x2+1], smalls[x2]
+		a[nd-7], a[nd-8] = smalls[x3+1], smalls[x3]
+		nd -= 8
+	}
+
+	x := uint32(u) // nd < 8 here; assumes a was sized for u, so the remainder fits in uint32
+	if nd >= 4 {
+		// Format the low 4 digits of x as 2 pairs.
+		x10 := x % 1e4
+		x /= 1e4
+		x1, x0 := (x10/100)*2, (x10%100)*2
+		a[nd-1], a[nd-2] = smalls[x0+1], smalls[x0]
+		a[nd-3], a[nd-4] = smalls[x1+1], smalls[x1]
+		nd -= 4
+	}
+	if nd >= 2 {
+		// Format the low 2 digits of x as one pair.
+		x0 := (x % 1e2) * 2
+		x /= 1e2
+		a[nd-1], a[nd-2] = smalls[x0+1], smalls[x0]
+		nd -= 2
+	}
+	if nd > 0 {
+		// nd is 1 here, so the single remaining digit goes in a[0].
+		a[0] = byte('0' + x)
+	}
+}
--- a/src/internal/strconv/itoa_test.go
+++ b/src/internal/strconv/itoa_test.go
@ -86,6 +86,15 @@ func TestItoa(t *testing.T) {
 			}
 		}

+		if test.base == 10 && test.in >= 0 {
+			buf := make([]byte, 32)
+			i := RuntimeFormatBase10(buf[:], uint64(test.in))
+			s := string(buf[i:])
+			if s != test.out {
+				t.Errorf("RuntimeFormatBase10(%d) = %q, want %q", test.in, s, test.out)
+			}
+		}
+
 		if test.base == 10 && int64(int(test.in)) == test.in {
 			s := Itoa(int(test.in))
 			if s != test.out {
@ -131,7 +140,14 @@ func TestUitoa(t *testing.T) {
 			t.Errorf("AppendUint(%q, %v, %v) = %q want %v",
 				"abc", test.in, test.base, x, test.out)
 		}
-
+		if test.base == 10 {
+			buf := make([]byte, 32)
+			i := RuntimeFormatBase10(buf[:], test.in)
+			s := string(buf[i:])
+			if s != test.out {
+				t.Errorf("RuntimeFormatBase10(%d) = %q, want %q", test.in, s, test.out)
+			}
+		}
 	}
 }

--- a/src/internal/strconv/math.go
+++ b/src/internal/strconv/math.go
@ -1,179 +0,0 @@
-// Copyright 2025 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package strconv
-
-import "math/bits"
-
-// A uint128 is a 128-bit uint.
-// The fields are exported to make them visible to package strconv_test.
-type uint128 struct {
-	Hi uint64
-	Lo uint64
-}
-
-// umul128 returns the 128-bit product x*y.
-func umul128(x, y uint64) uint128 {
-	hi, lo := bits.Mul64(x, y)
-	return uint128{hi, lo}
-}
-
-// umul192 returns the 192-bit product x*y in three uint64s.
-func umul192(x uint64, y uint128) (hi, mid, lo uint64) {
-	mid1, lo := bits.Mul64(x, y.Lo)
-	hi, mid2 := bits.Mul64(x, y.Hi)
-	mid, carry := bits.Add64(mid1, mid2, 0)
-	return hi + carry, mid, lo
-}
-
-// pow10 returns the 128-bit mantissa and binary exponent of 10**e.
-// That is, 10^e = mant/2^128 * 2**exp.
-// If e is out of range, pow10 returns ok=false.
-func pow10(e int) (mant uint128, exp int, ok bool) {
-	if e < pow10Min || e > pow10Max {
-		return
-	}
-	return pow10Tab[e-pow10Min], 1 + mulLog2_10(e), true
-}
-
-// mulLog10_2 returns math.Floor(x * log(2)/log(10)) for an integer x in
-// the range -1600 <= x && x <= +1600.
-//
-// The range restriction lets us work in faster integer arithmetic instead of
-// slower floating point arithmetic. Correctness is verified by unit tests.
-func mulLog10_2(x int) int {
-	// log(2)/log(10) ≈ 0.30102999566 ≈ 78913 / 2^18
-	return (x * 78913) >> 18
-}
-
-// mulLog2_10 returns math.Floor(x * log(10)/log(2)) for an integer x in
-// the range -500 <= x && x <= +500.
-//
-// The range restriction lets us work in faster integer arithmetic instead of
-// slower floating point arithmetic. Correctness is verified by unit tests.
-func mulLog2_10(x int) int {
-	// log(10)/log(2) ≈ 3.32192809489 ≈ 108853 / 2^15
-	return (x * 108853) >> 15
-}
-
-func bool2uint(b bool) uint {
-	if b {
-		return 1
-	}
-	return 0
-}
-
-// Exact Division and Remainder Checking
-//
-// An exact division x/c (exact means x%c == 0)
-// can be implemented by x*m where m is the multiplicative inverse of c (m*c == 1).
-//
-// Since c is also the multiplicative inverse of m, x*m is lossless,
-// and all the exact multiples of c map to all of [0, maxUint64/c].
-// The non-multiples are forced to map to larger values.
-// This also gives a quick test for whether x is an exact multiple of c:
-// compute the exact division and check whether it's at most maxUint64/c:
-//	x%c == 0 => x*m <= maxUint64/c.
-//
-// Only odd c have multiplicative inverses mod powers of two.
-// To do an exact divide x / (c<<s) we can use (x/c)>>s instead.
-// And to check for remainder, we need to check that those low s
-// bits are all zero before we shift them away. We can merge that
-// with the <= for the exact odd remainder check by rotating the
-// shifted bits into the high part instead:
-// 	x%(c<<s) == 0 => bits.RotateLeft64(x*m, -s) <= maxUint64/c.
-//
-// The compiler does this transformation automatically in general,
-// but we apply it here by hand in a few ways that the compiler can't help with.
-//
-// For a more detailed explanation, see
-// Henry S. Warren, Jr., Hacker's Delight, 2nd ed., sections 10-16 and 10-17.
-
-// divisiblePow5 reports whether x is divisible by 5^p.
-// It returns false for p not in [1, 22],
-// because we only care about float64 mantissas, and 5^23 > 2^53.
-func divisiblePow5(x uint64, p int) bool {
-	return 1 <= p && p <= 22 && x*div5Tab[p-1][0] <= div5Tab[p-1][1]
-}
-
-const maxUint64 = 1<<64 - 1
-
-// div5Tab[p-1] is the multiplicative inverse of 5^p and maxUint64/5^p.
-var div5Tab = [22][2]uint64{
-	{0xcccccccccccccccd, maxUint64 / 5},
-	{0x8f5c28f5c28f5c29, maxUint64 / 5 / 5},
-	{0x1cac083126e978d5, maxUint64 / 5 / 5 / 5},
-	{0xd288ce703afb7e91, maxUint64 / 5 / 5 / 5 / 5},
-	{0x5d4e8fb00bcbe61d, maxUint64 / 5 / 5 / 5 / 5 / 5},
-	{0x790fb65668c26139, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0xe5032477ae8d46a5, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0xc767074b22e90e21, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0x8e47ce423a2e9c6d, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0x4fa7f60d3ed61f49, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0x0fee64690c913975, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0x3662e0e1cf503eb1, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0xa47a2cf9f6433fbd, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0x54186f653140a659, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0x7738164770402145, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0xe4a4d1417cd9a041, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0xc75429d9e5c5200d, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0xc1773b91fac10669, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0x26b172506559ce15, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0xd489e3a9addec2d1, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0x90e860bb892c8d5d, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-	{0x502e79bf1b6f4f79, maxUint64 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5 / 5},
-}
-
-// trimZeros trims trailing zeros from x.
-// It finds the largest p such that x % 10^p == 0
-// and then returns x / 10^p, p.
-//
-// This is here for reference and tested, because it is an optimization
-// used by other ftoa algorithms, but in our implementations it has
-// never been benchmarked to be faster than trimming zeros after
-// formatting into decimal bytes.
-func trimZeros(x uint64) (uint64, int) {
-	const (
-		div1e8m  = 0xc767074b22e90e21
-		div1e8le = maxUint64 / 100000000
-
-		div1e4m  = 0xd288ce703afb7e91
-		div1e4le = maxUint64 / 10000
-
-		div1e2m  = 0x8f5c28f5c28f5c29
-		div1e2le = maxUint64 / 100
-
-		div1e1m  = 0xcccccccccccccccd
-		div1e1le = maxUint64 / 10
-	)
-
-	// _ = assert[x - y] asserts at compile time that x == y.
-	// Assert that the multiplicative inverses are correct
-	// by checking that (div1eNm * 5^N) % 1<<64 == 1.
-	var assert [1]struct{}
-	_ = assert[(div1e8m*5*5*5*5*5*5*5*5)%(1<<64)-1]
-	_ = assert[(div1e4m*5*5*5*5)%(1<<64)-1]
-	_ = assert[(div1e2m*5*5)%(1<<64)-1]
-	_ = assert[(div1e1m*5)%(1<<64)-1]
-
-	// Cut 8 zeros, then 4, then 2, then 1.
-	p := 0
-	for d := bits.RotateLeft64(x*div1e8m, -8); d <= div1e8le; d = bits.RotateLeft64(x*div1e8m, -8) {
-		x = d
-		p += 8
-	}
-	if d := bits.RotateLeft64(x*div1e4m, -4); d <= div1e4le {
-		x = d
-		p += 4
-	}
-	if d := bits.RotateLeft64(x*div1e2m, -2); d <= div1e2le {
-		x = d
-		p += 2
-	}
-	if d := bits.RotateLeft64(x*div1e1m, -1); d <= div1e1le {
-		x = d
-		p += 1
-	}
-	return x, p
-}
--- a/src/internal/strconv/math_test.go
+++ b/src/internal/strconv/math_test.go
@ -5,161 +5,26 @@
 package strconv_test

 import (
-	. "internal/strconv"
 	"math"
 	"testing"
 )

-var pow10Tests = []struct {
-	exp10 int
-	mant  uint128
-	exp2  int
-	ok    bool
-}{
-	{-349, uint128{0, 0}, 0, false},
-	{-348, uint128{0xFA8FD5A0081C0288, 0x1732C869CD60E453}, -1156, true},
-	{0, uint128{0x8000000000000000, 0x0000000000000000}, 1, true},
-	{347, uint128{0xD13EB46469447567, 0x4B7195F2D2D1A9FB}, 1153, true},
-	{348, uint128{0, 0}, 0, false},
-}
-
-func TestPow10(t *testing.T) {
-	for _, tt := range pow10Tests {
-		mant, exp2, ok := pow10(tt.exp10)
-		if mant != tt.mant || exp2 != tt.exp2 {
-			t.Errorf("pow10(%d) = %#016x, %#016x, %d, %v want %#016x,%#016x, %d, %v",
-				tt.exp10, mant.Hi, mant.Lo, exp2, ok,
-				tt.mant.Hi, tt.mant.Lo, tt.exp2, tt.ok)
-		}
-	}
-
-	for p := pow10Min; p <= pow10Max; p++ {
-		mant, exp2, ok := pow10(p)
-		if !ok {
-			t.Errorf("pow10(%d) not ok", p)
-			continue
-		}
-		// Note: -64 instead of -128 because we only used mant.Hi, not all of mant.
-		have := math.Ldexp(float64(mant.Hi), exp2-64)
-		want := math.Pow(10, float64(p))
-		if math.Abs(have-want)/want > 0.00001 {
-			t.Errorf("pow10(%d) = %#016x%016x/2^128 * 2^%d = %g want ~%g", p, mant.Hi, mant.Lo, exp2, have, want)
-		}
-	}
-
-}
-
-func u128(hi, lo uint64) uint128 {
-	return uint128{Hi: hi, Lo: lo}
-}
-
-var umul192Tests = []struct {
-	x   uint64
-	y   uint128
-	hi  uint64
-	mid uint64
-	lo  uint64
-}{
-	{0, u128(0, 0), 0, 0, 0},
-	{^uint64(0), u128(^uint64(0), ^uint64(0)), ^uint64(1), ^uint64(0), 1},
-}
-
-func TestUmul192(t *testing.T) {
-	for _, tt := range umul192Tests {
-		hi, mid, lo := Umul192(tt.x, tt.y)
-		if hi != tt.hi || mid != tt.mid || lo != tt.lo {
-			t.Errorf("umul192(%#x, {%#x,%#x}) = %#x, %#x, %#x, want %#x, %#x, %#x",
-				tt.x, tt.y.Hi, tt.y.Lo, hi, mid, lo, tt.hi, tt.mid, tt.lo)
-		}
-	}
-}
-
-func TestMulLog10_2(t *testing.T) {
+func TestLog10Pow2(t *testing.T) {
 	for x := -1600; x <= +1600; x++ {
-		iMath := mulLog10_2(x)
+		iMath := log10Pow2(x)
 		fMath := int(math.Floor(float64(x) * math.Ln2 / math.Ln10))
 		if iMath != fMath {
-			t.Errorf("mulLog10_2(%d) failed: %d vs %d\n", x, iMath, fMath)
+			t.Errorf("log10Pow2(%d) = %d, want %d\n", x, iMath, fMath)
 		}
 	}
 }

-func TestMulLog2_10(t *testing.T) {
+func TestLog2Pow10(t *testing.T) {
 	for x := -500; x <= +500; x++ {
-		iMath := mulLog2_10(x)
+		iMath := log2Pow10(x)
 		fMath := int(math.Floor(float64(x) * math.Ln10 / math.Ln2))
 		if iMath != fMath {
-			t.Errorf("mulLog2_10(%d) failed: %d vs %d\n", x, iMath, fMath)
-		}
-	}
-}
-
-func pow5(p int) uint64 {
-	x := uint64(1)
-	for range p {
-		x *= 5
-	}
-	return x
-}
-
-func TestDivisiblePow5(t *testing.T) {
-	for p := 1; p <= 22; p++ {
-		x := pow5(p)
-		if divisiblePow5(1, p) {
-			t.Errorf("divisiblePow5(1, %d) = true, want, false", p)
-		}
-		if divisiblePow5(x-1, p) {
-			t.Errorf("divisiblePow5(%d, %d) = true, want false", x-1, p)
-		}
-		if divisiblePow5(x+1, p) {
-			t.Errorf("divisiblePow5(%d, %d) = true, want false", x-1, p)
-		}
-		if divisiblePow5(x/5, p) {
-			t.Errorf("divisiblePow5(%d, %d) = true, want false", x/5, p)
-		}
-		if !divisiblePow5(0, p) {
-			t.Errorf("divisiblePow5(0, %d) = false, want true", p)
-		}
-		if !divisiblePow5(x, p) {
-			t.Errorf("divisiblePow5(%d, %d) = false, want true", x, p)
-		}
-		if 2*x > x && !divisiblePow5(2*x, p) {
-			t.Errorf("divisiblePow5(%d, %d) = false, want true", 2*x, p)
-		}
-	}
-}
-
-func TestDiv5Tab(t *testing.T) {
-	for p := 1; p <= 22; p++ {
-		m := div5Tab[p-1][0]
-		le := div5Tab[p-1][1]
-
-		// See comment in math.go on div5Tab.
-		// m needs to be multiplicative inverse of pow5(p).
-		if m*pow5(p) != 1 {
-			t.Errorf("pow5Tab[%d-1][0] = %#x, but %#x * (5**%d) = %d, want 1", p, m, m, p, m*pow5(p))
-		}
-
-		// le needs to be ⌊(1<<64 - 1) / 5^p⌋.
-		want := (1<<64 - 1) / pow5(p)
-		if le != want {
-			t.Errorf("pow5Tab[%d-1][1] = %#x, want %#x", p, le, want)
-		}
-	}
-}
-
-func TestTrimZeros(t *testing.T) {
-	for _, x := range []uint64{1, 2, 3, 4, 101, 123} {
-		want := x
-		for p := range 20 {
-			haveX, haveP := trimZeros(x)
-			if haveX != want || haveP != p {
-				t.Errorf("trimZeros(%d) = %d, %d, want %d, %d", x, haveX, haveP, want, p)
-			}
-			if x >= (1<<64-1)/10 {
-				break
-			}
-			x *= 10
+			t.Errorf("log2Pow10(%d) = %d, want %d\n", x, iMath, fMath)
 		}
 	}
 }
--- a/src/internal/strconv/pow10gen.go
+++ b/src/internal/strconv/pow10gen.go
@ -55,7 +55,19 @@ func main() {
 		}
 		d := new(big.Int).Div(r.Num(), r.Denom())
 		hi, lo := new(big.Int).DivMod(d, b1p64, new(big.Int))
-		fmt.Fprintf(&out, "\t{%#016x, %#016x}, // 1e%d * 2**%d\n", hi.Uint64(), lo.Uint64(), e, be)
+		uhi := hi.Uint64()
+		ulo := lo.Uint64()
+		if !r.IsInt() {
+			ulo++
+			if ulo == 0 {
+				uhi++
+			}
+		}
+		if ulo != 0 {
+			uhi++
+			ulo = -ulo
+		}
+		fmt.Fprintf(&out, "\t{%#016x, %#016x}, // 1e%d * 2**%d\n", uhi, ulo, e, be)
 	}
 	fmt.Fprintf(&out, "}\n")

@ -86,6 +98,6 @@ const (


 // pow10Tab holds 128-bit mantissas of powers of 10.
-// The values are scaled so the high bit is always set; there is no "implicit leading 1 bit".
-var pow10Tab = [...]uint128{
+// The values are scaled so the high bit is always set.
+var pow10Tab = [...]pmHiLo{
 `
--- a/src/internal/strconv/pow10tab.go
+++ b/src/internal/strconv/pow10tab.go
--- a/src/internal/strconv/uscale.go
+++ b/src/internal/strconv/uscale.go
@ -0,0 +1,290 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Floating point binary↔decimal conversion by fast unrounded scaling.
+// See “Floating-Point Printing and Parsing Can Be Simple And Fast”,
+// https://research.swtch.com/fp
+
+package strconv
+
+import (
+	"math/bits"
+	"unsafe"
+)
+
+// bool2 converts b to an integer of type T: 1 for true, 0 for false.
+func bool2[T ~int | ~uint32 | ~uint64](b bool) T {
+	if b {
+		return 1
+	}
+	return 0
+}
+
+// pack64 takes m, e and returns f = m * 2**e, or an infinity and ErrRange on overflow.
+// It assumes the caller has provided a 53-bit mantissa m (bit 63 of m is kept as the sign)
+// and an exponent that is in range for the mantissa.
+func pack64(m uint64, e int) (float64, error) {
+	if m&(1<<52) == 0 { // bit 52 clear: the bits of m are the result as-is; e is not used
+		return float64frombits(m), nil
+	}
+	if e >= 0x7FF-1075 { // exponent field 1075+e would reach 0x7FF: too large to represent
+		return float64frombits(m&(1<<63) | 0x7ff<<52), ErrRange
+	}
+	return float64frombits(m&^(1<<52) | uint64(1075+e)<<52), nil // bit 52 is replaced by the exponent field 1075+e
+}
+
+// pack32 takes m, e and returns f = m * 2**e, or an infinity and ErrRange on overflow.
+// It assumes the caller has provided a 24-bit mantissa m (bit 31 of m is kept as the sign)
+// and an exponent that is in range for the mantissa.
+func pack32(m uint32, e int) (float32, error) {
+	if m&(1<<23) == 0 { // bit 23 clear: the bits of m are the result as-is; e is not used
+		return float32frombits(m), nil
+	}
+	if e >= 0xFF-150 { // exponent field 150+e would reach 0xFF: too large to represent
+		return float32frombits(m&(1<<31) | 0xff<<23), ErrRange
+	}
+	return float32frombits(m&^(1<<23) | uint32(150+e)<<23), nil // bit 23 is replaced by the exponent field 150+e
+}
+
+// An unrounded holds an unrounded value x scaled by 4 (two fraction bits); the low bit is sticky, recording discarded nonzero bits.
+type unrounded uint64
+
+func (u unrounded) floor() uint64         { return uint64((u + 0) >> 2) }            // ⌊x⌋
+func (u unrounded) roundHalfDown() uint64 { return uint64((u + 1) >> 2) }            // nearest integer, exact ties go down
+func (u unrounded) round() uint64         { return uint64((u + 1 + (u>>2)&1) >> 2) } // nearest integer, exact ties go to even
+func (u unrounded) roundHalfUp() uint64   { return uint64((u + 2) >> 2) }            // nearest integer, exact ties go up
+func (u unrounded) ceil() uint64          { return uint64((u + 3) >> 2) }            // ⌈x⌉
+func (u unrounded) nudge(δ int) unrounded { return u + unrounded(δ) }                // adds δ to the raw (scaled) representation
+
+func (u unrounded) div(d uint64) unrounded {
+	x := uint64(u)
+	return unrounded(x/d) | u&1 | bool2[unrounded](x%d != 0) // quotient; the low bit stays set, or becomes set if a remainder is dropped
+}
+
+func (u unrounded) rsh(s int) unrounded {
+	return u>>s | u&1 | bool2[unrounded](u&((1<<s)-1) != 0) // shifted value; the low bit stays set, or becomes set if nonzero bits are shifted out
+}
+
+// log10Pow2(x) returns ⌊log₁₀ 2**x⌋ = ⌊x * log₁₀ 2⌋ for small |x| (the package test covers -1600 ≤ x ≤ 1600).
+func log10Pow2(x int) int {
+	// log₁₀ 2 ≈ 0.30102999566 ≈ 78913 / 2^18
+	return (x * 78913) >> 18 // >> on a signed int rounds toward -∞, so this is a floor for negative x too
+}
+
+// log2Pow10(x) returns ⌊log₂ 10**x⌋ = ⌊x * log₂ 10⌋ for small |x| (the package test covers -500 ≤ x ≤ 500).
+func log2Pow10(x int) int {
+	// log₂ 10 ≈ 3.32192809489 ≈ 108853 / 2^15
+	return (x * 108853) >> 15 // >> on a signed int rounds toward -∞, so this is a floor for negative x too
+}
+
+// uint64pow10[x] is 10**x, for 0 ≤ x ≤ 19.
+var uint64pow10 = [...]uint64{
+	1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
+	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+}
+
+// fixedWidthFloat returns the n-digit decimal form of f = m * 2**e as d * 10**p.
+// n can be at most 18. m is handed to uscale unshifted, so it must already be left-justified.
+// If fmt == 'f' then n is a conservative estimate of the number of digits,
+// and digits are discarded to match prec.
+func fixedWidthFloat(m uint64, e, n, prec int, fmt byte) (d uint64, p int) {
+	p = n - 1 - log10Pow2(e+63) // power of ten that scales f to n digits (or n+1, corrected below)
+	var pre scaler
+	prescale(&pre, e, p, log2Pow10(p))
+	u := uscale(m, &pre)
+	if u >= unmin(uint64pow10[n]) { // would round to 10**n or more: one digit too many
+		u = u.div(10)
+		p--
+	}
+	if fmt == 'f' {
+		for p > prec { // drop low digits until the scale is 10**prec
+			u = u.div(10)
+			p--
+		}
+	}
+	return u.round(), -p // u is f * 10**p unrounded, so f ≈ d * 10**-p
+}
+
+// parseFloat64 rounds d * 10**p to the nearest float64 f.
+// d can have at most 19 digits. sign is or'ed into the result bits (presumably 0 or 1<<63).
+// It returns ErrRange if the result rounds to infinity.
+func parseFloat64(d uint64, p int, sign uint64) (float64, error) {
+	b := bits.Len64(d)
+	lp := log2Pow10(p)
+	e := min(1074, 53-b-lp) // aim for a 53-bit result, but never scale by more than 2**1074
+	var pre scaler
+	prescale(&pre, e-(64-b), p, lp) // e-(64-b) compensates for left-justifying d below
+	if pre.s >= 64 { // shift is outside what uscale supports: the result is a signed zero
+		return float64frombits(sign | 0), nil
+	}
+	u := uscale(d<<(64-b), &pre)
+
+	// This block is branch-free code for:
+	//	if u.round() >= 1<<53 {
+	//		u = u.rsh(1)
+	//		e = e - 1
+	//	}
+	s := bool2[int](u >= unmin(1<<53))
+	u = u>>s | u&1 // equals u.rsh(s) when s is 0 or 1
+	e = e - s
+
+	return pack64(sign|u.round(), -e)
+}
+
+// parseFloat32 rounds d * 10**p to the nearest float32 f.
+// d can have at most 19 digits. sign is or'ed into the result bits (presumably 0 or 1<<31).
+// It returns ErrRange if the result rounds to infinity.
+func parseFloat32(d uint64, p int, sign uint32) (float32, error) {
+	b := bits.Len64(d)
+	lp := log2Pow10(p)
+	e := min(149, 24-b-lp) // aim for a 24-bit result, but never scale by more than 2**149
+	var pre scaler
+	prescale(&pre, e-(64-b), p, lp) // e-(64-b) compensates for left-justifying d below
+	if pre.s >= 64 { // shift is outside what uscale supports: the result is a signed zero
+		return float32frombits(sign | 0), nil
+	}
+	u := uscale(d<<(64-b), &pre)
+
+	// This block is branch-free code for:
+	//	if u.round() >= 1<<24 {
+	//		u = u.rsh(1)
+	//		e = e - 1
+	//	}
+	s := bool2[int](u >= unmin(1<<24))
+	u = u>>s | u&1 // equals u.rsh(s) when s is 0 or 1
+	e = e - s
+
+	return pack32(sign|uint32(u.round()), -e)
+}
+
+// unmin returns the minimum unrounded that rounds to x.
+func unmin(x uint64) unrounded {
+	return unrounded(x<<2 - 2) // encodes x - 1/2, a tie that round() resolves up to x only when x is even
+}
+
+// shortFloat computes the shortest formatting of f = m * 2**e (m left-justified) as d * 10**p,
+// using as few digits as possible that will still round trip
+// back to the original float.
+func shortFloat[F float32 | float64](m uint64, e int) (d uint64, p int) {
+	var mantBits, minExp int // parameterized constants
+	switch 8 * unsafe.Sizeof(F(0)) {
+	case 32:
+		mantBits = float32MantBits
+		minExp = float32MinExp
+	case 64:
+		mantBits = float64MantBits
+		minExp = float64MinExp
+	}
+
+	// Note: these cases could be factored a little more,
+	// but in the first two branches, z is a constant,
+	// allowing the compiler to greatly simplify the code.
+	var min, max uint64
+	var odd int
+	z := 63 - mantBits
+	if m == 1<<63 && e > minExp { // only the top bit set: the lower bound is half as far away as the upper bound
+		p = -skewed(e + z)
+		min = m - 1<<(z-2) // min = m - 1/4 * 2**(e+z)
+		max = m + 1<<(z-1) // max = m + 1/2 * 2**(e+z)
+		odd = int(m>>z) & 1
+	} else if e >= minExp { // general case: symmetric bounds with constant z
+		p = -log10Pow2(e + z)
+		min = m - 1<<(z-1) // min = m - 1/2 * 2**(e+z)
+		max = m + 1<<(z-1) // max = m + 1/2 * 2**(e+z)
+		odd = int(m>>z) & 1
+	} else { // e < minExp: z grows by the shortfall minExp - e
+		z = z + (minExp - e)
+		p = -log10Pow2(e + z)
+		min = m - 1<<(z-1) // min = m - 1/2 * 2**(e+z)
+		max = m + 1<<(z-1) // max = m + 1/2 * 2**(e+z)
+		odd = int(m>>z) & 1
+	}
+
+	var pre scaler
+	prescale(&pre, e, p, log2Pow10(p))
+	dmin := uscale(min, &pre).nudge(+odd).ceil()  // smallest integer in the scaled interval (endpoint excluded when odd)
+	dmax := uscale(max, &pre).nudge(-odd).floor() // largest integer in the scaled interval (endpoint excluded when odd)
+
+	if d = dmax / 10; d*10 >= dmin { // a multiple of 10 lies in [dmin, dmax]: one digit fewer suffices
+		return d, -(p - 1)
+	}
+	if d = dmin; d < dmax { // more than one candidate: use the correctly rounded one
+		d = uscale(m, &pre).round()
+	}
+	return d, -p
+}
+
+// skewed computes the skewed footprint of m * 2**e,
+// which is ⌊log₁₀ 3/4 * 2**e⌋ = ⌊e*(log₁₀ 2)-(log₁₀ 4/3)⌋.
+func skewed(e int) int {
+	return (e*631305 - 261663) >> 21 // fixed-point form of the formula above, with constants scaled by 2**21
+}
+
+// A pmHiLo represents hi<<64 - lo.
+type pmHiLo struct {
+	hi uint64 // high word, weighted by 2**64
+	lo uint64 // amount subtracted from hi<<64
+}
+
+// A scaler holds derived scaling constants for a given e, p pair.
+type scaler struct {
+	// Note: using pm pmHiLo here nudges uscale just over the inlining boundary. Don't.
+	pmHi uint64 // hi field of the pow10Tab entry for p
+	pmLo uint64 // lo field of the pow10Tab entry for p
+	s    int    // right-shift count used by uscale; prescale sets it to -(e + lp + 3)
+}
+
+// prescale stores the scaling constants for e, p in *pre.
+// lp must be log2Pow10(p), and p must be in range for pow10Tab (it indexes pow10Tab[p-pow10Min]).
+// The caller is responsible for either avoiding e, p pairs
+// that cause pre.s < 0 or pre.s >= 64, or else handling
+// those cases before passing the result to uscale.
+// In practice, pre.s < 0 would indicate a buggy caller
+// and pre.s >= 64 can only happen for parsing and is
+// picked off at those call sites.
+func prescale(pre *scaler, e, p, lp int) {
+	pre.pmHi = pow10Tab[p-pow10Min].hi
+	pre.pmLo = pow10Tab[p-pow10Min].lo
+	pre.s = -(e + lp + 3)
+}
+
+// uscale returns unround(x * 2**e * 10**p).
+// The caller should pass &pre for prescale(&pre, e, p, log2Pow10(p))
+// and should have left-justified x so its high bit is set.
+// The caller is also responsible for checking that c.s < 64.
+// For formatting, that's always true.
+// For parsing, the caller needs to pick it off early and return a signed 0.
+func uscale(x uint64, c *scaler) unrounded {
+	hi, mid := bits.Mul64(x, c.pmHi)
+	s := c.s & 63 // make shifts cheaper
+	if hi>>s<<s != hi { // nonzero bits are shifted out of hi: result is inexact and a borrow cannot change hi>>s
+		return unrounded(hi>>s | 1)
+	}
+	mid2, _ := bits.Mul64(x, c.pmLo) // high word of x*pmLo, the part to subtract
+	hi -= bool2[uint64](mid < mid2)  // borrow from hi when mid - mid2 wraps
+	return unrounded(hi>>s | bool2[uint64](mid-mid2 > 1))
+}
+
+// setDigits sets s[:nd] to the nd digits described by d, p; it returns dp = nd+p and nzd, which is nd less any trailing '0' digits.
+func setDigits(s []byte, d uint64, p, nd int) (dp, nzd int) {
+	// Note: nd <= len(s) is guaranteed by caller,
+	// but writing it explicitly here lets the compiler know,
+	// so that it can remove the bounds check in the loop.
+	// (The slice s[:nd] not panicking only establishes nd <= cap(s).)
+	if nd <= len(s) {
+		formatBase10(s[:nd], d)
+		dp = nd + p
+		for nd > 0 && s[nd-1] == '0' {
+			nd--
+		}
+	}
+	return dp, nd
+}
+
+// numDigits returns the number of decimal digits in d.
+// It requires d ≥ 1.
+func numDigits(d uint64) int {
+	nd := log10Pow2(bits.Len64(d)) // estimate from the bit length: the digit count or one less
+	return nd + bool2[int](d >= uint64pow10[nd]) // add one when d has reached 10**nd
+}