mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
internal/strconv: delete ftoaryu
CL 700075 made this dead code.
Benchmarks below for CL 700075, testing Dragonbox vs the old Ryu being deleted.
The "Fixed" benchmarks are unchanged, which gives a sense of the noise level.
benchmark \ host linux-amd64 s7 linux-arm64 local linux-386 s7:GOARCH=386 linux-arm
vs base vs base vs base vs base vs base vs base vs base
AppendFloat/Decimal -2.68% +2.76% +4.99% -7.44% +11.93% +10.51% +21.84%
AppendFloat/Float -21.98% -13.32% -16.50% -11.54% -33.37% -28.66% -15.64%
AppendFloat/Exp -32.44% -25.54% -28.85% -31.79% -39.60% -35.92% -20.89%
AppendFloat/NegExp -33.31% -25.91% -28.90% -31.29% -41.17% -35.52% -21.32%
AppendFloat/LongExp -19.35% -9.51% -15.29% -12.36% -30.46% -25.10% -10.18%
AppendFloat/Big -24.40% -15.84% -22.56% -24.05% -43.23% -36.28% -26.45%
AppendFloat/BinaryExp -0.52% -1.20% ~ ~ ~ +0.96% +1.94%
AppendFloat/32Integer -14.24% -7.01% -12.82% -18.99% -12.12% -10.85% -0.32%
AppendFloat/32ExactFraction -34.53% -28.47% -34.50% -30.50% -43.75% -38.73% -25.44%
AppendFloat/32Point -25.83% -18.54% -23.52% -21.26% -36.74% -33.11% -20.72%
AppendFloat/32Exp -37.55% -33.36% -37.74% -39.06% -51.37% -44.53% -31.76%
AppendFloat/32NegExp -35.99% -31.96% -36.02% -37.13% -44.62% -39.03% -26.91%
AppendFloat/32Shortest -23.25% -18.02% -21.41% -23.07% -35.56% -32.89% -20.13%
AppendFloat/32Fixed8Hard +1.09% -1.94% ~ ~ -2.33% -1.36% -0.10%
AppendFloat/32Fixed9Hard +1.45% -2.10% +0.10% ~ -4.20% -0.72% +1.31%
AppendFloat/64Fixed1 +0.45% ~ ~ -1.66% -3.74% -2.13% ~
AppendFloat/64Fixed2 +0.32% -0.92% +0.53% -1.75% -2.69% ~ -0.49%
AppendFloat/64Fixed2.5 +0.38% -0.38% ~ ~ -5.14% -1.15% -0.97%
AppendFloat/64Fixed3 +0.97% -0.53% ~ +0.23% -3.57% -4.04% -0.27%
AppendFloat/64Fixed4 +0.95% -2.77% +0.45% -1.57% -3.99% -2.58% -0.91%
AppendFloat/64Fixed5Hard +0.52% -1.22% ~ -0.87% -3.20% -1.60% +0.49%
AppendFloat/64Fixed12 +1.15% -0.62% ~ ~ -3.37% -1.43% -0.72%
AppendFloat/64Fixed16 +1.13% ~ -0.21% -0.59% -3.65% ~ +0.74%
AppendFloat/64Fixed12Hard +0.78% -1.26% ~ -0.95% -4.82% -2.98% +0.26%
AppendFloat/64Fixed17Hard ~ ~ -0.32% -6.34% -2.44% -2.19% +1.00%
AppendFloat/64Fixed18Hard ~ ~ ~ ~ ~ ~ +0.06%
AppendFloat/64FixedF1 +0.44% ~ +0.43% -1.87% -2.75% ~ -1.24%
AppendFloat/64FixedF2 +1.35% -1.04% +0.81% +1.26% -2.21% -2.36% ~
AppendFloat/64FixedF3 ~ -1.14% +0.39% -1.58% -3.46% ~ -1.08%
AppendFloat/Slowpath64 -15.51% -7.05% -14.59% -7.86% -22.54% -19.63% -5.90%
AppendFloat/SlowpathDenormal64 -15.10% -8.19% -14.62% -9.36% -26.86% -23.10% -14.48%
host: linux-amd64
goos: linux
goarch: amd64
pkg: internal/strconv
cpu: Intel(R) Xeon(R) CPU @ 2.30GHz
│ 3c26aef8fb │ 8a958b0d9c1 │
│ sec/op │ sec/op vs base │
AppendFloat/Decimal-16 63.37n ± 0% 61.67n ± 0% -2.68% (p=0.000 n=20)
AppendFloat/Float-16 92.83n ± 0% 72.43n ± 0% -21.98% (p=0.000 n=20)
AppendFloat/Exp-16 98.60n ± 0% 66.61n ± 0% -32.44% (p=0.000 n=20)
AppendFloat/NegExp-16 100.15n ± 0% 66.79n ± 0% -33.31% (p=0.000 n=20)
AppendFloat/LongExp-16 105.35n ± 0% 84.96n ± 0% -19.35% (p=0.000 n=20)
AppendFloat/Big-16 108.50n ± 0% 82.03n ± 0% -24.40% (p=0.000 n=20)
AppendFloat/BinaryExp-16 47.27n ± 0% 47.03n ± 0% -0.52% (p=0.000 n=20)
AppendFloat/32Integer-16 63.29n ± 0% 54.28n ± 0% -14.24% (p=0.000 n=20)
AppendFloat/32ExactFraction-16 89.72n ± 0% 58.74n ± 0% -34.53% (p=0.000 n=20)
AppendFloat/32Point-16 87.32n ± 0% 64.77n ± 0% -25.83% (p=0.000 n=20)
AppendFloat/32Exp-16 94.89n ± 0% 59.26n ± 0% -37.55% (p=0.000 n=20)
AppendFloat/32NegExp-16 92.68n ± 0% 59.32n ± 0% -35.99% (p=0.000 n=20)
AppendFloat/32Shortest-16 82.12n ± 0% 63.04n ± 0% -23.25% (p=0.000 n=20)
AppendFloat/32Fixed8Hard-16 57.76n ± 0% 58.38n ± 0% +1.09% (p=0.000 n=20)
AppendFloat/32Fixed9Hard-16 66.44n ± 0% 67.41n ± 0% +1.45% (p=0.000 n=20)
AppendFloat/64Fixed1-16 51.00n ± 0% 51.24n ± 0% +0.45% (p=0.000 n=20)
AppendFloat/64Fixed2-16 50.86n ± 0% 51.03n ± 0% +0.32% (p=0.000 n=20)
AppendFloat/64Fixed2.5-16 49.31n ± 0% 49.49n ± 0% +0.38% (p=0.000 n=20)
AppendFloat/64Fixed3-16 51.98n ± 0% 52.48n ± 0% +0.97% (p=0.000 n=20)
AppendFloat/64Fixed4-16 50.05n ± 0% 50.52n ± 0% +0.95% (p=0.000 n=20)
AppendFloat/64Fixed5Hard-16 58.01n ± 0% 58.31n ± 0% +0.52% (p=0.000 n=20)
AppendFloat/64Fixed12-16 82.81n ± 0% 83.77n ± 0% +1.15% (p=0.000 n=20)
AppendFloat/64Fixed16-16 70.66n ± 0% 71.46n ± 0% +1.13% (p=0.000 n=20)
AppendFloat/64Fixed12Hard-16 68.25n ± 0% 68.79n ± 0% +0.78% (p=0.000 n=20)
AppendFloat/64Fixed17Hard-16 79.78n ± 0% 79.82n ± 0% ~ (p=0.136 n=20)
AppendFloat/64Fixed18Hard-16 4.881µ ± 0% 4.876µ ± 0% ~ (p=0.432 n=20)
AppendFloat/64FixedF1-16 68.74n ± 0% 69.04n ± 0% +0.44% (p=0.000 n=20)
AppendFloat/64FixedF2-16 57.36n ± 0% 58.13n ± 0% +1.35% (p=0.000 n=20)
AppendFloat/64FixedF3-16 52.59n ± 0% 52.77n ± 0% ~ (p=0.001 n=20)
AppendFloat/Slowpath64-16 99.56n ± 0% 84.12n ± 0% -15.51% (p=0.000 n=20)
AppendFloat/SlowpathDenormal64-16 97.35n ± 0% 82.65n ± 0% -15.10% (p=0.000 n=20)
AppendFloat/ShorterIntervalCase32-16 56.27n ± 0%
AppendFloat/ShorterIntervalCase64-16 57.42n ± 0%
geomean 82.53n 71.80n -11.68%
host: s7
cpu: AMD Ryzen 9 7950X 16-Core Processor
│ 3c26aef8fb │ 8a958b0d9c1 │
│ sec/op │ sec/op vs base │
AppendFloat/Decimal-32 22.30n ± 0% 22.91n ± 0% +2.76% (p=0.000 n=20)
AppendFloat/Float-32 34.54n ± 0% 29.94n ± 0% -13.32% (p=0.000 n=20)
AppendFloat/Exp-32 34.55n ± 0% 25.72n ± 0% -25.54% (p=0.000 n=20)
AppendFloat/NegExp-32 35.08n ± 0% 25.99n ± 1% -25.91% (p=0.000 n=20)
AppendFloat/LongExp-32 36.85n ± 0% 33.35n ± 1% -9.51% (p=0.000 n=20)
AppendFloat/Big-32 38.28n ± 0% 32.21n ± 1% -15.84% (p=0.000 n=20)
AppendFloat/BinaryExp-32 17.52n ± 0% 17.30n ± 0% -1.20% (p=0.000 n=20)
AppendFloat/32Integer-32 22.31n ± 0% 20.75n ± 0% -7.01% (p=0.000 n=20)
AppendFloat/32ExactFraction-32 32.74n ± 1% 23.41n ± 1% -28.47% (p=0.000 n=20)
AppendFloat/32Point-32 32.88n ± 0% 26.79n ± 0% -18.54% (p=0.000 n=20)
AppendFloat/32Exp-32 34.10n ± 0% 22.72n ± 1% -33.36% (p=0.000 n=20)
AppendFloat/32NegExp-32 33.17n ± 1% 22.57n ± 0% -31.96% (p=0.000 n=20)
AppendFloat/32Shortest-32 29.85n ± 1% 24.47n ± 0% -18.02% (p=0.000 n=20)
AppendFloat/32Fixed8Hard-32 22.62n ± 1% 22.19n ± 1% -1.94% (p=0.000 n=20)
AppendFloat/32Fixed9Hard-32 25.75n ± 1% 25.21n ± 0% -2.10% (p=0.000 n=20)
AppendFloat/64Fixed1-32 19.02n ± 1% 18.98n ± 0% ~ (p=0.351 n=20)
AppendFloat/64Fixed2-32 18.94n ± 0% 18.76n ± 0% -0.92% (p=0.000 n=20)
AppendFloat/64Fixed2.5-32 18.23n ± 0% 18.16n ± 0% -0.38% (p=0.001 n=20)
AppendFloat/64Fixed3-32 19.79n ± 0% 19.68n ± 0% -0.53% (p=0.000 n=20)
AppendFloat/64Fixed4-32 18.93n ± 0% 18.40n ± 1% -2.77% (p=0.000 n=20)
AppendFloat/64Fixed5Hard-32 21.81n ± 0% 21.54n ± 1% -1.22% (p=0.000 n=20)
AppendFloat/64Fixed12-32 30.58n ± 1% 30.39n ± 0% -0.62% (p=0.000 n=20)
AppendFloat/64Fixed16-32 26.98n ± 1% 26.80n ± 1% ~ (p=0.010 n=20)
AppendFloat/64Fixed12Hard-32 26.20n ± 0% 25.86n ± 1% -1.26% (p=0.000 n=20)
AppendFloat/64Fixed17Hard-32 30.01n ± 1% 30.10n ± 1% ~ (p=0.112 n=20)
AppendFloat/64Fixed18Hard-32 1.809µ ± 1% 1.806µ ± 0% ~ (p=0.713 n=20)
AppendFloat/64FixedF1-32 26.78n ± 1% 26.59n ± 0% ~ (p=0.005 n=20)
AppendFloat/64FixedF2-32 20.24n ± 1% 20.03n ± 0% -1.04% (p=0.000 n=20)
AppendFloat/64FixedF3-32 18.88n ± 0% 18.67n ± 0% -1.14% (p=0.000 n=20)
AppendFloat/Slowpath64-32 35.37n ± 0% 32.88n ± 1% -7.05% (p=0.000 n=20)
AppendFloat/SlowpathDenormal64-32 35.17n ± 0% 32.29n ± 1% -8.19% (p=0.000 n=20)
AppendFloat/ShorterIntervalCase32-32 21.76n ± 0%
AppendFloat/ShorterIntervalCase64-32 22.11n ± 0%
geomean 30.34n 27.23n -8.96%
host: linux-arm64
goarch: arm64
cpu: unknown
│ 3c26aef8fb │ 8a958b0d9c1 │
│ sec/op │ sec/op vs base │
AppendFloat/Decimal-8 60.08n ± 0% 63.07n ± 0% +4.99% (p=0.000 n=20)
AppendFloat/Float-8 88.53n ± 0% 73.92n ± 0% -16.50% (p=0.000 n=20)
AppendFloat/Exp-8 93.07n ± 0% 66.22n ± 0% -28.85% (p=0.000 n=20)
AppendFloat/NegExp-8 93.35n ± 0% 66.38n ± 0% -28.90% (p=0.000 n=20)
AppendFloat/LongExp-8 100.15n ± 0% 84.84n ± 0% -15.29% (p=0.000 n=20)
AppendFloat/Big-8 103.80n ± 0% 80.38n ± 0% -22.56% (p=0.000 n=20)
AppendFloat/BinaryExp-8 47.36n ± 0% 47.34n ± 0% ~ (p=0.033 n=20)
AppendFloat/32Integer-8 60.28n ± 0% 52.55n ± 0% -12.82% (p=0.000 n=20)
AppendFloat/32ExactFraction-8 86.11n ± 0% 56.40n ± 0% -34.50% (p=0.000 n=20)
AppendFloat/32Point-8 82.88n ± 0% 63.39n ± 0% -23.52% (p=0.000 n=20)
AppendFloat/32Exp-8 89.33n ± 0% 55.62n ± 0% -37.74% (p=0.000 n=20)
AppendFloat/32NegExp-8 87.48n ± 0% 55.97n ± 0% -36.02% (p=0.000 n=20)
AppendFloat/32Shortest-8 76.31n ± 0% 59.97n ± 0% -21.41% (p=0.000 n=20)
AppendFloat/32Fixed8Hard-8 52.83n ± 0% 52.82n ± 0% ~ (p=0.370 n=20)
AppendFloat/32Fixed9Hard-8 60.90n ± 0% 60.96n ± 0% +0.10% (p=0.000 n=20)
AppendFloat/64Fixed1-8 46.96n ± 0% 46.95n ± 0% ~ (p=0.702 n=20)
AppendFloat/64Fixed2-8 46.96n ± 0% 47.21n ± 0% +0.53% (p=0.000 n=20)
AppendFloat/64Fixed2.5-8 44.24n ± 0% 44.29n ± 0% ~ (p=0.006 n=20)
AppendFloat/64Fixed3-8 47.73n ± 0% 47.78n ± 0% ~ (p=0.020 n=20)
AppendFloat/64Fixed4-8 44.40n ± 0% 44.60n ± 0% +0.45% (p=0.000 n=20)
AppendFloat/64Fixed5Hard-8 52.52n ± 0% 52.50n ± 0% ~ (p=0.722 n=20)
AppendFloat/64Fixed12-8 78.57n ± 0% 78.56n ± 0% ~ (p=0.222 n=20)
AppendFloat/64Fixed16-8 65.36n ± 0% 65.22n ± 0% -0.21% (p=0.000 n=20)
AppendFloat/64Fixed12Hard-8 62.04n ± 0% 61.97n ± 0% ~ (p=0.004 n=20)
AppendFloat/64Fixed17Hard-8 74.30n ± 0% 74.06n ± 0% -0.32% (p=0.000 n=20)
AppendFloat/64Fixed18Hard-8 4.282µ ± 0% 4.284µ ± 0% ~ (p=0.296 n=20)
AppendFloat/64FixedF1-8 66.05n ± 0% 66.33n ± 0% +0.43% (p=0.000 n=20)
AppendFloat/64FixedF2-8 53.67n ± 0% 54.11n ± 0% +0.81% (p=0.000 n=20)
AppendFloat/64FixedF3-8 47.41n ± 0% 47.59n ± 0% +0.39% (p=0.000 n=20)
AppendFloat/Slowpath64-8 97.42n ± 0% 83.21n ± 0% -14.59% (p=0.000 n=20)
AppendFloat/SlowpathDenormal64-8 94.74n ± 0% 80.88n ± 0% -14.62% (p=0.000 n=20)
AppendFloat/ShorterIntervalCase32-8 53.77n ± 0%
AppendFloat/ShorterIntervalCase64-8 55.22n ± 0%
geomean 77.14n 67.89n -10.73%
host: local
goos: darwin
cpu: Apple M3 Pro
│ 3c26aef8fb │ 8a958b0d9c1 │
│ sec/op │ sec/op vs base │
AppendFloat/Decimal-12 21.09n ± 0% 19.52n ± 0% -7.44% (p=0.000 n=20)
AppendFloat/Float-12 32.36n ± 0% 28.63n ± 1% -11.54% (p=0.000 n=20)
AppendFloat/Exp-12 31.77n ± 0% 21.67n ± 0% -31.79% (p=0.000 n=20)
AppendFloat/NegExp-12 31.56n ± 1% 21.68n ± 0% -31.29% (p=0.000 n=20)
AppendFloat/LongExp-12 33.33n ± 0% 29.21n ± 0% -12.36% (p=0.000 n=20)
AppendFloat/Big-12 35.24n ± 1% 26.77n ± 0% -24.05% (p=0.000 n=20)
AppendFloat/BinaryExp-12 18.88n ± 1% 19.38n ± 2% ~ (p=0.031 n=20)
AppendFloat/32Integer-12 21.32n ± 1% 17.27n ± 0% -18.99% (p=0.000 n=20)
AppendFloat/32ExactFraction-12 30.85n ± 1% 21.44n ± 0% -30.50% (p=0.000 n=20)
AppendFloat/32Point-12 31.02n ± 1% 24.42n ± 0% -21.26% (p=0.000 n=20)
AppendFloat/32Exp-12 31.55n ± 0% 19.23n ± 0% -39.06% (p=0.000 n=20)
AppendFloat/32NegExp-12 30.32n ± 1% 19.06n ± 0% -37.13% (p=0.000 n=20)
AppendFloat/32Shortest-12 26.68n ± 0% 20.52n ± 0% -23.07% (p=0.000 n=20)
AppendFloat/32Fixed8Hard-12 17.34n ± 1% 17.24n ± 0% ~ (p=0.017 n=20)
AppendFloat/32Fixed9Hard-12 19.05n ± 1% 19.25n ± 1% ~ (p=0.155 n=20)
AppendFloat/64Fixed1-12 15.66n ± 0% 15.40n ± 0% -1.66% (p=0.000 n=20)
AppendFloat/64Fixed2-12 15.39n ± 0% 15.12n ± 0% -1.75% (p=0.000 n=20)
AppendFloat/64Fixed2.5-12 15.14n ± 0% 15.14n ± 0% ~ (p=0.645 n=20)
AppendFloat/64Fixed3-12 15.53n ± 0% 15.56n ± 0% +0.23% (p=0.000 n=20)
AppendFloat/64Fixed4-12 15.28n ± 0% 15.04n ± 0% -1.57% (p=0.000 n=20)
AppendFloat/64Fixed5Hard-12 18.32n ± 0% 18.16n ± 0% -0.87% (p=0.000 n=20)
AppendFloat/64Fixed12-12 25.51n ± 1% 25.48n ± 0% ~ (p=0.256 n=20)
AppendFloat/64Fixed16-12 21.32n ± 0% 21.20n ± 0% -0.59% (p=0.000 n=20)
AppendFloat/64Fixed12Hard-12 21.11n ± 1% 20.91n ± 1% -0.95% (p=0.001 n=20)
AppendFloat/64Fixed17Hard-12 26.89n ± 1% 25.18n ± 3% -6.34% (p=0.000 n=20)
AppendFloat/64Fixed18Hard-12 2.057µ ± 6% 2.065µ ± 1% ~ (p=0.856 n=20)
AppendFloat/64FixedF1-12 24.65n ± 0% 24.19n ± 0% -1.87% (p=0.000 n=20)
AppendFloat/64FixedF2-12 20.68n ± 0% 20.94n ± 0% +1.26% (p=0.000 n=20)
AppendFloat/64FixedF3-12 16.44n ± 0% 16.18n ± 0% -1.58% (p=0.000 n=20)
AppendFloat/Slowpath64-12 31.68n ± 0% 29.18n ± 0% -7.86% (p=0.000 n=20)
AppendFloat/SlowpathDenormal64-12 29.92n ± 1% 27.12n ± 0% -9.36% (p=0.000 n=20)
AppendFloat/ShorterIntervalCase32-12 18.44n ± 1%
AppendFloat/ShorterIntervalCase64-12 18.57n ± 0%
geomean 26.90n 23.50n -11.27%
host: linux-386
goos: linux
goarch: 386
cpu: Intel(R) Xeon(R) CPU @ 2.30GHz
│ 3c26aef8fb │ 8a958b0d9c1 │
│ sec/op │ sec/op vs base │
AppendFloat/Decimal-16 128.2n ± 0% 143.5n ± 0% +11.93% (p=0.000 n=20)
AppendFloat/Float-16 236.3n ± 0% 157.5n ± 0% -33.37% (p=0.000 n=20)
AppendFloat/Exp-16 245.3n ± 0% 148.2n ± 0% -39.60% (p=0.000 n=20)
AppendFloat/NegExp-16 251.2n ± 0% 147.8n ± 0% -41.17% (p=0.000 n=20)
AppendFloat/LongExp-16 253.2n ± 0% 176.0n ± 0% -30.46% (p=0.000 n=20)
AppendFloat/Big-16 278.6n ± 0% 158.1n ± 0% -43.23% (p=0.000 n=20)
AppendFloat/BinaryExp-16 89.72n ± 0% 89.47n ± 0% ~ (p=0.155 n=20)
AppendFloat/32Integer-16 127.1n ± 0% 111.7n ± 0% -12.12% (p=0.000 n=20)
AppendFloat/32ExactFraction-16 206.9n ± 1% 116.3n ± 1% -43.75% (p=0.000 n=20)
AppendFloat/32Point-16 196.9n ± 0% 124.5n ± 1% -36.74% (p=0.000 n=20)
AppendFloat/32Exp-16 235.1n ± 1% 114.3n ± 0% -51.37% (p=0.000 n=20)
AppendFloat/32NegExp-16 206.4n ± 0% 114.3n ± 1% -44.62% (p=0.000 n=20)
AppendFloat/32Shortest-16 189.7n ± 0% 122.3n ± 0% -35.56% (p=0.000 n=20)
AppendFloat/32Fixed8Hard-16 137.2n ± 0% 134.0n ± 0% -2.33% (p=0.000 n=20)
AppendFloat/32Fixed9Hard-16 160.8n ± 0% 154.0n ± 0% -4.20% (p=0.000 n=20)
AppendFloat/64Fixed1-16 140.2n ± 0% 135.0n ± 0% -3.74% (p=0.000 n=20)
AppendFloat/64Fixed2-16 135.5n ± 0% 131.8n ± 0% -2.69% (p=0.000 n=20)
AppendFloat/64Fixed2.5-16 133.3n ± 0% 126.5n ± 0% -5.14% (p=0.000 n=20)
AppendFloat/64Fixed3-16 135.8n ± 0% 130.9n ± 0% -3.57% (p=0.000 n=20)
AppendFloat/64Fixed4-16 127.9n ± 0% 122.8n ± 0% -3.99% (p=0.000 n=20)
AppendFloat/64Fixed5Hard-16 140.7n ± 0% 136.2n ± 0% -3.20% (p=0.000 n=20)
AppendFloat/64Fixed12-16 166.1n ± 0% 160.5n ± 0% -3.37% (p=0.000 n=20)
AppendFloat/64Fixed16-16 160.1n ± 0% 154.2n ± 0% -3.65% (p=0.000 n=20)
AppendFloat/64Fixed12Hard-16 156.6n ± 0% 149.0n ± 0% -4.82% (p=0.000 n=20)
AppendFloat/64Fixed17Hard-16 173.9n ± 1% 169.6n ± 0% -2.44% (p=0.000 n=20)
AppendFloat/64Fixed18Hard-16 10.59µ ± 1% 10.60µ ± 0% ~ (p=0.664 n=20)
AppendFloat/64FixedF1-16 158.5n ± 0% 154.1n ± 0% -2.75% (p=0.000 n=20)
AppendFloat/64FixedF2-16 147.1n ± 0% 143.8n ± 0% -2.21% (p=0.000 n=20)
AppendFloat/64FixedF3-16 135.8n ± 0% 131.1n ± 0% -3.46% (p=0.000 n=20)
AppendFloat/Slowpath64-16 244.9n ± 0% 189.7n ± 0% -22.54% (p=0.000 n=20)
AppendFloat/SlowpathDenormal64-16 241.8n ± 0% 176.9n ± 0% -26.86% (p=0.000 n=20)
AppendFloat/ShorterIntervalCase32-16 114.9n ± 0%
AppendFloat/ShorterIntervalCase64-16 130.6n ± 0%
geomean 195.7n 157.4n -18.30%
host: s7:GOARCH=386
cpu: AMD Ryzen 9 7950X 16-Core Processor
│ 3c26aef8fb │ 8a958b0d9c1 │
│ sec/op │ sec/op vs base │
AppendFloat/Decimal-32 42.76n ± 0% 47.25n ± 0% +10.51% (p=0.000 n=20)
AppendFloat/Float-32 71.44n ± 1% 50.97n ± 0% -28.66% (p=0.000 n=20)
AppendFloat/Exp-32 75.51n ± 0% 48.39n ± 1% -35.92% (p=0.000 n=20)
AppendFloat/NegExp-32 74.70n ± 0% 48.17n ± 1% -35.52% (p=0.000 n=20)
AppendFloat/LongExp-32 76.52n ± 0% 57.32n ± 1% -25.10% (p=0.000 n=20)
AppendFloat/Big-32 83.05n ± 0% 52.92n ± 1% -36.28% (p=0.000 n=20)
AppendFloat/BinaryExp-32 31.92n ± 1% 32.22n ± 0% +0.96% (p=0.000 n=20)
AppendFloat/32Integer-32 41.29n ± 1% 36.81n ± 0% -10.85% (p=0.000 n=20)
AppendFloat/32ExactFraction-32 62.29n ± 1% 38.16n ± 0% -38.73% (p=0.000 n=20)
AppendFloat/32Point-32 60.45n ± 1% 40.44n ± 1% -33.11% (p=0.000 n=20)
AppendFloat/32Exp-32 69.32n ± 1% 38.45n ± 1% -44.53% (p=0.000 n=20)
AppendFloat/32NegExp-32 63.39n ± 0% 38.64n ± 1% -39.03% (p=0.000 n=20)
AppendFloat/32Shortest-32 58.90n ± 1% 39.53n ± 0% -32.89% (p=0.000 n=20)
AppendFloat/32Fixed8Hard-32 43.30n ± 0% 42.70n ± 1% -1.36% (p=0.000 n=20)
AppendFloat/32Fixed9Hard-32 49.96n ± 1% 49.60n ± 0% -0.72% (p=0.000 n=20)
AppendFloat/64Fixed1-32 42.99n ± 1% 42.08n ± 0% -2.13% (p=0.000 n=20)
AppendFloat/64Fixed2-32 41.58n ± 0% 41.42n ± 1% ~ (p=0.077 n=20)
AppendFloat/64Fixed2.5-32 40.47n ± 1% 40.00n ± 1% -1.15% (p=0.000 n=20)
AppendFloat/64Fixed3-32 43.43n ± 1% 41.67n ± 0% -4.04% (p=0.000 n=20)
AppendFloat/64Fixed4-32 40.44n ± 0% 39.40n ± 0% -2.58% (p=0.000 n=20)
AppendFloat/64Fixed5Hard-32 43.41n ± 0% 42.72n ± 0% -1.60% (p=0.000 n=20)
AppendFloat/64Fixed12-32 52.00n ± 0% 51.26n ± 0% -1.43% (p=0.000 n=20)
AppendFloat/64Fixed16-32 50.62n ± 1% 50.55n ± 0% ~ (p=0.234 n=20)
AppendFloat/64Fixed12Hard-32 49.36n ± 0% 47.89n ± 0% -2.98% (p=0.000 n=20)
AppendFloat/64Fixed17Hard-32 56.91n ± 0% 55.66n ± 1% -2.19% (p=0.000 n=20)
AppendFloat/64Fixed18Hard-32 3.983µ ± 0% 3.964µ ± 0% ~ (p=0.014 n=20)
AppendFloat/64FixedF1-32 49.31n ± 1% 49.10n ± 1% ~ (p=0.005 n=20)
AppendFloat/64FixedF2-32 45.06n ± 0% 44.00n ± 1% -2.36% (p=0.000 n=20)
AppendFloat/64FixedF3-32 42.22n ± 0% 42.20n ± 1% ~ (p=0.644 n=20)
AppendFloat/Slowpath64-32 75.77n ± 0% 60.89n ± 1% -19.63% (p=0.000 n=20)
AppendFloat/SlowpathDenormal64-32 74.88n ± 1% 57.59n ± 1% -23.10% (p=0.000 n=20)
AppendFloat/ShorterIntervalCase32-32 37.66n ± 1%
AppendFloat/ShorterIntervalCase64-32 42.49n ± 1%
geomean 61.34n 51.27n -15.08%
host: linux-arm
goarch: arm
cpu: ARMv8 Processor rev 1 (v8l)
│ 3c26aef8fb │ 8a958b0d9c1 │
│ sec/op │ sec/op vs base │
AppendFloat/Decimal-4 110.8n ± 0% 135.0n ± 0% +21.84% (p=0.000 n=20)
AppendFloat/Float-4 172.0n ± 0% 145.1n ± 0% -15.64% (p=0.000 n=20)
AppendFloat/Exp-4 172.1n ± 0% 136.2n ± 0% -20.89% (p=0.000 n=20)
AppendFloat/NegExp-4 172.6n ± 0% 135.8n ± 0% -21.32% (p=0.000 n=20)
AppendFloat/LongExp-4 180.2n ± 0% 161.9n ± 0% -10.18% (p=0.000 n=20)
AppendFloat/Big-4 195.5n ± 0% 143.8n ± 0% -26.45% (p=0.000 n=20)
AppendFloat/BinaryExp-4 84.75n ± 0% 86.40n ± 0% +1.94% (p=0.000 n=20)
AppendFloat/32Integer-4 110.4n ± 0% 110.0n ± 0% -0.32% (p=0.000 n=20)
AppendFloat/32ExactFraction-4 152.9n ± 0% 114.0n ± 0% -25.44% (p=0.000 n=20)
AppendFloat/32Point-4 151.5n ± 0% 120.1n ± 0% -20.72% (p=0.000 n=20)
AppendFloat/32Exp-4 163.1n ± 0% 111.3n ± 0% -31.76% (p=0.000 n=20)
AppendFloat/32NegExp-4 152.0n ± 0% 111.1n ± 0% -26.91% (p=0.000 n=20)
AppendFloat/32Shortest-4 145.8n ± 0% 116.5n ± 0% -20.13% (p=0.000 n=20)
AppendFloat/32Fixed8Hard-4 104.1n ± 0% 104.0n ± 0% -0.10% (p=0.000 n=20)
AppendFloat/32Fixed9Hard-4 114.2n ± 0% 115.7n ± 0% +1.31% (p=0.000 n=20)
AppendFloat/64Fixed1-4 97.35n ± 0% 97.31n ± 0% ~ (p=0.357 n=20)
AppendFloat/64Fixed2-4 95.74n ± 0% 95.28n ± 0% -0.49% (p=0.000 n=20)
AppendFloat/64Fixed2.5-4 94.24n ± 0% 93.32n ± 0% -0.97% (p=0.000 n=20)
AppendFloat/64Fixed3-4 95.56n ± 0% 95.30n ± 0% -0.27% (p=0.000 n=20)
AppendFloat/64Fixed4-4 92.36n ± 0% 91.52n ± 0% -0.91% (p=0.000 n=20)
AppendFloat/64Fixed5Hard-4 101.5n ± 0% 102.0n ± 0% +0.49% (p=0.000 n=20)
AppendFloat/64Fixed12-4 125.5n ± 0% 124.6n ± 0% -0.72% (p=0.000 n=20)
AppendFloat/64Fixed16-4 121.8n ± 0% 122.7n ± 0% +0.74% (p=0.000 n=20)
AppendFloat/64Fixed12Hard-4 116.1n ± 0% 116.4n ± 0% +0.26% (p=0.000 n=20)
AppendFloat/64Fixed17Hard-4 129.8n ± 0% 131.1n ± 0% +1.00% (p=0.000 n=20)
AppendFloat/64Fixed18Hard-4 7.945µ ± 0% 7.950µ ± 0% +0.06% (p=0.000 n=20)
AppendFloat/64FixedF1-4 112.8n ± 0% 111.4n ± 0% -1.24% (p=0.000 n=20)
AppendFloat/64FixedF2-4 100.6n ± 0% 100.5n ± 0% ~ (p=0.066 n=20)
AppendFloat/64FixedF3-4 96.45n ± 0% 95.41n ± 0% -1.08% (p=0.000 n=20)
AppendFloat/Slowpath64-4 176.3n ± 0% 165.9n ± 0% -5.90% (p=0.000 n=20)
AppendFloat/SlowpathDenormal64-4 178.2n ± 0% 152.4n ± 0% -14.48% (p=0.000 n=20)
AppendFloat/ShorterIntervalCase32-4 112.8n ± 0%
AppendFloat/ShorterIntervalCase64-4 119.0n ± 0%
geomean 144.6n 132.1n -7.84%
Change-Id: I1eb3c7b8756ad6cf938bc9b81180e01fd8a4cd9e
Reviewed-on: https://go-review.googlesource.com/c/go/+/723861
Reviewed-by: Jorropo <jorropo.pgm@gmail.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Russ Cox <rsc@golang.org>
This commit is contained in:
parent
8d6d14f5d6
commit
6954be0baa
1 changed file with 0 additions and 307 deletions
|
|
@ -1,307 +0,0 @@
|
|||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package strconv
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// binary to decimal conversion using the Ryū algorithm.
|
||||
//
|
||||
// See Ulf Adams, "Ryū: Fast Float-to-String Conversion" (doi:10.1145/3192366.3192369)
|
||||
|
||||
// ryuFtoaShortest formats mant*2^exp with prec decimal digits.
|
||||
func ryuFtoaShortest(d *decimalSlice, mant uint64, exp int, flt *floatInfo) {
|
||||
if mant == 0 {
|
||||
d.nd, d.dp = 0, 0
|
||||
return
|
||||
}
|
||||
// If input is an exact integer with fewer bits than the mantissa,
|
||||
// the previous and next integer are not admissible representations.
|
||||
if exp <= 0 && bits.TrailingZeros64(mant) >= -exp {
|
||||
mant >>= uint(-exp)
|
||||
ryuDigits(d, mant, mant, mant, true, false)
|
||||
return
|
||||
}
|
||||
ml, mc, mu, e2 := computeBounds(mant, exp, flt)
|
||||
if e2 == 0 {
|
||||
ryuDigits(d, ml, mc, mu, true, false)
|
||||
return
|
||||
}
|
||||
// Find 10^q *larger* than 2^-e2
|
||||
q := mulLog10_2(-e2) + 1
|
||||
|
||||
// We are going to multiply by 10^q using 128-bit arithmetic.
|
||||
// The exponent is the same for all 3 numbers.
|
||||
var dl, dc, du uint64
|
||||
var dl0, dc0, du0 bool
|
||||
if flt == &float32info {
|
||||
var dl32, dc32, du32 uint32
|
||||
dl32, _, dl0 = mult64bitPow10(uint32(ml), e2, q)
|
||||
dc32, _, dc0 = mult64bitPow10(uint32(mc), e2, q)
|
||||
du32, e2, du0 = mult64bitPow10(uint32(mu), e2, q)
|
||||
dl, dc, du = uint64(dl32), uint64(dc32), uint64(du32)
|
||||
} else {
|
||||
dl, _, dl0 = mult128bitPow10(ml, e2, q)
|
||||
dc, _, dc0 = mult128bitPow10(mc, e2, q)
|
||||
du, e2, du0 = mult128bitPow10(mu, e2, q)
|
||||
}
|
||||
if e2 >= 0 {
|
||||
panic("not enough significant bits after mult128bitPow10")
|
||||
}
|
||||
// Is it an exact computation?
|
||||
if q > 55 {
|
||||
// Large positive powers of ten are not exact
|
||||
dl0, dc0, du0 = false, false, false
|
||||
}
|
||||
if q < 0 && q >= -24 {
|
||||
// Division by a power of ten may be exact.
|
||||
// (note that 5^25 is a 59-bit number so division by 5^25 is never exact).
|
||||
if divisiblePow5(ml, -q) {
|
||||
dl0 = true
|
||||
}
|
||||
if divisiblePow5(mc, -q) {
|
||||
dc0 = true
|
||||
}
|
||||
if divisiblePow5(mu, -q) {
|
||||
du0 = true
|
||||
}
|
||||
}
|
||||
// Express the results (dl, dc, du)*2^e2 as integers.
|
||||
// Extra bits must be removed and rounding hints computed.
|
||||
extra := uint(-e2)
|
||||
extraMask := uint64(1<<extra - 1)
|
||||
// Now compute the floored, integral base 10 mantissas.
|
||||
dl, fracl := dl>>extra, dl&extraMask
|
||||
dc, fracc := dc>>extra, dc&extraMask
|
||||
du, fracu := du>>extra, du&extraMask
|
||||
// Is it allowed to use 'du' as a result?
|
||||
// It is always allowed when it is truncated, but also
|
||||
// if it is exact and the original binary mantissa is even
|
||||
// When disallowed, we can subtract 1.
|
||||
uok := !du0 || fracu > 0
|
||||
if du0 && fracu == 0 {
|
||||
uok = mant&1 == 0
|
||||
}
|
||||
if !uok {
|
||||
du--
|
||||
}
|
||||
// Is 'dc' the correctly rounded base 10 mantissa?
|
||||
// The correct rounding might be dc+1
|
||||
cup := false // don't round up.
|
||||
if dc0 {
|
||||
// If we computed an exact product, the half integer
|
||||
// should round to next (even) integer if 'dc' is odd.
|
||||
cup = fracc > 1<<(extra-1) ||
|
||||
(fracc == 1<<(extra-1) && dc&1 == 1)
|
||||
} else {
|
||||
// otherwise, the result is a lower truncation of the ideal
|
||||
// result.
|
||||
cup = fracc>>(extra-1) == 1
|
||||
}
|
||||
// Is 'dl' an allowed representation?
|
||||
// Only if it is an exact value, and if the original binary mantissa
|
||||
// was even.
|
||||
lok := dl0 && fracl == 0 && (mant&1 == 0)
|
||||
if !lok {
|
||||
dl++
|
||||
}
|
||||
// We need to remember whether the trimmed digits of 'dc' are zero.
|
||||
c0 := dc0 && fracc == 0
|
||||
// render digits
|
||||
ryuDigits(d, dl, dc, du, c0, cup)
|
||||
d.dp -= q
|
||||
}
|
||||
|
||||
// computeBounds returns a floating-point vector (l, c, u)×2^e2
|
||||
// where the mantissas are 55-bit (or 26-bit) integers, describing the interval
|
||||
// represented by the input float64 or float32.
|
||||
func computeBounds(mant uint64, exp int, flt *floatInfo) (lower, central, upper uint64, e2 int) {
|
||||
if mant != 1<<flt.mantbits || exp == flt.bias+1-int(flt.mantbits) {
|
||||
// regular case (or denormals)
|
||||
lower, central, upper = 2*mant-1, 2*mant, 2*mant+1
|
||||
e2 = exp - 1
|
||||
return
|
||||
} else {
|
||||
// border of an exponent
|
||||
lower, central, upper = 4*mant-1, 4*mant, 4*mant+2
|
||||
e2 = exp - 2
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func ryuDigits(d *decimalSlice, lower, central, upper uint64,
|
||||
c0, cup bool) {
|
||||
lhi, llo := uint32(lower/1e9), uint32(lower%1e9)
|
||||
chi, clo := uint32(central/1e9), uint32(central%1e9)
|
||||
uhi, ulo := uint32(upper/1e9), uint32(upper%1e9)
|
||||
if uhi == 0 {
|
||||
// only low digits (for denormals)
|
||||
ryuDigits32(d, llo, clo, ulo, c0, cup, 8)
|
||||
} else if lhi < uhi {
|
||||
// truncate 9 digits at once.
|
||||
if llo != 0 {
|
||||
lhi++
|
||||
}
|
||||
c0 = c0 && clo == 0
|
||||
cup = (clo > 5e8) || (clo == 5e8 && cup)
|
||||
ryuDigits32(d, lhi, chi, uhi, c0, cup, 8)
|
||||
d.dp += 9
|
||||
} else {
|
||||
d.nd = 0
|
||||
// emit high part
|
||||
n := uint(9)
|
||||
for v := chi; v > 0; {
|
||||
v1, v2 := v/10, v%10
|
||||
v = v1
|
||||
n--
|
||||
d.d[n] = byte(v2 + '0')
|
||||
}
|
||||
d.d = d.d[n:]
|
||||
d.nd = int(9 - n)
|
||||
// emit low part
|
||||
ryuDigits32(d, llo, clo, ulo,
|
||||
c0, cup, d.nd+8)
|
||||
}
|
||||
// trim trailing zeros
|
||||
for d.nd > 0 && d.d[d.nd-1] == '0' {
|
||||
d.nd--
|
||||
}
|
||||
// trim initial zeros
|
||||
for d.nd > 0 && d.d[0] == '0' {
|
||||
d.nd--
|
||||
d.dp--
|
||||
d.d = d.d[1:]
|
||||
}
|
||||
}
|
||||
|
||||
// ryuDigits32 emits decimal digits for a number less than 1e9.
|
||||
func ryuDigits32(d *decimalSlice, lower, central, upper uint32,
|
||||
c0, cup bool, endindex int) {
|
||||
if upper == 0 {
|
||||
d.dp = endindex + 1
|
||||
return
|
||||
}
|
||||
trimmed := 0
|
||||
// Remember last trimmed digit to check for round-up.
|
||||
// c0 will be used to remember zeroness of following digits.
|
||||
cNextDigit := 0
|
||||
for upper > 0 {
|
||||
// Repeatedly compute:
|
||||
// l = Ceil(lower / 10^k)
|
||||
// c = Round(central / 10^k)
|
||||
// u = Floor(upper / 10^k)
|
||||
// and stop when c goes out of the (l, u) interval.
|
||||
l := (lower + 9) / 10
|
||||
c, cdigit := central/10, central%10
|
||||
u := upper / 10
|
||||
if l > u {
|
||||
// don't trim the last digit as it is forbidden to go below l
|
||||
// other, trim and exit now.
|
||||
break
|
||||
}
|
||||
// Check that we didn't cross the lower boundary.
|
||||
// The case where l < u but c == l-1 is essentially impossible,
|
||||
// but may happen if:
|
||||
// lower = ..11
|
||||
// central = ..19
|
||||
// upper = ..31
|
||||
// and means that 'central' is very close but less than
|
||||
// an integer ending with many zeros, and usually
|
||||
// the "round-up" logic hides the problem.
|
||||
if l == c+1 && c < u {
|
||||
c++
|
||||
cdigit = 0
|
||||
cup = false
|
||||
}
|
||||
trimmed++
|
||||
// Remember trimmed digits of c
|
||||
c0 = c0 && cNextDigit == 0
|
||||
cNextDigit = int(cdigit)
|
||||
lower, central, upper = l, c, u
|
||||
}
|
||||
// should we round up?
|
||||
if trimmed > 0 {
|
||||
cup = cNextDigit > 5 ||
|
||||
(cNextDigit == 5 && !c0) ||
|
||||
(cNextDigit == 5 && c0 && central&1 == 1)
|
||||
}
|
||||
if central < upper && cup {
|
||||
central++
|
||||
}
|
||||
// We know where the number ends, fill directly
|
||||
endindex -= trimmed
|
||||
v := central
|
||||
n := endindex
|
||||
for n > d.nd {
|
||||
v1, v2 := v/100, v%100
|
||||
d.d[n] = smalls[2*v2+1]
|
||||
d.d[n-1] = smalls[2*v2+0]
|
||||
n -= 2
|
||||
v = v1
|
||||
}
|
||||
if n == d.nd {
|
||||
d.d[n] = byte(v + '0')
|
||||
}
|
||||
d.nd = endindex + 1
|
||||
d.dp = d.nd + trimmed
|
||||
}
|
||||
|
||||
// mult64bitPow10 takes a floating-point input with a 25-bit
|
||||
// mantissa and multiplies it with 10^q. The resulting mantissa
|
||||
// is m*P >> 57 where P is a 64-bit truncated power of 10.
|
||||
// It is typically 31 or 32-bit wide.
|
||||
// The returned boolean is true if all trimmed bits were zero.
|
||||
//
|
||||
// That is:
|
||||
//
|
||||
// m*2^e2 * round(10^q) = resM * 2^resE + ε
|
||||
// exact = ε == 0
|
||||
func mult64bitPow10(m uint32, e2, q int) (resM uint32, resE int, exact bool) {
|
||||
if q == 0 {
|
||||
// P == 1<<63
|
||||
return m << 6, e2 - 6, true
|
||||
}
|
||||
pow, exp2, ok := pow10(q)
|
||||
if !ok {
|
||||
// This never happens due to the range of float32/float64 exponent
|
||||
panic("mult64bitPow10: power of 10 is out of range")
|
||||
}
|
||||
if q < 0 {
|
||||
// Inverse powers of ten must be rounded up.
|
||||
pow.Hi++
|
||||
}
|
||||
hi, lo := bits.Mul64(uint64(m), pow.Hi)
|
||||
e2 += exp2 - 64 + 57
|
||||
return uint32(hi<<7 | lo>>57), e2, lo<<7 == 0
|
||||
}
|
||||
|
||||
// mult128bitPow10 takes a floating-point input with a 55-bit
|
||||
// mantissa and multiplies it with 10^q. The resulting mantissa
|
||||
// is m*P >> 119 where P is a 128-bit truncated power of 10.
|
||||
// It is typically 63 or 64-bit wide.
|
||||
// The returned boolean is true is all trimmed bits were zero.
|
||||
//
|
||||
// That is:
|
||||
//
|
||||
// m*2^e2 * round(10^q) = resM * 2^resE + ε
|
||||
// exact = ε == 0
|
||||
func mult128bitPow10(m uint64, e2, q int) (resM uint64, resE int, exact bool) {
|
||||
if q == 0 {
|
||||
// P == 1<<127
|
||||
return m << 8, e2 - 8, true
|
||||
}
|
||||
pow, exp2, ok := pow10(q)
|
||||
if !ok {
|
||||
// This never happens due to the range of float32/float64 exponent
|
||||
panic("mult128bitPow10: power of 10 is out of range")
|
||||
}
|
||||
if q < 0 {
|
||||
// Inverse powers of ten must be rounded up.
|
||||
pow.Lo++
|
||||
}
|
||||
e2 += exp2 - 128 + 119
|
||||
|
||||
hi, mid, lo := umul192(m, pow)
|
||||
return hi<<9 | mid>>55, e2, mid<<9 == 0 && lo == 0
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue