go/src/math/rand/v2/rand.go

388 lines
13 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package rand implements pseudo-random number generators suitable for tasks
// such as simulation, but it should not be used for security-sensitive work.
//
// Random numbers are generated by a [Source], usually wrapped in a [Rand].
// Both types should be used by a single goroutine at a time: sharing among
// multiple goroutines requires some kind of synchronization.
//
// Top-level functions, such as [Float64] and [Int],
// are safe for concurrent use by multiple goroutines.
//
// This package's outputs might be easily predictable regardless of how it's
// seeded. For random numbers suitable for security-sensitive work, see the
// crypto/rand package.
package rand
import (
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
"math/bits"
_ "unsafe" // for go:linkname
)
math/rand/v2: change Source to use uint64 This should make Uint64-using functions faster and leave other things alone. It is a mystery why so much got faster. A good cautionary tale not to read too much into minor jitter in the benchmarks. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 220860f76f.amd64 │ 11ad9fdddc.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.555n ± 1% 1.335n ± 1% -14.15% (p=0.000 n=20) GlobalInt64-32 2.071n ± 1% 2.046n ± 1% ~ (p=0.016 n=20) GlobalInt63Parallel-32 0.1023n ± 1% 0.1037n ± 1% +1.37% (p=0.002 n=20) GlobalUint64-32 5.193n ± 1% 2.075n ± 0% -60.06% (p=0.000 n=20) GlobalUint64Parallel-32 0.2341n ± 0% 0.1013n ± 1% -56.74% (p=0.000 n=20) Int64-32 2.056n ± 2% 1.726n ± 2% -16.10% (p=0.000 n=20) Uint64-32 2.077n ± 2% 1.673n ± 1% -19.46% (p=0.000 n=20) GlobalIntN1000-32 4.077n ± 2% 3.895n ± 2% -4.45% (p=0.000 n=20) IntN1000-32 3.476n ± 2% 3.403n ± 1% -2.10% (p=0.000 n=20) Int64N1000-32 3.059n ± 1% 3.053n ± 2% ~ (p=0.131 n=20) Int64N1e8-32 2.942n ± 1% 2.718n ± 1% -7.60% (p=0.000 n=20) Int64N1e9-32 2.932n ± 1% 2.712n ± 1% -7.50% (p=0.000 n=20) Int64N2e9-32 2.925n ± 1% 2.690n ± 1% -8.03% (p=0.000 n=20) Int64N1e18-32 3.116n ± 1% 3.084n ± 2% ~ (p=0.425 n=20) Int64N2e18-32 4.067n ± 1% 4.026n ± 1% -1.02% (p=0.007 n=20) Int64N4e18-32 4.054n ± 1% 4.049n ± 2% ~ (p=0.204 n=20) Int32N1000-32 2.951n ± 1% 2.730n ± 0% -7.49% (p=0.000 n=20) Int32N1e8-32 3.102n ± 1% 2.916n ± 2% -6.03% (p=0.000 n=20) Int32N1e9-32 3.535n ± 1% 3.375n ± 1% -4.54% (p=0.000 n=20) Int32N2e9-32 3.514n ± 1% 3.292n ± 1% -6.30% (p=0.000 n=20) Float32-32 2.760n ± 1% 2.673n ± 1% -3.13% (p=0.000 n=20) Float64-32 2.284n ± 1% 2.485n ± 1% +8.80% (p=0.000 n=20) ExpFloat64-32 3.757n ± 1% 3.577n ± 2% -4.78% (p=0.000 n=20) NormFloat64-32 3.837n ± 1% 3.797n ± 2% ~ (p=0.204 n=20) Perm3-32 35.23n ± 2% 35.79n ± 2% ~ (p=0.298 n=20) Perm30-32 208.8n ± 1% 205.1n ± 1% -1.82% (p=0.000 n=20) Perm30ViaShuffle-32 111.7n ± 1% 111.2n ± 2% ~ (p=0.273 n=20) ShuffleOverhead-32 101.1n ± 1% 100.5n ± 2% ~ (p=0.878 n=20) Concurrent-32 2.108n ± 7% 2.188n ± 5% ~ (p=0.417 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 │ 220860f76f.arm64 │ 11ad9fdddc.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.316n ± 1% 2.272n ± 1% -1.86% (p=0.000 n=20) GlobalInt64-8 2.183n ± 1% 2.155n ± 1% ~ (p=0.122 n=20) GlobalInt63Parallel-8 0.4331n ± 0% 0.4352n ± 0% +0.48% (p=0.000 n=20) GlobalUint64-8 4.377n ± 2% 2.173n ± 1% -50.35% (p=0.000 n=20) GlobalUint64Parallel-8 0.9237n ± 0% 0.4340n ± 0% -53.02% (p=0.000 n=20) Int64-8 2.538n ± 1% 2.544n ± 1% ~ (p=0.189 n=20) Uint64-8 2.604n ± 1% 2.552n ± 1% -1.98% (p=0.000 n=20) GlobalIntN1000-8 3.857n ± 2% 3.856n ± 0% ~ (p=0.051 n=20) IntN1000-8 3.822n ± 2% 3.820n ± 0% -0.05% (p=0.001 n=20) Int64N1000-8 3.318n ± 0% 3.219n ± 2% -2.98% (p=0.000 n=20) Int64N1e8-8 3.349n ± 1% 3.221n ± 2% -3.79% (p=0.000 n=20) Int64N1e9-8 3.317n ± 2% 3.276n ± 2% -1.24% (p=0.001 n=20) Int64N2e9-8 3.317n ± 2% 3.217n ± 0% -3.01% (p=0.000 n=20) Int64N1e18-8 3.542n ± 1% 3.502n ± 2% -1.16% (p=0.001 n=20) Int64N2e18-8 5.087n ± 0% 4.968n ± 1% -2.33% (p=0.000 n=20) Int64N4e18-8 5.084n ± 0% 4.963n ± 0% -2.39% (p=0.000 n=20) Int32N1000-8 3.208n ± 2% 3.189n ± 1% -0.58% (p=0.001 n=20) Int32N1e8-8 3.610n ± 1% 3.514n ± 1% -2.67% (p=0.000 n=20) Int32N1e9-8 4.235n ± 0% 4.133n ± 0% -2.40% (p=0.000 n=20) Int32N2e9-8 4.229n ± 1% 4.137n ± 0% -2.19% (p=0.000 n=20) Float32-8 3.468n ± 0% 3.468n ± 1% ~ (p=0.350 n=20) Float64-8 3.447n ± 0% 3.478n ± 0% +0.90% (p=0.000 n=20) ExpFloat64-8 4.567n ± 0% 4.563n ± 0% -0.10% (p=0.002 n=20) NormFloat64-8 4.821n ± 0% 4.768n ± 0% -1.09% (p=0.000 n=20) Perm3-8 28.89n ± 0% 28.94n ± 0% +0.17% (p=0.000 n=20) Perm30-8 175.7n ± 0% 175.9n ± 0% +0.14% (p=0.000 n=20) Perm30ViaShuffle-8 153.5n ± 0% 152.6n ± 1% ~ (p=0.010 n=20) ShuffleOverhead-8 119.8n ± 1% 119.6n ± 1% ~ (p=0.147 n=20) Concurrent-8 2.433n ± 3% 2.452n ± 3% ~ (p=0.616 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 220860f76f.386 │ 11ad9fdddc.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.370n ± 1% 2.091n ± 1% -11.75% (p=0.000 n=20) GlobalInt64-32 3.569n ± 1% 3.514n ± 2% -1.56% (p=0.000 n=20) GlobalInt63Parallel-32 0.3221n ± 1% 0.3197n ± 0% -0.76% (p=0.000 n=20) GlobalUint64-32 8.797n ± 10% 3.542n ± 1% -59.74% (p=0.000 n=20) GlobalUint64Parallel-32 0.6351n ± 0% 0.3218n ± 0% -49.33% (p=0.000 n=20) Int64-32 2.612n ± 2% 2.552n ± 2% -2.30% (p=0.000 n=20) Uint64-32 3.350n ± 1% 2.566n ± 1% -23.42% (p=0.000 n=20) GlobalIntN1000-32 5.892n ± 1% 5.965n ± 2% ~ (p=0.082 n=20) IntN1000-32 4.546n ± 1% 4.652n ± 1% +2.33% (p=0.000 n=20) Int64N1000-32 14.59n ± 1% 14.48n ± 1% ~ (p=0.652 n=20) Int64N1e8-32 14.76n ± 2% 14.67n ± 1% ~ (p=0.836 n=20) Int64N1e9-32 16.57n ± 1% 16.80n ± 2% ~ (p=0.016 n=20) Int64N2e9-32 14.54n ± 1% 14.52n ± 1% ~ (p=0.533 n=20) Int64N1e18-32 16.14n ± 1% 16.16n ± 1% ~ (p=0.606 n=20) Int64N2e18-32 18.10n ± 1% 17.95n ± 2% ~ (p=0.062 n=20) Int64N4e18-32 18.65n ± 1% 18.35n ± 2% -1.61% (p=0.010 n=20) Int32N1000-32 3.560n ± 1% 3.608n ± 1% +1.33% (p=0.001 n=20) Int32N1e8-32 3.770n ± 2% 3.767n ± 1% ~ (p=0.155 n=20) Int32N1e9-32 4.098n ± 0% 4.130n ± 2% ~ (p=0.016 n=20) Int32N2e9-32 4.179n ± 1% 4.206n ± 1% ~ (p=0.011 n=20) Float32-32 21.18n ± 4% 22.18n ± 4% +4.70% (p=0.003 n=20) Float64-32 20.60n ± 2% 20.75n ± 4% +0.73% (p=0.000 n=20) ExpFloat64-32 13.07n ± 0% 12.58n ± 3% -3.82% (p=0.000 n=20) NormFloat64-32 7.738n ± 2% 7.920n ± 3% ~ (p=0.066 n=20) Perm3-32 36.73n ± 1% 40.27n ± 1% +9.65% (p=0.000 n=20) Perm30-32 211.9n ± 1% 213.2n ± 2% ~ (p=0.262 n=20) Perm30ViaShuffle-32 165.2n ± 1% 164.2n ± 2% ~ (p=0.029 n=20) ShuffleOverhead-32 133.9n ± 1% 134.7n ± 2% ~ (p=0.551 n=20) Concurrent-32 3.287n ± 2% 3.301n ± 2% ~ (p=0.330 n=20) For #61716. Change-Id: I8d2f73f87dd3603a0c2ff069988938e0957b6904 Reviewed-on: https://go-review.googlesource.com/c/go/+/502499 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: Rob Pike <r@golang.org>
2023-06-06 09:16:34 -04:00
// A Source is a source of uniformly-distributed
// pseudo-random uint64 values in the range [0, 1<<64).
//
// A Source is not safe for concurrent use by multiple goroutines.
type Source interface {
Uint64() uint64
}
// NewSource returns a new pseudo-random Source seeded with the given value.
// Unlike the default Source used by top-level functions, this source is not
// safe for concurrent use by multiple goroutines.
// The returned Source implements Source64.
func NewSource(seed int64) Source {
return newSource(seed)
}
func newSource(seed int64) *rngSource {
var rng rngSource
rng.Seed(seed)
return &rng
}
// A Rand is a source of random numbers.
type Rand struct {
src Source
}
// New returns a new Rand that uses random values from src
// to generate other random values.
func New(src Source) *Rand {
math/rand/v2: change Source to use uint64 This should make Uint64-using functions faster and leave other things alone. It is a mystery why so much got faster. A good cautionary tale not to read too much into minor jitter in the benchmarks. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 220860f76f.amd64 │ 11ad9fdddc.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.555n ± 1% 1.335n ± 1% -14.15% (p=0.000 n=20) GlobalInt64-32 2.071n ± 1% 2.046n ± 1% ~ (p=0.016 n=20) GlobalInt63Parallel-32 0.1023n ± 1% 0.1037n ± 1% +1.37% (p=0.002 n=20) GlobalUint64-32 5.193n ± 1% 2.075n ± 0% -60.06% (p=0.000 n=20) GlobalUint64Parallel-32 0.2341n ± 0% 0.1013n ± 1% -56.74% (p=0.000 n=20) Int64-32 2.056n ± 2% 1.726n ± 2% -16.10% (p=0.000 n=20) Uint64-32 2.077n ± 2% 1.673n ± 1% -19.46% (p=0.000 n=20) GlobalIntN1000-32 4.077n ± 2% 3.895n ± 2% -4.45% (p=0.000 n=20) IntN1000-32 3.476n ± 2% 3.403n ± 1% -2.10% (p=0.000 n=20) Int64N1000-32 3.059n ± 1% 3.053n ± 2% ~ (p=0.131 n=20) Int64N1e8-32 2.942n ± 1% 2.718n ± 1% -7.60% (p=0.000 n=20) Int64N1e9-32 2.932n ± 1% 2.712n ± 1% -7.50% (p=0.000 n=20) Int64N2e9-32 2.925n ± 1% 2.690n ± 1% -8.03% (p=0.000 n=20) Int64N1e18-32 3.116n ± 1% 3.084n ± 2% ~ (p=0.425 n=20) Int64N2e18-32 4.067n ± 1% 4.026n ± 1% -1.02% (p=0.007 n=20) Int64N4e18-32 4.054n ± 1% 4.049n ± 2% ~ (p=0.204 n=20) Int32N1000-32 2.951n ± 1% 2.730n ± 0% -7.49% (p=0.000 n=20) Int32N1e8-32 3.102n ± 1% 2.916n ± 2% -6.03% (p=0.000 n=20) Int32N1e9-32 3.535n ± 1% 3.375n ± 1% -4.54% (p=0.000 n=20) Int32N2e9-32 3.514n ± 1% 3.292n ± 1% -6.30% (p=0.000 n=20) Float32-32 2.760n ± 1% 2.673n ± 1% -3.13% (p=0.000 n=20) Float64-32 2.284n ± 1% 2.485n ± 1% +8.80% (p=0.000 n=20) ExpFloat64-32 3.757n ± 1% 3.577n ± 2% -4.78% (p=0.000 n=20) NormFloat64-32 3.837n ± 1% 3.797n ± 2% ~ (p=0.204 n=20) Perm3-32 35.23n ± 2% 35.79n ± 2% ~ (p=0.298 n=20) Perm30-32 208.8n ± 1% 205.1n ± 1% -1.82% (p=0.000 n=20) Perm30ViaShuffle-32 111.7n ± 1% 111.2n ± 2% ~ (p=0.273 n=20) ShuffleOverhead-32 101.1n ± 1% 100.5n ± 2% ~ (p=0.878 n=20) Concurrent-32 2.108n ± 7% 2.188n ± 5% ~ (p=0.417 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 │ 220860f76f.arm64 │ 11ad9fdddc.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.316n ± 1% 2.272n ± 1% -1.86% (p=0.000 n=20) GlobalInt64-8 2.183n ± 1% 2.155n ± 1% ~ (p=0.122 n=20) GlobalInt63Parallel-8 0.4331n ± 0% 0.4352n ± 0% +0.48% (p=0.000 n=20) GlobalUint64-8 4.377n ± 2% 2.173n ± 1% -50.35% (p=0.000 n=20) GlobalUint64Parallel-8 0.9237n ± 0% 0.4340n ± 0% -53.02% (p=0.000 n=20) Int64-8 2.538n ± 1% 2.544n ± 1% ~ (p=0.189 n=20) Uint64-8 2.604n ± 1% 2.552n ± 1% -1.98% (p=0.000 n=20) GlobalIntN1000-8 3.857n ± 2% 3.856n ± 0% ~ (p=0.051 n=20) IntN1000-8 3.822n ± 2% 3.820n ± 0% -0.05% (p=0.001 n=20) Int64N1000-8 3.318n ± 0% 3.219n ± 2% -2.98% (p=0.000 n=20) Int64N1e8-8 3.349n ± 1% 3.221n ± 2% -3.79% (p=0.000 n=20) Int64N1e9-8 3.317n ± 2% 3.276n ± 2% -1.24% (p=0.001 n=20) Int64N2e9-8 3.317n ± 2% 3.217n ± 0% -3.01% (p=0.000 n=20) Int64N1e18-8 3.542n ± 1% 3.502n ± 2% -1.16% (p=0.001 n=20) Int64N2e18-8 5.087n ± 0% 4.968n ± 1% -2.33% (p=0.000 n=20) Int64N4e18-8 5.084n ± 0% 4.963n ± 0% -2.39% (p=0.000 n=20) Int32N1000-8 3.208n ± 2% 3.189n ± 1% -0.58% (p=0.001 n=20) Int32N1e8-8 3.610n ± 1% 3.514n ± 1% -2.67% (p=0.000 n=20) Int32N1e9-8 4.235n ± 0% 4.133n ± 0% -2.40% (p=0.000 n=20) Int32N2e9-8 4.229n ± 1% 4.137n ± 0% -2.19% (p=0.000 n=20) Float32-8 3.468n ± 0% 3.468n ± 1% ~ (p=0.350 n=20) Float64-8 3.447n ± 0% 3.478n ± 0% +0.90% (p=0.000 n=20) ExpFloat64-8 4.567n ± 0% 4.563n ± 0% -0.10% (p=0.002 n=20) NormFloat64-8 4.821n ± 0% 4.768n ± 0% -1.09% (p=0.000 n=20) Perm3-8 28.89n ± 0% 28.94n ± 0% +0.17% (p=0.000 n=20) Perm30-8 175.7n ± 0% 175.9n ± 0% +0.14% (p=0.000 n=20) Perm30ViaShuffle-8 153.5n ± 0% 152.6n ± 1% ~ (p=0.010 n=20) ShuffleOverhead-8 119.8n ± 1% 119.6n ± 1% ~ (p=0.147 n=20) Concurrent-8 2.433n ± 3% 2.452n ± 3% ~ (p=0.616 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 220860f76f.386 │ 11ad9fdddc.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.370n ± 1% 2.091n ± 1% -11.75% (p=0.000 n=20) GlobalInt64-32 3.569n ± 1% 3.514n ± 2% -1.56% (p=0.000 n=20) GlobalInt63Parallel-32 0.3221n ± 1% 0.3197n ± 0% -0.76% (p=0.000 n=20) GlobalUint64-32 8.797n ± 10% 3.542n ± 1% -59.74% (p=0.000 n=20) GlobalUint64Parallel-32 0.6351n ± 0% 0.3218n ± 0% -49.33% (p=0.000 n=20) Int64-32 2.612n ± 2% 2.552n ± 2% -2.30% (p=0.000 n=20) Uint64-32 3.350n ± 1% 2.566n ± 1% -23.42% (p=0.000 n=20) GlobalIntN1000-32 5.892n ± 1% 5.965n ± 2% ~ (p=0.082 n=20) IntN1000-32 4.546n ± 1% 4.652n ± 1% +2.33% (p=0.000 n=20) Int64N1000-32 14.59n ± 1% 14.48n ± 1% ~ (p=0.652 n=20) Int64N1e8-32 14.76n ± 2% 14.67n ± 1% ~ (p=0.836 n=20) Int64N1e9-32 16.57n ± 1% 16.80n ± 2% ~ (p=0.016 n=20) Int64N2e9-32 14.54n ± 1% 14.52n ± 1% ~ (p=0.533 n=20) Int64N1e18-32 16.14n ± 1% 16.16n ± 1% ~ (p=0.606 n=20) Int64N2e18-32 18.10n ± 1% 17.95n ± 2% ~ (p=0.062 n=20) Int64N4e18-32 18.65n ± 1% 18.35n ± 2% -1.61% (p=0.010 n=20) Int32N1000-32 3.560n ± 1% 3.608n ± 1% +1.33% (p=0.001 n=20) Int32N1e8-32 3.770n ± 2% 3.767n ± 1% ~ (p=0.155 n=20) Int32N1e9-32 4.098n ± 0% 4.130n ± 2% ~ (p=0.016 n=20) Int32N2e9-32 4.179n ± 1% 4.206n ± 1% ~ (p=0.011 n=20) Float32-32 21.18n ± 4% 22.18n ± 4% +4.70% (p=0.003 n=20) Float64-32 20.60n ± 2% 20.75n ± 4% +0.73% (p=0.000 n=20) ExpFloat64-32 13.07n ± 0% 12.58n ± 3% -3.82% (p=0.000 n=20) NormFloat64-32 7.738n ± 2% 7.920n ± 3% ~ (p=0.066 n=20) Perm3-32 36.73n ± 1% 40.27n ± 1% +9.65% (p=0.000 n=20) Perm30-32 211.9n ± 1% 213.2n ± 2% ~ (p=0.262 n=20) Perm30ViaShuffle-32 165.2n ± 1% 164.2n ± 2% ~ (p=0.029 n=20) ShuffleOverhead-32 133.9n ± 1% 134.7n ± 2% ~ (p=0.551 n=20) Concurrent-32 3.287n ± 2% 3.301n ± 2% ~ (p=0.330 n=20) For #61716. Change-Id: I8d2f73f87dd3603a0c2ff069988938e0957b6904 Reviewed-on: https://go-review.googlesource.com/c/go/+/502499 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: Rob Pike <r@golang.org>
2023-06-06 09:16:34 -04:00
return &Rand{src: src}
}
// Int64 returns a non-negative pseudo-random 63-bit integer as an int64.
math/rand/v2: change Source to use uint64 This should make Uint64-using functions faster and leave other things alone. It is a mystery why so much got faster. A good cautionary tale not to read too much into minor jitter in the benchmarks. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 220860f76f.amd64 │ 11ad9fdddc.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.555n ± 1% 1.335n ± 1% -14.15% (p=0.000 n=20) GlobalInt64-32 2.071n ± 1% 2.046n ± 1% ~ (p=0.016 n=20) GlobalInt63Parallel-32 0.1023n ± 1% 0.1037n ± 1% +1.37% (p=0.002 n=20) GlobalUint64-32 5.193n ± 1% 2.075n ± 0% -60.06% (p=0.000 n=20) GlobalUint64Parallel-32 0.2341n ± 0% 0.1013n ± 1% -56.74% (p=0.000 n=20) Int64-32 2.056n ± 2% 1.726n ± 2% -16.10% (p=0.000 n=20) Uint64-32 2.077n ± 2% 1.673n ± 1% -19.46% (p=0.000 n=20) GlobalIntN1000-32 4.077n ± 2% 3.895n ± 2% -4.45% (p=0.000 n=20) IntN1000-32 3.476n ± 2% 3.403n ± 1% -2.10% (p=0.000 n=20) Int64N1000-32 3.059n ± 1% 3.053n ± 2% ~ (p=0.131 n=20) Int64N1e8-32 2.942n ± 1% 2.718n ± 1% -7.60% (p=0.000 n=20) Int64N1e9-32 2.932n ± 1% 2.712n ± 1% -7.50% (p=0.000 n=20) Int64N2e9-32 2.925n ± 1% 2.690n ± 1% -8.03% (p=0.000 n=20) Int64N1e18-32 3.116n ± 1% 3.084n ± 2% ~ (p=0.425 n=20) Int64N2e18-32 4.067n ± 1% 4.026n ± 1% -1.02% (p=0.007 n=20) Int64N4e18-32 4.054n ± 1% 4.049n ± 2% ~ (p=0.204 n=20) Int32N1000-32 2.951n ± 1% 2.730n ± 0% -7.49% (p=0.000 n=20) Int32N1e8-32 3.102n ± 1% 2.916n ± 2% -6.03% (p=0.000 n=20) Int32N1e9-32 3.535n ± 1% 3.375n ± 1% -4.54% (p=0.000 n=20) Int32N2e9-32 3.514n ± 1% 3.292n ± 1% -6.30% (p=0.000 n=20) Float32-32 2.760n ± 1% 2.673n ± 1% -3.13% (p=0.000 n=20) Float64-32 2.284n ± 1% 2.485n ± 1% +8.80% (p=0.000 n=20) ExpFloat64-32 3.757n ± 1% 3.577n ± 2% -4.78% (p=0.000 n=20) NormFloat64-32 3.837n ± 1% 3.797n ± 2% ~ (p=0.204 n=20) Perm3-32 35.23n ± 2% 35.79n ± 2% ~ (p=0.298 n=20) Perm30-32 208.8n ± 1% 205.1n ± 1% -1.82% (p=0.000 n=20) Perm30ViaShuffle-32 111.7n ± 1% 111.2n ± 2% ~ (p=0.273 n=20) ShuffleOverhead-32 101.1n ± 1% 100.5n ± 2% ~ (p=0.878 n=20) Concurrent-32 2.108n ± 7% 2.188n ± 5% ~ (p=0.417 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 │ 220860f76f.arm64 │ 11ad9fdddc.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.316n ± 1% 2.272n ± 1% -1.86% (p=0.000 n=20) GlobalInt64-8 2.183n ± 1% 2.155n ± 1% ~ (p=0.122 n=20) GlobalInt63Parallel-8 0.4331n ± 0% 0.4352n ± 0% +0.48% (p=0.000 n=20) GlobalUint64-8 4.377n ± 2% 2.173n ± 1% -50.35% (p=0.000 n=20) GlobalUint64Parallel-8 0.9237n ± 0% 0.4340n ± 0% -53.02% (p=0.000 n=20) Int64-8 2.538n ± 1% 2.544n ± 1% ~ (p=0.189 n=20) Uint64-8 2.604n ± 1% 2.552n ± 1% -1.98% (p=0.000 n=20) GlobalIntN1000-8 3.857n ± 2% 3.856n ± 0% ~ (p=0.051 n=20) IntN1000-8 3.822n ± 2% 3.820n ± 0% -0.05% (p=0.001 n=20) Int64N1000-8 3.318n ± 0% 3.219n ± 2% -2.98% (p=0.000 n=20) Int64N1e8-8 3.349n ± 1% 3.221n ± 2% -3.79% (p=0.000 n=20) Int64N1e9-8 3.317n ± 2% 3.276n ± 2% -1.24% (p=0.001 n=20) Int64N2e9-8 3.317n ± 2% 3.217n ± 0% -3.01% (p=0.000 n=20) Int64N1e18-8 3.542n ± 1% 3.502n ± 2% -1.16% (p=0.001 n=20) Int64N2e18-8 5.087n ± 0% 4.968n ± 1% -2.33% (p=0.000 n=20) Int64N4e18-8 5.084n ± 0% 4.963n ± 0% -2.39% (p=0.000 n=20) Int32N1000-8 3.208n ± 2% 3.189n ± 1% -0.58% (p=0.001 n=20) Int32N1e8-8 3.610n ± 1% 3.514n ± 1% -2.67% (p=0.000 n=20) Int32N1e9-8 4.235n ± 0% 4.133n ± 0% -2.40% (p=0.000 n=20) Int32N2e9-8 4.229n ± 1% 4.137n ± 0% -2.19% (p=0.000 n=20) Float32-8 3.468n ± 0% 3.468n ± 1% ~ (p=0.350 n=20) Float64-8 3.447n ± 0% 3.478n ± 0% +0.90% (p=0.000 n=20) ExpFloat64-8 4.567n ± 0% 4.563n ± 0% -0.10% (p=0.002 n=20) NormFloat64-8 4.821n ± 0% 4.768n ± 0% -1.09% (p=0.000 n=20) Perm3-8 28.89n ± 0% 28.94n ± 0% +0.17% (p=0.000 n=20) Perm30-8 175.7n ± 0% 175.9n ± 0% +0.14% (p=0.000 n=20) Perm30ViaShuffle-8 153.5n ± 0% 152.6n ± 1% ~ (p=0.010 n=20) ShuffleOverhead-8 119.8n ± 1% 119.6n ± 1% ~ (p=0.147 n=20) Concurrent-8 2.433n ± 3% 2.452n ± 3% ~ (p=0.616 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 220860f76f.386 │ 11ad9fdddc.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.370n ± 1% 2.091n ± 1% -11.75% (p=0.000 n=20) GlobalInt64-32 3.569n ± 1% 3.514n ± 2% -1.56% (p=0.000 n=20) GlobalInt63Parallel-32 0.3221n ± 1% 0.3197n ± 0% -0.76% (p=0.000 n=20) GlobalUint64-32 8.797n ± 10% 3.542n ± 1% -59.74% (p=0.000 n=20) GlobalUint64Parallel-32 0.6351n ± 0% 0.3218n ± 0% -49.33% (p=0.000 n=20) Int64-32 2.612n ± 2% 2.552n ± 2% -2.30% (p=0.000 n=20) Uint64-32 3.350n ± 1% 2.566n ± 1% -23.42% (p=0.000 n=20) GlobalIntN1000-32 5.892n ± 1% 5.965n ± 2% ~ (p=0.082 n=20) IntN1000-32 4.546n ± 1% 4.652n ± 1% +2.33% (p=0.000 n=20) Int64N1000-32 14.59n ± 1% 14.48n ± 1% ~ (p=0.652 n=20) Int64N1e8-32 14.76n ± 2% 14.67n ± 1% ~ (p=0.836 n=20) Int64N1e9-32 16.57n ± 1% 16.80n ± 2% ~ (p=0.016 n=20) Int64N2e9-32 14.54n ± 1% 14.52n ± 1% ~ (p=0.533 n=20) Int64N1e18-32 16.14n ± 1% 16.16n ± 1% ~ (p=0.606 n=20) Int64N2e18-32 18.10n ± 1% 17.95n ± 2% ~ (p=0.062 n=20) Int64N4e18-32 18.65n ± 1% 18.35n ± 2% -1.61% (p=0.010 n=20) Int32N1000-32 3.560n ± 1% 3.608n ± 1% +1.33% (p=0.001 n=20) Int32N1e8-32 3.770n ± 2% 3.767n ± 1% ~ (p=0.155 n=20) Int32N1e9-32 4.098n ± 0% 4.130n ± 2% ~ (p=0.016 n=20) Int32N2e9-32 4.179n ± 1% 4.206n ± 1% ~ (p=0.011 n=20) Float32-32 21.18n ± 4% 22.18n ± 4% +4.70% (p=0.003 n=20) Float64-32 20.60n ± 2% 20.75n ± 4% +0.73% (p=0.000 n=20) ExpFloat64-32 13.07n ± 0% 12.58n ± 3% -3.82% (p=0.000 n=20) NormFloat64-32 7.738n ± 2% 7.920n ± 3% ~ (p=0.066 n=20) Perm3-32 36.73n ± 1% 40.27n ± 1% +9.65% (p=0.000 n=20) Perm30-32 211.9n ± 1% 213.2n ± 2% ~ (p=0.262 n=20) Perm30ViaShuffle-32 165.2n ± 1% 164.2n ± 2% ~ (p=0.029 n=20) ShuffleOverhead-32 133.9n ± 1% 134.7n ± 2% ~ (p=0.551 n=20) Concurrent-32 3.287n ± 2% 3.301n ± 2% ~ (p=0.330 n=20) For #61716. Change-Id: I8d2f73f87dd3603a0c2ff069988938e0957b6904 Reviewed-on: https://go-review.googlesource.com/c/go/+/502499 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: Rob Pike <r@golang.org>
2023-06-06 09:16:34 -04:00
func (r *Rand) Int64() int64 { return int64(r.src.Uint64() &^ (1 << 63)) }
// Uint32 returns a pseudo-random 32-bit value as a uint32.
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
func (r *Rand) Uint32() uint32 { return uint32(r.src.Uint64() >> 32) }
// Uint64 returns a pseudo-random 64-bit value as a uint64.
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
func (r *Rand) Uint64() uint64 { return r.src.Uint64() }
// Int32 returns a non-negative pseudo-random 31-bit integer as an int32.
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
func (r *Rand) Int32() int32 { return int32(r.src.Uint64() >> 33) }
// Int returns a non-negative pseudo-random int.
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
func (r *Rand) Int() int { return int(uint(r.src.Uint64()) << 1 >> 1) }
// Int64N returns, as an int64, a non-negative pseudo-random number in the half-open interval [0,n).
// It panics if n <= 0.
func (r *Rand) Int64N(n int64) int64 {
if n <= 0 {
panic("invalid argument to Int64N")
}
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
return int64(r.uint64n(uint64(n)))
}
// Uint64N returns, as a uint64, a non-negative pseudo-random number in the half-open interval [0,n).
// It panics if n == 0.
func (r *Rand) Uint64N(n uint64) uint64 {
if n == 0 {
panic("invalid argument to Uint64N")
}
return r.uint64n(n)
}
// uint64n is the no-bounds-checks version of Uint64N.
func (r *Rand) uint64n(n uint64) uint64 {
if is32bit && uint64(uint32(n)) == n {
return uint64(r.uint32n(uint32(n)))
}
if n&(n-1) == 0 { // n is power of two, can mask
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
return r.Uint64() & (n - 1)
}
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
// Suppose we have a uint64 x uniform in the range [0,2⁶⁴)
// and want to reduce it to the range [0,n) preserving exact uniformity.
// We can simulate a scaling arbitrary precision x * (n/2⁶⁴) by
// the high bits of a double-width multiply of x*n, meaning (x*n)/2⁶⁴.
// Since there are 2⁶⁴ possible inputs x and only n possible outputs,
// the output is necessarily biased if n does not divide 2⁶⁴.
// In general (x*n)/2⁶⁴ = k for x*n in [k*2⁶⁴,(k+1)*2⁶⁴).
// There are either floor(2⁶⁴/n) or ceil(2⁶⁴/n) possible products
// in that range, depending on k.
// But suppose we reject the sample and try again when
// x*n is in [k*2⁶⁴, k*2⁶⁴+(2⁶⁴%n)), meaning rejecting fewer than n possible
// outcomes out of the 2⁶⁴.
// Now there are exactly floor(2⁶⁴/n) possible ways to produce
// each output value k, so we've restored uniformity.
// To get valid uint64 math, 2⁶⁴ % n = (2⁶⁴ - n) % n = -n % n,
// so the direct implementation of this algorithm would be:
//
// hi, lo := bits.Mul64(r.Uint64(), n)
// thresh := -n % n
// for lo < thresh {
// hi, lo = bits.Mul64(r.Uint64(), n)
// }
//
// That still leaves an expensive 64-bit division that we would rather avoid.
// We know that thresh < n, and n is usually much less than 2⁶⁴, so we can
// avoid the last four lines unless lo < n.
//
// See also:
// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction
// https://lemire.me/blog/2016/06/30/fast-random-shuffling
hi, lo := bits.Mul64(r.Uint64(), n)
if lo < n {
thresh := -n % n
for lo < thresh {
hi, lo = bits.Mul64(r.Uint64(), n)
}
}
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
return hi
}
// uint32n is an identical computation to uint64n
// but optimized for 32-bit systems.
func (r *Rand) uint32n(n uint32) uint32 {
if n&(n-1) == 0 { // n is power of two, can mask
return uint32(r.Uint64()) & (n - 1)
}
// On 64-bit systems we still use the uint64 code below because
// the probability of a random uint64 lo being < a uint32 n is near zero,
// meaning the unbiasing loop almost never runs.
// On 32-bit systems, here we need to implement that same logic in 32-bit math,
// both to preserve the exact output sequence observed on 64-bit machines
// and to preserve the optimization that the unbiasing loop almost never runs.
//
// We want to compute
// hi, lo := bits.Mul64(r.Uint64(), n)
// In terms of 32-bit halves, this is:
// x1:x0 := r.Uint64()
// 0:hi, lo1:lo0 := bits.Mul64(x1:x0, 0:n)
// Writing out the multiplication in terms of bits.Mul32 allows
// using direct hardware instructions and avoiding
// the computations involving these zeros.
x := r.Uint64()
lo1a, lo0 := bits.Mul32(uint32(x), n)
hi, lo1b := bits.Mul32(uint32(x>>32), n)
lo1, c := bits.Add32(lo1a, lo1b, 0)
hi += c
if lo1 == 0 && lo0 < uint32(n) {
n64 := uint64(n)
thresh := uint32(-n64 % n64)
for lo1 == 0 && lo0 < thresh {
x := r.Uint64()
lo1a, lo0 = bits.Mul32(uint32(x), n)
hi, lo1b = bits.Mul32(uint32(x>>32), n)
lo1, c = bits.Add32(lo1a, lo1b, 0)
hi += c
}
}
return hi
}
// Int32N returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n).
// It panics if n <= 0.
func (r *Rand) Int32N(n int32) int32 {
if n <= 0 {
panic("invalid argument to Int32N")
}
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
return int32(r.uint64n(uint64(n)))
}
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
// Uint32N returns, as a uint32, a non-negative pseudo-random number in the half-open interval [0,n).
// It panics if n == 0.
func (r *Rand) Uint32N(n uint32) uint32 {
if n == 0 {
panic("invalid argument to Uint32N")
}
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
return uint32(r.uint64n(uint64(n)))
}
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
const is32bit = ^uint(0)>>32 == 0
// IntN returns, as an int, a non-negative pseudo-random number in the half-open interval [0,n).
// It panics if n <= 0.
func (r *Rand) IntN(n int) int {
if n <= 0 {
panic("invalid argument to IntN")
}
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
return int(r.uint64n(uint64(n)))
}
// UintN returns, as a uint, a non-negative pseudo-random number in the half-open interval [0,n).
// It panics if n == 0.
func (r *Rand) UintN(n uint) uint {
if n == 0 {
panic("invalid argument to UintN")
}
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
return uint(r.uint64n(uint64(n)))
}
// Float64 returns, as a float64, a pseudo-random number in the half-open interval [0.0,1.0).
func (r *Rand) Float64() float64 {
math/rand/v2: optimize Float32, Float64 We realized too late after Go 1 that float64(r.Uint64())/(1<<64) is not a correct implementation: it occasionally rounds to 1. The correct implementation is float64(r.Uint64()&(1<<53-1))/(1<<53) but we couldn't change the implementation for compatibility, so we changed it to retry only in the "round to 1" cases. The change to v2 lets us update the algorithm to the simpler, faster one. Note that this implementation cannot generate 2⁻⁵⁴, nor 2⁻¹⁰⁰, nor any of the other numbers between 0 and 2⁻⁵³. A slower algorithm could shift some of the probability of generating these two boundary values over to the values in between, but that would be much slower and not necessarily be better. In particular, the current implementation has the property that there are uniform gaps between the possible returned floats, which might help stability. Also, the result is often scaled and shifted, like Float64()*X+Y. Multiplying by X>1 would open new gaps, and adding most Y would erase all the distinctions that were introduced. The only changes to benchmarks should be in Float32 and Float64. The other changes remain a cautionary tale. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 4d84a369d1.amd64 │ 2703446c2e.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.348n ± 2% 1.337n ± 1% ~ (p=0.662 n=20) GlobalInt64-32 2.082n ± 2% 2.225n ± 2% +6.87% (p=0.000 n=20) GlobalInt64Parallel-32 0.1036n ± 1% 0.1043n ± 2% ~ (p=0.171 n=20) GlobalUint64-32 2.077n ± 2% 2.058n ± 1% ~ (p=0.560 n=20) GlobalUint64Parallel-32 0.1012n ± 1% 0.1009n ± 1% ~ (p=0.995 n=20) Int64-32 1.750n ± 0% 1.719n ± 2% -1.74% (p=0.000 n=20) Uint64-32 1.707n ± 2% 1.669n ± 1% -2.20% (p=0.000 n=20) GlobalIntN1000-32 3.192n ± 1% 3.321n ± 2% +4.04% (p=0.000 n=20) IntN1000-32 2.462n ± 2% 2.479n ± 1% ~ (p=0.417 n=20) Int64N1000-32 2.470n ± 1% 2.477n ± 1% ~ (p=0.664 n=20) Int64N1e8-32 2.503n ± 2% 2.490n ± 1% ~ (p=0.245 n=20) Int64N1e9-32 2.487n ± 1% 2.458n ± 1% ~ (p=0.032 n=20) Int64N2e9-32 2.487n ± 1% 2.486n ± 2% ~ (p=0.507 n=20) Int64N1e18-32 3.006n ± 2% 3.215n ± 2% +6.94% (p=0.000 n=20) Int64N2e18-32 3.368n ± 1% 3.588n ± 2% +6.55% (p=0.000 n=20) Int64N4e18-32 4.763n ± 1% 4.938n ± 2% +3.69% (p=0.000 n=20) Int32N1000-32 2.403n ± 1% 2.673n ± 2% +11.19% (p=0.000 n=20) Int32N1e8-32 2.405n ± 1% 2.631n ± 2% +9.42% (p=0.000 n=20) Int32N1e9-32 2.402n ± 2% 2.628n ± 2% +9.41% (p=0.000 n=20) Int32N2e9-32 2.384n ± 1% 2.684n ± 2% +12.56% (p=0.000 n=20) Float32-32 2.641n ± 2% 2.240n ± 2% -15.18% (p=0.000 n=20) Float64-32 2.483n ± 1% 2.253n ± 1% -9.26% (p=0.000 n=20) ExpFloat64-32 3.486n ± 2% 3.677n ± 1% +5.49% (p=0.000 n=20) NormFloat64-32 3.648n ± 1% 3.761n ± 1% +3.11% (p=0.000 n=20) Perm3-32 33.04n ± 1% 33.55n ± 2% ~ (p=0.180 n=20) Perm30-32 171.9n ± 1% 173.2n ± 1% ~ (p=0.050 n=20) Perm30ViaShuffle-32 100.3n ± 1% 115.9n ± 1% +15.55% (p=0.000 n=20) ShuffleOverhead-32 102.5n ± 1% 101.9n ± 1% ~ (p=0.266 n=20) Concurrent-32 2.101n ± 0% 2.107n ± 6% ~ (p=0.212 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 4d84a369d1.arm64 │ 2703446c2e.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.261n ± 1% 2.275n ± 0% ~ (p=0.082 n=20) GlobalInt64-8 2.160n ± 1% 2.154n ± 1% ~ (p=0.490 n=20) GlobalInt64Parallel-8 0.4299n ± 0% 0.4298n ± 0% ~ (p=0.663 n=20) GlobalUint64-8 2.169n ± 1% 2.160n ± 1% ~ (p=0.292 n=20) GlobalUint64Parallel-8 0.4293n ± 1% 0.4286n ± 0% ~ (p=0.155 n=20) Int64-8 2.473n ± 1% 2.491n ± 1% ~ (p=0.317 n=20) Uint64-8 2.453n ± 1% 2.458n ± 0% ~ (p=0.941 n=20) GlobalIntN1000-8 2.814n ± 2% 2.814n ± 2% ~ (p=0.972 n=20) IntN1000-8 2.933n ± 2% 2.933n ± 0% ~ (p=0.287 n=20) Int64N1000-8 2.934n ± 2% 2.962n ± 1% ~ (p=0.062 n=20) Int64N1e8-8 2.935n ± 2% 2.960n ± 1% ~ (p=0.183 n=20) Int64N1e9-8 2.934n ± 2% 2.935n ± 2% ~ (p=0.367 n=20) Int64N2e9-8 2.935n ± 2% 2.934n ± 0% ~ (p=0.455 n=20) Int64N1e18-8 3.778n ± 1% 3.777n ± 1% ~ (p=0.995 n=20) Int64N2e18-8 4.359n ± 1% 4.359n ± 1% ~ (p=0.122 n=20) Int64N4e18-8 6.546n ± 1% 6.536n ± 1% ~ (p=0.920 n=20) Int32N1000-8 2.940n ± 2% 2.937n ± 0% ~ (p=0.149 n=20) Int32N1e8-8 2.937n ± 2% 2.937n ± 1% ~ (p=0.620 n=20) Int32N1e9-8 2.938n ± 0% 2.936n ± 0% ~ (p=0.046 n=20) Int32N2e9-8 2.938n ± 2% 2.938n ± 2% ~ (p=0.455 n=20) Float32-8 3.486n ± 0% 3.441n ± 0% -1.28% (p=0.000 n=20) Float64-8 3.480n ± 0% 3.441n ± 0% -1.13% (p=0.000 n=20) ExpFloat64-8 4.533n ± 0% 4.486n ± 0% -1.03% (p=0.000 n=20) NormFloat64-8 4.764n ± 0% 4.721n ± 0% -0.90% (p=0.000 n=20) Perm3-8 26.66n ± 0% 26.65n ± 0% ~ (p=0.019 n=20) Perm30-8 143.4n ± 0% 143.2n ± 0% -0.17% (p=0.000 n=20) Perm30ViaShuffle-8 142.9n ± 0% 143.0n ± 0% ~ (p=0.522 n=20) ShuffleOverhead-8 120.7n ± 0% 120.6n ± 1% ~ (p=0.488 n=20) Concurrent-8 2.360n ± 2% 2.399n ± 5% ~ (p=0.062 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 4d84a369d1.386 │ 2703446c2e.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.101n ± 2% 2.072n ± 2% ~ (p=0.273 n=20) GlobalInt64-32 3.518n ± 2% 3.546n ± 27% +0.78% (p=0.007 n=20) GlobalInt64Parallel-32 0.3206n ± 0% 0.3211n ± 0% ~ (p=0.386 n=20) GlobalUint64-32 3.538n ± 1% 3.522n ± 2% ~ (p=0.331 n=20) GlobalUint64Parallel-32 0.3231n ± 0% 0.3172n ± 0% -1.84% (p=0.000 n=20) Int64-32 2.554n ± 2% 2.520n ± 2% ~ (p=0.465 n=20) Uint64-32 2.575n ± 2% 2.581n ± 1% ~ (p=0.213 n=20) GlobalIntN1000-32 6.292n ± 1% 6.171n ± 1% ~ (p=0.015 n=20) IntN1000-32 4.735n ± 1% 4.752n ± 2% ~ (p=0.635 n=20) Int64N1000-32 5.489n ± 2% 5.429n ± 1% ~ (p=0.324 n=20) Int64N1e8-32 5.528n ± 2% 5.469n ± 2% ~ (p=0.013 n=20) Int64N1e9-32 5.438n ± 2% 5.489n ± 2% ~ (p=0.984 n=20) Int64N2e9-32 5.474n ± 1% 5.492n ± 2% ~ (p=0.616 n=20) Int64N1e18-32 9.053n ± 1% 8.927n ± 1% ~ (p=0.037 n=20) Int64N2e18-32 9.685n ± 2% 9.622n ± 1% ~ (p=0.449 n=20) Int64N4e18-32 12.18n ± 1% 12.03n ± 1% ~ (p=0.013 n=20) Int32N1000-32 4.862n ± 1% 4.817n ± 1% -0.94% (p=0.002 n=20) Int32N1e8-32 4.758n ± 2% 4.801n ± 1% ~ (p=0.597 n=20) Int32N1e9-32 4.772n ± 1% 4.798n ± 1% ~ (p=0.774 n=20) Int32N2e9-32 4.847n ± 0% 4.840n ± 1% ~ (p=0.867 n=20) Float32-32 22.18n ± 4% 10.51n ± 4% -52.61% (p=0.000 n=20) Float64-32 21.21n ± 3% 20.33n ± 3% -4.17% (p=0.000 n=20) ExpFloat64-32 12.39n ± 2% 12.59n ± 2% ~ (p=0.139 n=20) NormFloat64-32 7.422n ± 1% 7.350n ± 2% ~ (p=0.208 n=20) Perm3-32 38.00n ± 2% 39.29n ± 2% +3.38% (p=0.000 n=20) Perm30-32 212.7n ± 1% 219.1n ± 2% +3.03% (p=0.001 n=20) Perm30ViaShuffle-32 187.5n ± 2% 189.8n ± 2% ~ (p=0.457 n=20) ShuffleOverhead-32 159.7n ± 1% 158.9n ± 2% ~ (p=0.920 n=20) Concurrent-32 3.470n ± 0% 3.306n ± 3% -4.71% (p=0.000 n=20) For #61716. Change-Id: I1933f1f9efd7e6e832d83e7fa5d84398f67d41f5 Reviewed-on: https://go-review.googlesource.com/c/go/+/502503 Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: Rob Pike <r@golang.org> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 10:49:19 -04:00
// There are exactly 1<<53 float64s in [0,1). Use Intn(1<<53) / (1<<53).
return float64(r.Uint64()<<11>>11) / (1 << 53)
}
// Float32 returns, as a float32, a pseudo-random number in the half-open interval [0.0,1.0).
func (r *Rand) Float32() float32 {
math/rand/v2: optimize Float32, Float64 We realized too late after Go 1 that float64(r.Uint64())/(1<<64) is not a correct implementation: it occasionally rounds to 1. The correct implementation is float64(r.Uint64()&(1<<53-1))/(1<<53) but we couldn't change the implementation for compatibility, so we changed it to retry only in the "round to 1" cases. The change to v2 lets us update the algorithm to the simpler, faster one. Note that this implementation cannot generate 2⁻⁵⁴, nor 2⁻¹⁰⁰, nor any of the other numbers between 0 and 2⁻⁵³. A slower algorithm could shift some of the probability of generating these two boundary values over to the values in between, but that would be much slower and not necessarily be better. In particular, the current implementation has the property that there are uniform gaps between the possible returned floats, which might help stability. Also, the result is often scaled and shifted, like Float64()*X+Y. Multiplying by X>1 would open new gaps, and adding most Y would erase all the distinctions that were introduced. The only changes to benchmarks should be in Float32 and Float64. The other changes remain a cautionary tale. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 4d84a369d1.amd64 │ 2703446c2e.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.348n ± 2% 1.337n ± 1% ~ (p=0.662 n=20) GlobalInt64-32 2.082n ± 2% 2.225n ± 2% +6.87% (p=0.000 n=20) GlobalInt64Parallel-32 0.1036n ± 1% 0.1043n ± 2% ~ (p=0.171 n=20) GlobalUint64-32 2.077n ± 2% 2.058n ± 1% ~ (p=0.560 n=20) GlobalUint64Parallel-32 0.1012n ± 1% 0.1009n ± 1% ~ (p=0.995 n=20) Int64-32 1.750n ± 0% 1.719n ± 2% -1.74% (p=0.000 n=20) Uint64-32 1.707n ± 2% 1.669n ± 1% -2.20% (p=0.000 n=20) GlobalIntN1000-32 3.192n ± 1% 3.321n ± 2% +4.04% (p=0.000 n=20) IntN1000-32 2.462n ± 2% 2.479n ± 1% ~ (p=0.417 n=20) Int64N1000-32 2.470n ± 1% 2.477n ± 1% ~ (p=0.664 n=20) Int64N1e8-32 2.503n ± 2% 2.490n ± 1% ~ (p=0.245 n=20) Int64N1e9-32 2.487n ± 1% 2.458n ± 1% ~ (p=0.032 n=20) Int64N2e9-32 2.487n ± 1% 2.486n ± 2% ~ (p=0.507 n=20) Int64N1e18-32 3.006n ± 2% 3.215n ± 2% +6.94% (p=0.000 n=20) Int64N2e18-32 3.368n ± 1% 3.588n ± 2% +6.55% (p=0.000 n=20) Int64N4e18-32 4.763n ± 1% 4.938n ± 2% +3.69% (p=0.000 n=20) Int32N1000-32 2.403n ± 1% 2.673n ± 2% +11.19% (p=0.000 n=20) Int32N1e8-32 2.405n ± 1% 2.631n ± 2% +9.42% (p=0.000 n=20) Int32N1e9-32 2.402n ± 2% 2.628n ± 2% +9.41% (p=0.000 n=20) Int32N2e9-32 2.384n ± 1% 2.684n ± 2% +12.56% (p=0.000 n=20) Float32-32 2.641n ± 2% 2.240n ± 2% -15.18% (p=0.000 n=20) Float64-32 2.483n ± 1% 2.253n ± 1% -9.26% (p=0.000 n=20) ExpFloat64-32 3.486n ± 2% 3.677n ± 1% +5.49% (p=0.000 n=20) NormFloat64-32 3.648n ± 1% 3.761n ± 1% +3.11% (p=0.000 n=20) Perm3-32 33.04n ± 1% 33.55n ± 2% ~ (p=0.180 n=20) Perm30-32 171.9n ± 1% 173.2n ± 1% ~ (p=0.050 n=20) Perm30ViaShuffle-32 100.3n ± 1% 115.9n ± 1% +15.55% (p=0.000 n=20) ShuffleOverhead-32 102.5n ± 1% 101.9n ± 1% ~ (p=0.266 n=20) Concurrent-32 2.101n ± 0% 2.107n ± 6% ~ (p=0.212 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 4d84a369d1.arm64 │ 2703446c2e.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.261n ± 1% 2.275n ± 0% ~ (p=0.082 n=20) GlobalInt64-8 2.160n ± 1% 2.154n ± 1% ~ (p=0.490 n=20) GlobalInt64Parallel-8 0.4299n ± 0% 0.4298n ± 0% ~ (p=0.663 n=20) GlobalUint64-8 2.169n ± 1% 2.160n ± 1% ~ (p=0.292 n=20) GlobalUint64Parallel-8 0.4293n ± 1% 0.4286n ± 0% ~ (p=0.155 n=20) Int64-8 2.473n ± 1% 2.491n ± 1% ~ (p=0.317 n=20) Uint64-8 2.453n ± 1% 2.458n ± 0% ~ (p=0.941 n=20) GlobalIntN1000-8 2.814n ± 2% 2.814n ± 2% ~ (p=0.972 n=20) IntN1000-8 2.933n ± 2% 2.933n ± 0% ~ (p=0.287 n=20) Int64N1000-8 2.934n ± 2% 2.962n ± 1% ~ (p=0.062 n=20) Int64N1e8-8 2.935n ± 2% 2.960n ± 1% ~ (p=0.183 n=20) Int64N1e9-8 2.934n ± 2% 2.935n ± 2% ~ (p=0.367 n=20) Int64N2e9-8 2.935n ± 2% 2.934n ± 0% ~ (p=0.455 n=20) Int64N1e18-8 3.778n ± 1% 3.777n ± 1% ~ (p=0.995 n=20) Int64N2e18-8 4.359n ± 1% 4.359n ± 1% ~ (p=0.122 n=20) Int64N4e18-8 6.546n ± 1% 6.536n ± 1% ~ (p=0.920 n=20) Int32N1000-8 2.940n ± 2% 2.937n ± 0% ~ (p=0.149 n=20) Int32N1e8-8 2.937n ± 2% 2.937n ± 1% ~ (p=0.620 n=20) Int32N1e9-8 2.938n ± 0% 2.936n ± 0% ~ (p=0.046 n=20) Int32N2e9-8 2.938n ± 2% 2.938n ± 2% ~ (p=0.455 n=20) Float32-8 3.486n ± 0% 3.441n ± 0% -1.28% (p=0.000 n=20) Float64-8 3.480n ± 0% 3.441n ± 0% -1.13% (p=0.000 n=20) ExpFloat64-8 4.533n ± 0% 4.486n ± 0% -1.03% (p=0.000 n=20) NormFloat64-8 4.764n ± 0% 4.721n ± 0% -0.90% (p=0.000 n=20) Perm3-8 26.66n ± 0% 26.65n ± 0% ~ (p=0.019 n=20) Perm30-8 143.4n ± 0% 143.2n ± 0% -0.17% (p=0.000 n=20) Perm30ViaShuffle-8 142.9n ± 0% 143.0n ± 0% ~ (p=0.522 n=20) ShuffleOverhead-8 120.7n ± 0% 120.6n ± 1% ~ (p=0.488 n=20) Concurrent-8 2.360n ± 2% 2.399n ± 5% ~ (p=0.062 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 4d84a369d1.386 │ 2703446c2e.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.101n ± 2% 2.072n ± 2% ~ (p=0.273 n=20) GlobalInt64-32 3.518n ± 2% 3.546n ± 27% +0.78% (p=0.007 n=20) GlobalInt64Parallel-32 0.3206n ± 0% 0.3211n ± 0% ~ (p=0.386 n=20) GlobalUint64-32 3.538n ± 1% 3.522n ± 2% ~ (p=0.331 n=20) GlobalUint64Parallel-32 0.3231n ± 0% 0.3172n ± 0% -1.84% (p=0.000 n=20) Int64-32 2.554n ± 2% 2.520n ± 2% ~ (p=0.465 n=20) Uint64-32 2.575n ± 2% 2.581n ± 1% ~ (p=0.213 n=20) GlobalIntN1000-32 6.292n ± 1% 6.171n ± 1% ~ (p=0.015 n=20) IntN1000-32 4.735n ± 1% 4.752n ± 2% ~ (p=0.635 n=20) Int64N1000-32 5.489n ± 2% 5.429n ± 1% ~ (p=0.324 n=20) Int64N1e8-32 5.528n ± 2% 5.469n ± 2% ~ (p=0.013 n=20) Int64N1e9-32 5.438n ± 2% 5.489n ± 2% ~ (p=0.984 n=20) Int64N2e9-32 5.474n ± 1% 5.492n ± 2% ~ (p=0.616 n=20) Int64N1e18-32 9.053n ± 1% 8.927n ± 1% ~ (p=0.037 n=20) Int64N2e18-32 9.685n ± 2% 9.622n ± 1% ~ (p=0.449 n=20) Int64N4e18-32 12.18n ± 1% 12.03n ± 1% ~ (p=0.013 n=20) Int32N1000-32 4.862n ± 1% 4.817n ± 1% -0.94% (p=0.002 n=20) Int32N1e8-32 4.758n ± 2% 4.801n ± 1% ~ (p=0.597 n=20) Int32N1e9-32 4.772n ± 1% 4.798n ± 1% ~ (p=0.774 n=20) Int32N2e9-32 4.847n ± 0% 4.840n ± 1% ~ (p=0.867 n=20) Float32-32 22.18n ± 4% 10.51n ± 4% -52.61% (p=0.000 n=20) Float64-32 21.21n ± 3% 20.33n ± 3% -4.17% (p=0.000 n=20) ExpFloat64-32 12.39n ± 2% 12.59n ± 2% ~ (p=0.139 n=20) NormFloat64-32 7.422n ± 1% 7.350n ± 2% ~ (p=0.208 n=20) Perm3-32 38.00n ± 2% 39.29n ± 2% +3.38% (p=0.000 n=20) Perm30-32 212.7n ± 1% 219.1n ± 2% +3.03% (p=0.001 n=20) Perm30ViaShuffle-32 187.5n ± 2% 189.8n ± 2% ~ (p=0.457 n=20) ShuffleOverhead-32 159.7n ± 1% 158.9n ± 2% ~ (p=0.920 n=20) Concurrent-32 3.470n ± 0% 3.306n ± 3% -4.71% (p=0.000 n=20) For #61716. Change-Id: I1933f1f9efd7e6e832d83e7fa5d84398f67d41f5 Reviewed-on: https://go-review.googlesource.com/c/go/+/502503 Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: Rob Pike <r@golang.org> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 10:49:19 -04:00
// There are exactly 1<<24 float32s in [0,1). Use Intn(1<<24) / (1<<24).
return float32(r.Uint32()<<8>>8) / (1 << 24)
}
// Perm returns, as a slice of n ints, a pseudo-random permutation of the integers
// in the half-open interval [0,n).
func (r *Rand) Perm(n int) []int {
m := make([]int, n)
// In the following loop, the iteration when i=0 always swaps m[0] with m[0].
// A change to remove this useless iteration is to assign 1 to i in the init
// statement. But Perm also effects r. Making this change will affect
// the final state of r. So this change can't be made for compatibility
// reasons for Go 1.
for i := 0; i < n; i++ {
j := r.IntN(i + 1)
m[i] = m[j]
m[j] = i
}
return m
}
// Shuffle pseudo-randomizes the order of elements.
// n is the number of elements. Shuffle panics if n < 0.
// swap swaps the elements with indexes i and j.
func (r *Rand) Shuffle(n int, swap func(i, j int)) {
if n < 0 {
panic("invalid argument to Shuffle")
}
// Fisher-Yates shuffle: https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle
// Shuffle really ought not be called with n that doesn't fit in 32 bits.
// Not only will it take a very long time, but with 2³¹! possible permutations,
// there's no way that any PRNG can have a big enough internal state to
// generate even a minuscule percentage of the possible permutations.
// Nevertheless, the right API signature accepts an int n, so handle it as best we can.
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
for i := n - 1; i > 0; i-- {
j := int(r.uint64n(uint64(i + 1)))
swap(i, j)
}
}
/*
* Top-level convenience functions
*/
// globalRand is the source of random numbers for the top-level
// convenience functions.
var globalRand = &Rand{src: &fastSource{}}
//go:linkname fastrand64
func fastrand64() uint64
// fastSource is a Source that uses the runtime fastrand functions.
type fastSource struct{}
func (*fastSource) Int64() int64 {
return int64(fastrand64() & rngMask)
}
func (*fastSource) Uint64() uint64 {
return fastrand64()
}
// Int64 returns a non-negative pseudo-random 63-bit integer as an int64
// from the default Source.
func Int64() int64 { return globalRand.Int64() }
// Uint32 returns a pseudo-random 32-bit value as a uint32
// from the default Source.
func Uint32() uint32 { return globalRand.Uint32() }
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
// Uint64N returns, as a uint64, a pseudo-random number in the half-open interval [0,n)
// from the default Source.
// It panics if n <= 0.
func Uint64N(n uint64) uint64 { return globalRand.Uint64N(n) }
// Uint32N returns, as a uint32, a pseudo-random number in the half-open interval [0,n)
// from the default Source.
// It panics if n <= 0.
func Uint32N(n uint32) uint32 { return globalRand.Uint32N(n) }
// Uint64 returns a pseudo-random 64-bit value as a uint64
// from the default Source.
func Uint64() uint64 { return globalRand.Uint64() }
// Int32 returns a non-negative pseudo-random 31-bit integer as an int32
// from the default Source.
func Int32() int32 { return globalRand.Int32() }
// Int returns a non-negative pseudo-random int from the default Source.
func Int() int { return globalRand.Int() }
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
// Int64N returns, as an int64, a pseudo-random number in the half-open interval [0,n)
// from the default Source.
// It panics if n <= 0.
func Int64N(n int64) int64 { return globalRand.Int64N(n) }
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
// Int32N returns, as an int32, a pseudo-random number in the half-open interval [0,n)
// from the default Source.
// It panics if n <= 0.
func Int32N(n int32) int32 { return globalRand.Int32N(n) }
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
// IntN returns, as an int, a pseudo-random number in the half-open interval [0,n)
// from the default Source.
// It panics if n <= 0.
func IntN(n int) int { return globalRand.IntN(n) }
math/rand/v2: add, optimize N, UintN, Uint32N, Uint64N Now that we can break the value stream, we can take advantage of better algorithms that have been suggested since the original code was written. Also optimizes IntN, Int32N, Int64N, Perm (indirectly). All the N variants (IntN, Int32N, Int64N, UintN, N, etc) now return the same values given a Source and parameter n, so that for example uint(r.IntN(10)) and r.UintN(10) and r.N(uint(10)) are completely interchangeable. Int64N4e18 gets slower but that is a near worst case for the algorithm and is extremely unlikely in practice. 32-bit Int32N variants got slower too, by 15-30%, in exchange for speeding up everything on 64-bit systems and consistency across the N functions. Also rename previously missed benchmark GlobalInt63Parallel to GlobalInt64Parallel. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.amd64 │ 4d84a369d1.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.335n ± 1% 1.348n ± 2% ~ (p=0.335 n=20) GlobalInt64-32 2.046n ± 1% 2.082n ± 2% ~ (p=0.310 n=20) GlobalInt63Parallel-32 0.1037n ± 1% GlobalInt64Parallel-32 0.1036n ± 1% GlobalUint64-32 2.075n ± 0% 2.077n ± 2% ~ (p=0.228 n=20) GlobalUint64Parallel-32 0.1013n ± 1% 0.1012n ± 1% ~ (p=0.878 n=20) Int64-32 1.726n ± 2% 1.750n ± 0% +1.39% (p=0.000 n=20) Uint64-32 1.673n ± 1% 1.707n ± 2% +2.03% (p=0.002 n=20) GlobalIntN1000-32 3.895n ± 2% 3.192n ± 1% -18.05% (p=0.000 n=20) IntN1000-32 3.403n ± 1% 2.462n ± 2% -27.65% (p=0.000 n=20) Int64N1000-32 3.053n ± 2% 2.470n ± 1% -19.11% (p=0.000 n=20) Int64N1e8-32 2.718n ± 1% 2.503n ± 2% -7.91% (p=0.000 n=20) Int64N1e9-32 2.712n ± 1% 2.487n ± 1% -8.31% (p=0.000 n=20) Int64N2e9-32 2.690n ± 1% 2.487n ± 1% -7.57% (p=0.000 n=20) Int64N1e18-32 3.084n ± 2% 3.006n ± 2% -2.53% (p=0.000 n=20) Int64N2e18-32 4.026n ± 1% 3.368n ± 1% -16.33% (p=0.000 n=20) Int64N4e18-32 4.049n ± 2% 4.763n ± 1% +17.62% (p=0.000 n=20) Int32N1000-32 2.730n ± 0% 2.403n ± 1% -11.94% (p=0.000 n=20) Int32N1e8-32 2.916n ± 2% 2.405n ± 1% -17.53% (p=0.000 n=20) Int32N1e9-32 3.375n ± 1% 2.402n ± 2% -28.83% (p=0.000 n=20) Int32N2e9-32 3.292n ± 1% 2.384n ± 1% -27.58% (p=0.000 n=20) Float32-32 2.673n ± 1% 2.641n ± 2% ~ (p=0.147 n=20) Float64-32 2.485n ± 1% 2.483n ± 1% ~ (p=0.804 n=20) ExpFloat64-32 3.577n ± 2% 3.486n ± 2% -2.57% (p=0.000 n=20) NormFloat64-32 3.797n ± 2% 3.648n ± 1% -3.92% (p=0.000 n=20) Perm3-32 35.79n ± 2% 33.04n ± 1% -7.68% (p=0.000 n=20) Perm30-32 205.1n ± 1% 171.9n ± 1% -16.14% (p=0.000 n=20) Perm30ViaShuffle-32 111.2n ± 2% 100.3n ± 1% -9.76% (p=0.000 n=20) ShuffleOverhead-32 100.5n ± 2% 102.5n ± 1% +1.99% (p=0.007 n=20) Concurrent-32 2.188n ± 5% 2.101n ± 0% ~ (p=0.013 n=20) goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 11ad9fdddc.arm64 │ 4d84a369d1.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.272n ± 1% 2.261n ± 1% ~ (p=0.172 n=20) GlobalInt64-8 2.155n ± 1% 2.160n ± 1% ~ (p=0.482 n=20) GlobalInt63Parallel-8 0.4352n ± 0% GlobalInt64Parallel-8 0.4299n ± 0% GlobalUint64-8 2.173n ± 1% 2.169n ± 1% ~ (p=0.262 n=20) GlobalUint64Parallel-8 0.4340n ± 0% 0.4293n ± 1% -1.08% (p=0.000 n=20) Int64-8 2.544n ± 1% 2.473n ± 1% -2.83% (p=0.000 n=20) Uint64-8 2.552n ± 1% 2.453n ± 1% -3.90% (p=0.000 n=20) GlobalIntN1000-8 3.856n ± 0% 2.814n ± 2% -27.02% (p=0.000 n=20) IntN1000-8 3.820n ± 0% 2.933n ± 2% -23.22% (p=0.000 n=20) Int64N1000-8 3.219n ± 2% 2.934n ± 2% -8.85% (p=0.000 n=20) Int64N1e8-8 3.221n ± 2% 2.935n ± 2% -8.91% (p=0.000 n=20) Int64N1e9-8 3.276n ± 2% 2.934n ± 2% -10.44% (p=0.000 n=20) Int64N2e9-8 3.217n ± 0% 2.935n ± 2% -8.78% (p=0.000 n=20) Int64N1e18-8 3.502n ± 2% 3.778n ± 1% +7.91% (p=0.000 n=20) Int64N2e18-8 4.968n ± 1% 4.359n ± 1% -12.26% (p=0.000 n=20) Int64N4e18-8 4.963n ± 0% 6.546n ± 1% +31.92% (p=0.000 n=20) Int32N1000-8 3.189n ± 1% 2.940n ± 2% -7.81% (p=0.000 n=20) Int32N1e8-8 3.514n ± 1% 2.937n ± 2% -16.41% (p=0.000 n=20) Int32N1e9-8 4.133n ± 0% 2.938n ± 0% -28.91% (p=0.000 n=20) Int32N2e9-8 4.137n ± 0% 2.938n ± 2% -28.97% (p=0.000 n=20) Float32-8 3.468n ± 1% 3.486n ± 0% +0.52% (p=0.000 n=20) Float64-8 3.478n ± 0% 3.480n ± 0% ~ (p=0.063 n=20) ExpFloat64-8 4.563n ± 0% 4.533n ± 0% -0.67% (p=0.000 n=20) NormFloat64-8 4.768n ± 0% 4.764n ± 0% -0.07% (p=0.001 n=20) Perm3-8 28.94n ± 0% 26.66n ± 0% -7.88% (p=0.000 n=20) Perm30-8 175.9n ± 0% 143.4n ± 0% -18.50% (p=0.000 n=20) Perm30ViaShuffle-8 152.6n ± 1% 142.9n ± 0% -6.29% (p=0.000 n=20) ShuffleOverhead-8 119.6n ± 1% 120.7n ± 0% +0.96% (p=0.000 n=20) Concurrent-8 2.452n ± 3% 2.360n ± 2% -3.73% (p=0.007 n=20) goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 11ad9fdddc.386 │ 4d84a369d1.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.091n ± 1% 2.101n ± 2% ~ (p=0.672 n=20) GlobalInt64-32 3.514n ± 2% 3.518n ± 2% ~ (p=0.723 n=20) GlobalInt63Parallel-32 0.3197n ± 0% GlobalInt64Parallel-32 0.3206n ± 0% GlobalUint64-32 3.542n ± 1% 3.538n ± 1% ~ (p=0.304 n=20) GlobalUint64Parallel-32 0.3218n ± 0% 0.3231n ± 0% ~ (p=0.071 n=20) Int64-32 2.552n ± 2% 2.554n ± 2% ~ (p=0.693 n=20) Uint64-32 2.566n ± 1% 2.575n ± 2% ~ (p=0.606 n=20) GlobalIntN1000-32 5.965n ± 2% 6.292n ± 1% +5.46% (p=0.000 n=20) IntN1000-32 4.652n ± 1% 4.735n ± 1% +1.77% (p=0.000 n=20) Int64N1000-32 14.485n ± 1% 5.489n ± 2% -62.11% (p=0.000 n=20) Int64N1e8-32 14.675n ± 1% 5.528n ± 2% -62.33% (p=0.000 n=20) Int64N1e9-32 16.805n ± 2% 5.438n ± 2% -67.64% (p=0.000 n=20) Int64N2e9-32 14.515n ± 1% 5.474n ± 1% -62.28% (p=0.000 n=20) Int64N1e18-32 16.165n ± 1% 9.053n ± 1% -44.00% (p=0.000 n=20) Int64N2e18-32 17.945n ± 2% 9.685n ± 2% -46.03% (p=0.000 n=20) Int64N4e18-32 18.35n ± 2% 12.18n ± 1% -33.62% (p=0.000 n=20) Int32N1000-32 3.608n ± 1% 4.862n ± 1% +34.77% (p=0.000 n=20) Int32N1e8-32 3.767n ± 1% 4.758n ± 2% +26.31% (p=0.000 n=20) Int32N1e9-32 4.130n ± 2% 4.772n ± 1% +15.54% (p=0.000 n=20) Int32N2e9-32 4.206n ± 1% 4.847n ± 0% +15.24% (p=0.000 n=20) Float32-32 22.18n ± 4% 22.18n ± 4% ~ (p=0.195 n=20) Float64-32 20.75n ± 4% 21.21n ± 3% ~ (p=0.394 n=20) ExpFloat64-32 12.58n ± 3% 12.39n ± 2% ~ (p=0.032 n=20) NormFloat64-32 7.920n ± 3% 7.422n ± 1% -6.29% (p=0.000 n=20) Perm3-32 40.27n ± 1% 38.00n ± 2% -5.65% (p=0.000 n=20) Perm30-32 213.2n ± 2% 212.7n ± 1% ~ (p=0.995 n=20) Perm30ViaShuffle-32 164.2n ± 2% 187.5n ± 2% +14.22% (p=0.000 n=20) ShuffleOverhead-32 134.7n ± 2% 159.7n ± 1% +18.52% (p=0.000 n=20) Concurrent-32 3.301n ± 2% 3.470n ± 0% +5.10% (p=0.000 n=20) For #61716. Change-Id: Id1481b04202883cd0b23e21bb58d1bca4e482bd3 Reviewed-on: https://go-review.googlesource.com/c/go/+/502500 Reviewed-by: Rob Pike <r@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2023-06-06 09:43:20 -04:00
// UintN returns, as a uint, a pseudo-random number in the half-open interval [0,n)
// from the default Source.
// It panics if n <= 0.
func UintN(n uint) uint { return globalRand.UintN(n) }
// N returns a pseudo-random number in the half-open interval [0,n) from the default Source.
// The type parameter Int can be any integer type.
// It panics if n <= 0.
func N[Int intType](n Int) Int {
if n <= 0 {
panic("invalid argument to N")
}
return Int(globalRand.uint64n(uint64(n)))
}
type intType interface {
~int | ~int8 | ~int16 | ~int32 | ~int64 |
~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr
}
// Float64 returns, as a float64, a pseudo-random number in the half-open interval [0.0,1.0)
// from the default Source.
func Float64() float64 { return globalRand.Float64() }
// Float32 returns, as a float32, a pseudo-random number in the half-open interval [0.0,1.0)
// from the default Source.
func Float32() float32 { return globalRand.Float32() }
// Perm returns, as a slice of n ints, a pseudo-random permutation of the integers
// in the half-open interval [0,n) from the default Source.
func Perm(n int) []int { return globalRand.Perm(n) }
// Shuffle pseudo-randomizes the order of elements using the default Source.
// n is the number of elements. Shuffle panics if n < 0.
// swap swaps the elements with indexes i and j.
func Shuffle(n int, swap func(i, j int)) { globalRand.Shuffle(n, swap) }
// NormFloat64 returns a normally distributed float64 in the range
// [-math.MaxFloat64, +math.MaxFloat64] with
// standard normal distribution (mean = 0, stddev = 1)
// from the default Source.
// To produce a different normal distribution, callers can
// adjust the output using:
//
// sample = NormFloat64() * desiredStdDev + desiredMean
func NormFloat64() float64 { return globalRand.NormFloat64() }
// ExpFloat64 returns an exponentially distributed float64 in the range
// (0, +math.MaxFloat64] with an exponential distribution whose rate parameter
// (lambda) is 1 and whose mean is 1/lambda (1) from the default Source.
// To produce a distribution with a different rate parameter,
// callers can adjust the output using:
//
// sample = ExpFloat64() / desiredRateParameter
func ExpFloat64() float64 { return globalRand.ExpFloat64() }