go/src/math/bits/bits_test.go

641 lines
14 KiB
Go
Raw Normal View History

// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bits
import (
"testing"
"unsafe"
)
// TestUintSize verifies that UintSize matches the width, in bits, of the
// platform's uint type as measured with unsafe.Sizeof.
func TestUintSize(t *testing.T) {
	var probe uint
	want := unsafe.Sizeof(probe) * 8
	if UintSize == want {
		return
	}
	t.Fatalf("UintSize = %d; want %d", UintSize, want)
}
// TestLeadingZeros shifts every 8-bit pattern left by 0..55 bits and checks
// LeadingZeros8/16/32/64 (plus LeadingZeros for the platform's UintSize)
// against the value derived from the pattern's tab entry. A width is only
// checked while the shifted value still fits in it; a zero value is expected
// to report the full width.
func TestLeadingZeros(t *testing.T) {
	for b := 0; b < 256; b++ {
		base := tab[b].nlz
		for shift := 0; shift < 64-8; shift++ {
			v := uint64(b) << uint(shift)

			// expected yields the leading-zero count of v in a width-bit word.
			expected := func(width int) int {
				if v == 0 {
					return width
				}
				return base - shift + (width - 8)
			}

			if v < 1<<8 {
				if got, want := LeadingZeros8(uint8(v)), expected(8); got != want {
					t.Fatalf("LeadingZeros8(%#02x) == %d; want %d", v, got, want)
				}
			}

			if v < 1<<16 {
				if got, want := LeadingZeros16(uint16(v)), expected(16); got != want {
					t.Fatalf("LeadingZeros16(%#04x) == %d; want %d", v, got, want)
				}
			}

			if v < 1<<32 {
				want := expected(32)
				if got := LeadingZeros32(uint32(v)); got != want {
					t.Fatalf("LeadingZeros32(%#08x) == %d; want %d", v, got, want)
				}
				if UintSize == 32 {
					if got := LeadingZeros(uint(v)); got != want {
						t.Fatalf("LeadingZeros(%#08x) == %d; want %d", v, got, want)
					}
				}
			}

			// Every uint64 value fits in 64 bits, so this width is always checked.
			want := expected(64)
			if got := LeadingZeros64(v); got != want {
				t.Fatalf("LeadingZeros64(%#016x) == %d; want %d", v, got, want)
			}
			if UintSize == 64 {
				if got := LeadingZeros(uint(v)); got != want {
					t.Fatalf("LeadingZeros(%#016x) == %d; want %d", v, got, want)
				}
			}
		}
	}
}
// TestTrailingZeros shifts every 8-bit pattern left by 0..55 bits and checks
// TrailingZeros8/16/32/64 (plus TrailingZeros for the platform's UintSize)
// against the pattern's tab entry plus the shift. A width is only checked
// while the shifted value still fits in it; a zero value is expected to
// report the full width.
func TestTrailingZeros(t *testing.T) {
	for b := 0; b < 256; b++ {
		base := tab[b].ntz
		for shift := 0; shift < 64-8; shift++ {
			v := uint64(b) << uint(shift)

			// expected yields the trailing-zero count of v in a width-bit word.
			expected := func(width int) int {
				if v == 0 {
					return width
				}
				return base + shift
			}

			if v < 1<<8 {
				if got, want := TrailingZeros8(uint8(v)), expected(8); got != want {
					t.Fatalf("TrailingZeros8(%#02x) == %d; want %d", v, got, want)
				}
			}

			if v < 1<<16 {
				if got, want := TrailingZeros16(uint16(v)), expected(16); got != want {
					t.Fatalf("TrailingZeros16(%#04x) == %d; want %d", v, got, want)
				}
			}

			if v < 1<<32 {
				want := expected(32)
				if got := TrailingZeros32(uint32(v)); got != want {
					t.Fatalf("TrailingZeros32(%#08x) == %d; want %d", v, got, want)
				}
				if UintSize == 32 {
					if got := TrailingZeros(uint(v)); got != want {
						t.Fatalf("TrailingZeros(%#08x) == %d; want %d", v, got, want)
					}
				}
			}

			// Every uint64 value fits in 64 bits, so this width is always checked.
			want := expected(64)
			if got := TrailingZeros64(v); got != want {
				t.Fatalf("TrailingZeros64(%#016x) == %d; want %d", v, got, want)
			}
			if UintSize == 64 {
				if got := TrailingZeros(uint(v)); got != want {
					t.Fatalf("TrailingZeros(%#016x) == %d; want %d", v, got, want)
				}
			}
		}
	}
}
// TestOnesCount shifts every 8-bit pattern left by 0..55 bits and checks that
// OnesCount8/16/32/64 (plus OnesCount for the platform's UintSize) report the
// population count stored in the pattern's tab entry. A width is only checked
// while the shifted value still fits in it.
func TestOnesCount(t *testing.T) {
	for b := 0; b < 256; b++ {
		want := tab[b].pop
		for shift := 0; shift < 64-8; shift++ {
			v := uint64(b) << uint(shift)

			if v < 1<<8 {
				if got := OnesCount8(uint8(v)); got != want {
					t.Fatalf("OnesCount8(%#02x) == %d; want %d", v, got, want)
				}
			}

			if v < 1<<16 {
				if got := OnesCount16(uint16(v)); got != want {
					t.Fatalf("OnesCount16(%#04x) == %d; want %d", v, got, want)
				}
			}

			if v < 1<<32 {
				if got := OnesCount32(uint32(v)); got != want {
					t.Fatalf("OnesCount32(%#08x) == %d; want %d", v, got, want)
				}
				if UintSize == 32 {
					if got := OnesCount(uint(v)); got != want {
						t.Fatalf("OnesCount(%#08x) == %d; want %d", v, got, want)
					}
				}
			}

			// Every uint64 value fits in 64 bits, so this width is always checked.
			if got := OnesCount64(v); got != want {
				t.Fatalf("OnesCount64(%#016x) == %d; want %d", v, got, want)
			}
			if UintSize == 64 {
				if got := OnesCount(uint(v)); got != want {
					t.Fatalf("OnesCount(%#016x) == %d; want %d", v, got, want)
				}
			}
		}
	}
}
math/bits: faster OnesCount, added respective benchmarks Also: Changed Reverse/ReverseBytes implementations to use the same (smaller) masks as OnesCount. BenchmarkOnesCount-8 37.0 6.26 -83.08% BenchmarkOnesCount8-8 7.24 1.99 -72.51% BenchmarkOnesCount16-8 11.3 2.47 -78.14% BenchmarkOnesCount32-8 18.4 3.02 -83.59% BenchmarkOnesCount64-8 40.0 3.78 -90.55% BenchmarkReverse-8 6.69 6.22 -7.03% BenchmarkReverse8-8 1.64 1.64 +0.00% BenchmarkReverse16-8 2.26 2.18 -3.54% BenchmarkReverse32-8 2.88 2.87 -0.35% BenchmarkReverse64-8 5.64 4.34 -23.05% BenchmarkReverseBytes-8 2.48 2.17 -12.50% BenchmarkReverseBytes16-8 0.63 0.95 +50.79% BenchmarkReverseBytes32-8 1.13 1.24 +9.73% BenchmarkReverseBytes64-8 2.50 2.16 -13.60% OnesCount-8 37.0ns ± 0% 6.3ns ± 0% ~ (p=1.000 n=1+1) OnesCount8-8 7.24ns ± 0% 1.99ns ± 0% ~ (p=1.000 n=1+1) OnesCount16-8 11.3ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) OnesCount32-8 18.4ns ± 0% 3.0ns ± 0% ~ (p=1.000 n=1+1) OnesCount64-8 40.0ns ± 0% 3.8ns ± 0% ~ (p=1.000 n=1+1) Reverse-8 6.69ns ± 0% 6.22ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 1.64ns ± 0% 1.64ns ± 0% ~ (all samples are equal) Reverse16-8 2.26ns ± 0% 2.18ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 2.88ns ± 0% 2.87ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 5.64ns ± 0% 4.34ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 2.48ns ± 0% 2.17ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 0.63ns ± 0% 0.95ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 1.13ns ± 0% 1.24ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 2.50ns ± 0% 2.16ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I591b0ffc83fc3a42828256b6e5030f32c64f9497 Reviewed-on: https://go-review.googlesource.com/37218 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 14:20:11 -08:00
// Unused is an exported (global) variable used to store function results
// during benchmarking to ensure side-effect free calls
// are not optimized away.
var Unused int
// Input is an exported (global) variable serving as input for some
// of the benchmarks to ensure side-effect free calls
// are not optimized away. It is initialized to deBruijn64.
var Input uint64 = deBruijn64
// BenchmarkOnesCount measures OnesCount applied to Input converted to uint.
// The results are summed and written to Unused.
func BenchmarkOnesCount(b *testing.B) {
	sum := 0
	for n := b.N; n > 0; n-- {
		sum += OnesCount(uint(Input))
	}
	Unused = sum
}
// BenchmarkOnesCount8 measures OnesCount8 applied to Input truncated to
// uint8. The results are summed and written to Unused.
func BenchmarkOnesCount8(b *testing.B) {
	sum := 0
	for n := b.N; n > 0; n-- {
		sum += OnesCount8(uint8(Input))
	}
	Unused = sum
}
// BenchmarkOnesCount16 measures OnesCount16 applied to Input truncated to
// uint16. The results are summed and written to Unused.
func BenchmarkOnesCount16(b *testing.B) {
	sum := 0
	for n := b.N; n > 0; n-- {
		sum += OnesCount16(uint16(Input))
	}
	Unused = sum
}
// BenchmarkOnesCount32 measures OnesCount32 applied to Input truncated to
// uint32. The results are summed and written to Unused.
func BenchmarkOnesCount32(b *testing.B) {
	sum := 0
	for n := b.N; n > 0; n-- {
		sum += OnesCount32(uint32(Input))
	}
	Unused = sum
}
// BenchmarkOnesCount64 measures OnesCount64 applied to Input. The results
// are summed and written to Unused.
func BenchmarkOnesCount64(b *testing.B) {
	sum := 0
	for n := b.N; n > 0; n-- {
		sum += OnesCount64(uint64(Input))
	}
	Unused = sum
}
// TestRotateLeft rotates the low 8, 16, 32 and 64 bits of deBruijn64 left by
// every amount in 0..127 and compares RotateLeft8/16/32/64 (plus RotateLeft
// for the platform's UintSize) with a rotation built from two shifts, using
// the rotation amount reduced modulo the word width.
func TestRotateLeft(t *testing.T) {
	var m uint64 = deBruijn64
	x8, x16, x32, x64 := uint8(m), uint16(m), uint32(m), uint64(m)

	for k := uint(0); k < 128; k++ {
		n := int(k)

		s8 := k & 0x7
		if got, want := RotateLeft8(x8, n), x8<<s8|x8>>(8-s8); got != want {
			t.Fatalf("RotateLeft8(%#02x, %d) == %#02x; want %#02x", x8, k, got, want)
		}

		s16 := k & 0xf
		if got, want := RotateLeft16(x16, n), x16<<s16|x16>>(16-s16); got != want {
			t.Fatalf("RotateLeft16(%#04x, %d) == %#04x; want %#04x", x16, k, got, want)
		}

		s32 := k & 0x1f
		if got, want := RotateLeft32(x32, n), x32<<s32|x32>>(32-s32); got != want {
			t.Fatalf("RotateLeft32(%#08x, %d) == %#08x; want %#08x", x32, k, got, want)
		}
		if UintSize == 32 {
			x := uint(m)
			if got, want := RotateLeft(x, n), x<<s32|x>>(32-s32); got != want {
				t.Fatalf("RotateLeft(%#08x, %d) == %#08x; want %#08x", x, k, got, want)
			}
		}

		s64 := k & 0x3f
		if got, want := RotateLeft64(x64, n), x64<<s64|x64>>(64-s64); got != want {
			t.Fatalf("RotateLeft64(%#016x, %d) == %#016x; want %#016x", x64, k, got, want)
		}
		if UintSize == 64 {
			x := uint(m)
			if got, want := RotateLeft(x, n), x<<s64|x>>(64-s64); got != want {
				t.Fatalf("RotateLeft(%#016x, %d) == %#016x; want %#016x", x, k, got, want)
			}
		}
	}
}
// TestRotateRight rotates the low 8, 16, 32 and 64 bits of deBruijn64 right
// by every amount in 0..127 and compares RotateRight8/16/32/64 (plus
// RotateRight for the platform's UintSize) with a rotation built from two
// shifts, using the rotation amount reduced modulo the word width.
func TestRotateRight(t *testing.T) {
	var m uint64 = deBruijn64
	x8, x16, x32, x64 := uint8(m), uint16(m), uint32(m), uint64(m)

	for k := uint(0); k < 128; k++ {
		n := int(k)

		s8 := k & 0x7
		if got, want := RotateRight8(x8, n), x8>>s8|x8<<(8-s8); got != want {
			t.Fatalf("RotateRight8(%#02x, %d) == %#02x; want %#02x", x8, k, got, want)
		}

		s16 := k & 0xf
		if got, want := RotateRight16(x16, n), x16>>s16|x16<<(16-s16); got != want {
			t.Fatalf("RotateRight16(%#04x, %d) == %#04x; want %#04x", x16, k, got, want)
		}

		s32 := k & 0x1f
		if got, want := RotateRight32(x32, n), x32>>s32|x32<<(32-s32); got != want {
			t.Fatalf("RotateRight32(%#08x, %d) == %#08x; want %#08x", x32, k, got, want)
		}
		if UintSize == 32 {
			x := uint(m)
			if got, want := RotateRight(x, n), x>>s32|x<<(32-s32); got != want {
				t.Fatalf("RotateRight(%#08x, %d) == %#08x; want %#08x", x, k, got, want)
			}
		}

		s64 := k & 0x3f
		if got, want := RotateRight64(x64, n), x64>>s64|x64<<(64-s64); got != want {
			t.Fatalf("RotateRight64(%#016x, %d) == %#016x; want %#016x", x64, k, got, want)
		}
		if UintSize == 64 {
			x := uint(m)
			if got, want := RotateRight(x, n), x>>s64|x<<(64-s64); got != want {
				t.Fatalf("RotateRight(%#016x, %d) == %#016x; want %#016x", x, k, got, want)
			}
		}
	}
}
// TestReverse feeds testReverse with every single-bit 64-bit value paired
// with its mirror image, followed by a table of fixed patterns that are
// checked in both directions (x -> r and r -> x).
func TestReverse(t *testing.T) {
	// Single-bit inputs: bit i pairs with bit 63-i.
	for bit := uint(0); bit < 64; bit++ {
		in, mirrored := uint64(1)<<bit, uint64(1)<<(63-bit)
		testReverse(t, in, mirrored)
	}

	// Fixed patterns and their 64-bit reversals.
	patterns := []struct {
		x, r uint64
	}{
		{0, 0},
		{0x1, 0x8 << 60},
		{0x2, 0x4 << 60},
		{0x3, 0xc << 60},
		{0x4, 0x2 << 60},
		{0x5, 0xa << 60},
		{0x6, 0x6 << 60},
		{0x7, 0xe << 60},
		{0x8, 0x1 << 60},
		{0x9, 0x9 << 60},
		{0xa, 0x5 << 60},
		{0xb, 0xd << 60},
		{0xc, 0x3 << 60},
		{0xd, 0xb << 60},
		{0xe, 0x7 << 60},
		{0xf, 0xf << 60},
		{0x5686487, 0xe12616a000000000},
		{0x0123456789abcdef, 0xf7b3d591e6a2c480},
	}
	for i := range patterns {
		p := patterns[i]
		testReverse(t, p.x, p.r)
		testReverse(t, p.r, p.x)
	}
}
// testReverse checks Reverse8, Reverse16, Reverse32, Reverse64 and Reverse
// (for the platform's UintSize) against one 64-bit test vector.
//
// x64 is the input and want64 is the expected result of Reverse64(x64). The
// narrower functions are fed the low 8/16/32 bits of x64 and are expected to
// return the high 8/16/32 bits of want64. The test is aborted with t.Fatalf
// on the first mismatch.
func testReverse(t *testing.T, x64, want64 uint64) {
	x8 := uint8(x64)
	got8 := Reverse8(x8)
	want8 := uint8(want64 >> (64 - 8))
	if got8 != want8 {
		t.Fatalf("Reverse8(%#02x) == %#02x; want %#02x", x8, got8, want8)
	}

	x16 := uint16(x64)
	got16 := Reverse16(x16)
	want16 := uint16(want64 >> (64 - 16))
	if got16 != want16 {
		t.Fatalf("Reverse16(%#04x) == %#04x; want %#04x", x16, got16, want16)
	}

	x32 := uint32(x64)
	got32 := Reverse32(x32)
	want32 := uint32(want64 >> (64 - 32))
	if got32 != want32 {
		t.Fatalf("Reverse32(%#08x) == %#08x; want %#08x", x32, got32, want32)
	}
	if UintSize == 32 {
		x := uint(x32)
		got := Reverse(x)
		want := uint(want32)
		if got != want {
			t.Fatalf("Reverse(%#08x) == %#08x; want %#08x", x, got, want)
		}
	}

	got64 := Reverse64(x64)
	if got64 != want64 {
		t.Fatalf("Reverse64(%#016x) == %#016x; want %#016x", x64, got64, want64)
	}
	if UintSize == 64 {
		x := uint(x64)
		got := Reverse(x)
		want := uint(want64)
		if got != want {
			// The input is a 64-bit value here, so it is printed with the
			// same width as got and want.
			t.Fatalf("Reverse(%#016x) == %#016x; want %#016x", x, got, want)
		}
	}
}
func BenchmarkReverse(b *testing.B) {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
var s uint
for i := 0; i < b.N; i++ {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
s += Reverse(uint(i))
}
math/bits: faster OnesCount, added respective benchmarks Also: Changed Reverse/ReverseBytes implementations to use the same (smaller) masks as OnesCount. BenchmarkOnesCount-8 37.0 6.26 -83.08% BenchmarkOnesCount8-8 7.24 1.99 -72.51% BenchmarkOnesCount16-8 11.3 2.47 -78.14% BenchmarkOnesCount32-8 18.4 3.02 -83.59% BenchmarkOnesCount64-8 40.0 3.78 -90.55% BenchmarkReverse-8 6.69 6.22 -7.03% BenchmarkReverse8-8 1.64 1.64 +0.00% BenchmarkReverse16-8 2.26 2.18 -3.54% BenchmarkReverse32-8 2.88 2.87 -0.35% BenchmarkReverse64-8 5.64 4.34 -23.05% BenchmarkReverseBytes-8 2.48 2.17 -12.50% BenchmarkReverseBytes16-8 0.63 0.95 +50.79% BenchmarkReverseBytes32-8 1.13 1.24 +9.73% BenchmarkReverseBytes64-8 2.50 2.16 -13.60% OnesCount-8 37.0ns ± 0% 6.3ns ± 0% ~ (p=1.000 n=1+1) OnesCount8-8 7.24ns ± 0% 1.99ns ± 0% ~ (p=1.000 n=1+1) OnesCount16-8 11.3ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) OnesCount32-8 18.4ns ± 0% 3.0ns ± 0% ~ (p=1.000 n=1+1) OnesCount64-8 40.0ns ± 0% 3.8ns ± 0% ~ (p=1.000 n=1+1) Reverse-8 6.69ns ± 0% 6.22ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 1.64ns ± 0% 1.64ns ± 0% ~ (all samples are equal) Reverse16-8 2.26ns ± 0% 2.18ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 2.88ns ± 0% 2.87ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 5.64ns ± 0% 4.34ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 2.48ns ± 0% 2.17ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 0.63ns ± 0% 0.95ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 1.13ns ± 0% 1.24ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 2.50ns ± 0% 2.16ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I591b0ffc83fc3a42828256b6e5030f32c64f9497 Reviewed-on: https://go-review.googlesource.com/37218 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 14:20:11 -08:00
Unused = int(s)
}
func BenchmarkReverse8(b *testing.B) {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
var s uint8
for i := 0; i < b.N; i++ {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
s += Reverse8(uint8(i))
}
math/bits: faster OnesCount, added respective benchmarks Also: Changed Reverse/ReverseBytes implementations to use the same (smaller) masks as OnesCount. BenchmarkOnesCount-8 37.0 6.26 -83.08% BenchmarkOnesCount8-8 7.24 1.99 -72.51% BenchmarkOnesCount16-8 11.3 2.47 -78.14% BenchmarkOnesCount32-8 18.4 3.02 -83.59% BenchmarkOnesCount64-8 40.0 3.78 -90.55% BenchmarkReverse-8 6.69 6.22 -7.03% BenchmarkReverse8-8 1.64 1.64 +0.00% BenchmarkReverse16-8 2.26 2.18 -3.54% BenchmarkReverse32-8 2.88 2.87 -0.35% BenchmarkReverse64-8 5.64 4.34 -23.05% BenchmarkReverseBytes-8 2.48 2.17 -12.50% BenchmarkReverseBytes16-8 0.63 0.95 +50.79% BenchmarkReverseBytes32-8 1.13 1.24 +9.73% BenchmarkReverseBytes64-8 2.50 2.16 -13.60% OnesCount-8 37.0ns ± 0% 6.3ns ± 0% ~ (p=1.000 n=1+1) OnesCount8-8 7.24ns ± 0% 1.99ns ± 0% ~ (p=1.000 n=1+1) OnesCount16-8 11.3ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) OnesCount32-8 18.4ns ± 0% 3.0ns ± 0% ~ (p=1.000 n=1+1) OnesCount64-8 40.0ns ± 0% 3.8ns ± 0% ~ (p=1.000 n=1+1) Reverse-8 6.69ns ± 0% 6.22ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 1.64ns ± 0% 1.64ns ± 0% ~ (all samples are equal) Reverse16-8 2.26ns ± 0% 2.18ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 2.88ns ± 0% 2.87ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 5.64ns ± 0% 4.34ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 2.48ns ± 0% 2.17ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 0.63ns ± 0% 0.95ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 1.13ns ± 0% 1.24ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 2.50ns ± 0% 2.16ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I591b0ffc83fc3a42828256b6e5030f32c64f9497 Reviewed-on: https://go-review.googlesource.com/37218 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 14:20:11 -08:00
Unused = int(s)
}
func BenchmarkReverse16(b *testing.B) {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
var s uint16
for i := 0; i < b.N; i++ {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
s += Reverse16(uint16(i))
}
math/bits: faster OnesCount, added respective benchmarks Also: Changed Reverse/ReverseBytes implementations to use the same (smaller) masks as OnesCount. BenchmarkOnesCount-8 37.0 6.26 -83.08% BenchmarkOnesCount8-8 7.24 1.99 -72.51% BenchmarkOnesCount16-8 11.3 2.47 -78.14% BenchmarkOnesCount32-8 18.4 3.02 -83.59% BenchmarkOnesCount64-8 40.0 3.78 -90.55% BenchmarkReverse-8 6.69 6.22 -7.03% BenchmarkReverse8-8 1.64 1.64 +0.00% BenchmarkReverse16-8 2.26 2.18 -3.54% BenchmarkReverse32-8 2.88 2.87 -0.35% BenchmarkReverse64-8 5.64 4.34 -23.05% BenchmarkReverseBytes-8 2.48 2.17 -12.50% BenchmarkReverseBytes16-8 0.63 0.95 +50.79% BenchmarkReverseBytes32-8 1.13 1.24 +9.73% BenchmarkReverseBytes64-8 2.50 2.16 -13.60% OnesCount-8 37.0ns ± 0% 6.3ns ± 0% ~ (p=1.000 n=1+1) OnesCount8-8 7.24ns ± 0% 1.99ns ± 0% ~ (p=1.000 n=1+1) OnesCount16-8 11.3ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) OnesCount32-8 18.4ns ± 0% 3.0ns ± 0% ~ (p=1.000 n=1+1) OnesCount64-8 40.0ns ± 0% 3.8ns ± 0% ~ (p=1.000 n=1+1) Reverse-8 6.69ns ± 0% 6.22ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 1.64ns ± 0% 1.64ns ± 0% ~ (all samples are equal) Reverse16-8 2.26ns ± 0% 2.18ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 2.88ns ± 0% 2.87ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 5.64ns ± 0% 4.34ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 2.48ns ± 0% 2.17ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 0.63ns ± 0% 0.95ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 1.13ns ± 0% 1.24ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 2.50ns ± 0% 2.16ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I591b0ffc83fc3a42828256b6e5030f32c64f9497 Reviewed-on: https://go-review.googlesource.com/37218 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 14:20:11 -08:00
Unused = int(s)
}
func BenchmarkReverse32(b *testing.B) {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
var s uint32
for i := 0; i < b.N; i++ {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
s += Reverse32(uint32(i))
}
math/bits: faster OnesCount, added respective benchmarks Also: Changed Reverse/ReverseBytes implementations to use the same (smaller) masks as OnesCount. BenchmarkOnesCount-8 37.0 6.26 -83.08% BenchmarkOnesCount8-8 7.24 1.99 -72.51% BenchmarkOnesCount16-8 11.3 2.47 -78.14% BenchmarkOnesCount32-8 18.4 3.02 -83.59% BenchmarkOnesCount64-8 40.0 3.78 -90.55% BenchmarkReverse-8 6.69 6.22 -7.03% BenchmarkReverse8-8 1.64 1.64 +0.00% BenchmarkReverse16-8 2.26 2.18 -3.54% BenchmarkReverse32-8 2.88 2.87 -0.35% BenchmarkReverse64-8 5.64 4.34 -23.05% BenchmarkReverseBytes-8 2.48 2.17 -12.50% BenchmarkReverseBytes16-8 0.63 0.95 +50.79% BenchmarkReverseBytes32-8 1.13 1.24 +9.73% BenchmarkReverseBytes64-8 2.50 2.16 -13.60% OnesCount-8 37.0ns ± 0% 6.3ns ± 0% ~ (p=1.000 n=1+1) OnesCount8-8 7.24ns ± 0% 1.99ns ± 0% ~ (p=1.000 n=1+1) OnesCount16-8 11.3ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) OnesCount32-8 18.4ns ± 0% 3.0ns ± 0% ~ (p=1.000 n=1+1) OnesCount64-8 40.0ns ± 0% 3.8ns ± 0% ~ (p=1.000 n=1+1) Reverse-8 6.69ns ± 0% 6.22ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 1.64ns ± 0% 1.64ns ± 0% ~ (all samples are equal) Reverse16-8 2.26ns ± 0% 2.18ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 2.88ns ± 0% 2.87ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 5.64ns ± 0% 4.34ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 2.48ns ± 0% 2.17ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 0.63ns ± 0% 0.95ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 1.13ns ± 0% 1.24ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 2.50ns ± 0% 2.16ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I591b0ffc83fc3a42828256b6e5030f32c64f9497 Reviewed-on: https://go-review.googlesource.com/37218 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 14:20:11 -08:00
Unused = int(s)
}
func BenchmarkReverse64(b *testing.B) {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
var s uint64
for i := 0; i < b.N; i++ {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
s += Reverse64(uint64(i))
}
math/bits: faster OnesCount, added respective benchmarks Also: Changed Reverse/ReverseBytes implementations to use the same (smaller) masks as OnesCount. BenchmarkOnesCount-8 37.0 6.26 -83.08% BenchmarkOnesCount8-8 7.24 1.99 -72.51% BenchmarkOnesCount16-8 11.3 2.47 -78.14% BenchmarkOnesCount32-8 18.4 3.02 -83.59% BenchmarkOnesCount64-8 40.0 3.78 -90.55% BenchmarkReverse-8 6.69 6.22 -7.03% BenchmarkReverse8-8 1.64 1.64 +0.00% BenchmarkReverse16-8 2.26 2.18 -3.54% BenchmarkReverse32-8 2.88 2.87 -0.35% BenchmarkReverse64-8 5.64 4.34 -23.05% BenchmarkReverseBytes-8 2.48 2.17 -12.50% BenchmarkReverseBytes16-8 0.63 0.95 +50.79% BenchmarkReverseBytes32-8 1.13 1.24 +9.73% BenchmarkReverseBytes64-8 2.50 2.16 -13.60% OnesCount-8 37.0ns ± 0% 6.3ns ± 0% ~ (p=1.000 n=1+1) OnesCount8-8 7.24ns ± 0% 1.99ns ± 0% ~ (p=1.000 n=1+1) OnesCount16-8 11.3ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) OnesCount32-8 18.4ns ± 0% 3.0ns ± 0% ~ (p=1.000 n=1+1) OnesCount64-8 40.0ns ± 0% 3.8ns ± 0% ~ (p=1.000 n=1+1) Reverse-8 6.69ns ± 0% 6.22ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 1.64ns ± 0% 1.64ns ± 0% ~ (all samples are equal) Reverse16-8 2.26ns ± 0% 2.18ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 2.88ns ± 0% 2.87ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 5.64ns ± 0% 4.34ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 2.48ns ± 0% 2.17ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 0.63ns ± 0% 0.95ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 1.13ns ± 0% 1.24ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 2.50ns ± 0% 2.16ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I591b0ffc83fc3a42828256b6e5030f32c64f9497 Reviewed-on: https://go-review.googlesource.com/37218 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 14:20:11 -08:00
Unused = int(s)
}
// TestReverseBytes feeds a table of values and their byte-reversed
// counterparts to testReverseBytes. Every pair is checked in both
// directions, since reversing the bytes twice yields the original value.
func TestReverseBytes(t *testing.T) {
	type pair struct {
		x, r uint64
	}
	// Each row extends the previous input by one more byte; r is the
	// 64-bit byte reversal of x.
	pairs := []pair{
		{0, 0},
		{0x01, 0x01 << 56},
		{0x0123, 0x2301 << 48},
		{0x012345, 0x452301 << 40},
		{0x01234567, 0x67452301 << 32},
		{0x0123456789, 0x8967452301 << 24},
		{0x0123456789ab, 0xab8967452301 << 16},
		{0x0123456789abcd, 0xcdab8967452301 << 8},
		{0x0123456789abcdef, 0xefcdab8967452301 << 0},
	}
	for i := range pairs {
		p := pairs[i]
		testReverseBytes(t, p.x, p.r)
		testReverseBytes(t, p.r, p.x)
	}
}
// testReverseBytes checks every ReverseBytes variant against one 64-bit
// input/expectation pair.
//
// For the 16- and 32-bit variants the input is the low bits of x64 and the
// expected result is the corresponding number of high bits of want64.
// The uint variant is checked at whichever width matches UintSize.
// The first mismatch aborts the test via t.Fatalf.
func testReverseBytes(t *testing.T, x64, want64 uint64) {
	// 16-bit variant.
	in16, exp16 := uint16(x64), uint16(want64>>(64-16))
	if out16 := ReverseBytes16(in16); out16 != exp16 {
		t.Fatalf("ReverseBytes16(%#04x) == %#04x; want %#04x", in16, out16, exp16)
	}

	// 32-bit variant, plus uint on 32-bit platforms.
	in32, exp32 := uint32(x64), uint32(want64>>(64-32))
	if out32 := ReverseBytes32(in32); out32 != exp32 {
		t.Fatalf("ReverseBytes32(%#08x) == %#08x; want %#08x", in32, out32, exp32)
	}
	if UintSize == 32 {
		in, exp := uint(in32), uint(exp32)
		if out := ReverseBytes(in); out != exp {
			t.Fatalf("ReverseBytes(%#08x) == %#08x; want %#08x", in, out, exp)
		}
	}

	// 64-bit variant, plus uint on 64-bit platforms.
	if out64 := ReverseBytes64(x64); out64 != want64 {
		t.Fatalf("ReverseBytes64(%#016x) == %#016x; want %#016x", x64, out64, want64)
	}
	if UintSize == 64 {
		in, exp := uint(x64), uint(want64)
		if out := ReverseBytes(in); out != exp {
			t.Fatalf("ReverseBytes(%#016x) == %#016x; want %#016x", in, out, exp)
		}
	}
}
func BenchmarkReverseBytes(b *testing.B) {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
var s uint
for i := 0; i < b.N; i++ {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
s += ReverseBytes(uint(i))
}
math/bits: faster OnesCount, added respective benchmarks Also: Changed Reverse/ReverseBytes implementations to use the same (smaller) masks as OnesCount. BenchmarkOnesCount-8 37.0 6.26 -83.08% BenchmarkOnesCount8-8 7.24 1.99 -72.51% BenchmarkOnesCount16-8 11.3 2.47 -78.14% BenchmarkOnesCount32-8 18.4 3.02 -83.59% BenchmarkOnesCount64-8 40.0 3.78 -90.55% BenchmarkReverse-8 6.69 6.22 -7.03% BenchmarkReverse8-8 1.64 1.64 +0.00% BenchmarkReverse16-8 2.26 2.18 -3.54% BenchmarkReverse32-8 2.88 2.87 -0.35% BenchmarkReverse64-8 5.64 4.34 -23.05% BenchmarkReverseBytes-8 2.48 2.17 -12.50% BenchmarkReverseBytes16-8 0.63 0.95 +50.79% BenchmarkReverseBytes32-8 1.13 1.24 +9.73% BenchmarkReverseBytes64-8 2.50 2.16 -13.60% OnesCount-8 37.0ns ± 0% 6.3ns ± 0% ~ (p=1.000 n=1+1) OnesCount8-8 7.24ns ± 0% 1.99ns ± 0% ~ (p=1.000 n=1+1) OnesCount16-8 11.3ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) OnesCount32-8 18.4ns ± 0% 3.0ns ± 0% ~ (p=1.000 n=1+1) OnesCount64-8 40.0ns ± 0% 3.8ns ± 0% ~ (p=1.000 n=1+1) Reverse-8 6.69ns ± 0% 6.22ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 1.64ns ± 0% 1.64ns ± 0% ~ (all samples are equal) Reverse16-8 2.26ns ± 0% 2.18ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 2.88ns ± 0% 2.87ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 5.64ns ± 0% 4.34ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 2.48ns ± 0% 2.17ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 0.63ns ± 0% 0.95ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 1.13ns ± 0% 1.24ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 2.50ns ± 0% 2.16ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I591b0ffc83fc3a42828256b6e5030f32c64f9497 Reviewed-on: https://go-review.googlesource.com/37218 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 14:20:11 -08:00
Unused = int(s)
}
func BenchmarkReverseBytes16(b *testing.B) {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
var s uint16
for i := 0; i < b.N; i++ {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
s += ReverseBytes16(uint16(i))
}
math/bits: faster OnesCount, added respective benchmarks Also: Changed Reverse/ReverseBytes implementations to use the same (smaller) masks as OnesCount. BenchmarkOnesCount-8 37.0 6.26 -83.08% BenchmarkOnesCount8-8 7.24 1.99 -72.51% BenchmarkOnesCount16-8 11.3 2.47 -78.14% BenchmarkOnesCount32-8 18.4 3.02 -83.59% BenchmarkOnesCount64-8 40.0 3.78 -90.55% BenchmarkReverse-8 6.69 6.22 -7.03% BenchmarkReverse8-8 1.64 1.64 +0.00% BenchmarkReverse16-8 2.26 2.18 -3.54% BenchmarkReverse32-8 2.88 2.87 -0.35% BenchmarkReverse64-8 5.64 4.34 -23.05% BenchmarkReverseBytes-8 2.48 2.17 -12.50% BenchmarkReverseBytes16-8 0.63 0.95 +50.79% BenchmarkReverseBytes32-8 1.13 1.24 +9.73% BenchmarkReverseBytes64-8 2.50 2.16 -13.60% OnesCount-8 37.0ns ± 0% 6.3ns ± 0% ~ (p=1.000 n=1+1) OnesCount8-8 7.24ns ± 0% 1.99ns ± 0% ~ (p=1.000 n=1+1) OnesCount16-8 11.3ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) OnesCount32-8 18.4ns ± 0% 3.0ns ± 0% ~ (p=1.000 n=1+1) OnesCount64-8 40.0ns ± 0% 3.8ns ± 0% ~ (p=1.000 n=1+1) Reverse-8 6.69ns ± 0% 6.22ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 1.64ns ± 0% 1.64ns ± 0% ~ (all samples are equal) Reverse16-8 2.26ns ± 0% 2.18ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 2.88ns ± 0% 2.87ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 5.64ns ± 0% 4.34ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 2.48ns ± 0% 2.17ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 0.63ns ± 0% 0.95ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 1.13ns ± 0% 1.24ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 2.50ns ± 0% 2.16ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I591b0ffc83fc3a42828256b6e5030f32c64f9497 Reviewed-on: https://go-review.googlesource.com/37218 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 14:20:11 -08:00
Unused = int(s)
}
func BenchmarkReverseBytes32(b *testing.B) {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
var s uint32
for i := 0; i < b.N; i++ {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
s += ReverseBytes32(uint32(i))
}
math/bits: faster OnesCount, added respective benchmarks Also: Changed Reverse/ReverseBytes implementations to use the same (smaller) masks as OnesCount. BenchmarkOnesCount-8 37.0 6.26 -83.08% BenchmarkOnesCount8-8 7.24 1.99 -72.51% BenchmarkOnesCount16-8 11.3 2.47 -78.14% BenchmarkOnesCount32-8 18.4 3.02 -83.59% BenchmarkOnesCount64-8 40.0 3.78 -90.55% BenchmarkReverse-8 6.69 6.22 -7.03% BenchmarkReverse8-8 1.64 1.64 +0.00% BenchmarkReverse16-8 2.26 2.18 -3.54% BenchmarkReverse32-8 2.88 2.87 -0.35% BenchmarkReverse64-8 5.64 4.34 -23.05% BenchmarkReverseBytes-8 2.48 2.17 -12.50% BenchmarkReverseBytes16-8 0.63 0.95 +50.79% BenchmarkReverseBytes32-8 1.13 1.24 +9.73% BenchmarkReverseBytes64-8 2.50 2.16 -13.60% OnesCount-8 37.0ns ± 0% 6.3ns ± 0% ~ (p=1.000 n=1+1) OnesCount8-8 7.24ns ± 0% 1.99ns ± 0% ~ (p=1.000 n=1+1) OnesCount16-8 11.3ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) OnesCount32-8 18.4ns ± 0% 3.0ns ± 0% ~ (p=1.000 n=1+1) OnesCount64-8 40.0ns ± 0% 3.8ns ± 0% ~ (p=1.000 n=1+1) Reverse-8 6.69ns ± 0% 6.22ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 1.64ns ± 0% 1.64ns ± 0% ~ (all samples are equal) Reverse16-8 2.26ns ± 0% 2.18ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 2.88ns ± 0% 2.87ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 5.64ns ± 0% 4.34ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 2.48ns ± 0% 2.17ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 0.63ns ± 0% 0.95ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 1.13ns ± 0% 1.24ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 2.50ns ± 0% 2.16ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I591b0ffc83fc3a42828256b6e5030f32c64f9497 Reviewed-on: https://go-review.googlesource.com/37218 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 14:20:11 -08:00
Unused = int(s)
}
func BenchmarkReverseBytes64(b *testing.B) {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
var s uint64
for i := 0; i < b.N; i++ {
math/bits: fix benchmarks (make sure calls don't get optimized away) Sum up function results and store them in an exported (global) variable. This prevents the compiler from optimizing away the otherwise side-effect free function calls. We now have more realistic set of benchmark numbers... Measured on 2.3 GHz Intel Core i7, running maxOS 10.12.3. Note: These measurements are based on the same "old" implementation as the prior measurements (commit 7d5c003). benchmark old ns/op new ns/op delta BenchmarkReverse-8 72.9 8.50 -88.34% BenchmarkReverse8-8 13.2 2.17 -83.56% BenchmarkReverse16-8 21.2 2.89 -86.37% BenchmarkReverse32-8 36.3 3.55 -90.22% BenchmarkReverse64-8 71.3 6.81 -90.45% BenchmarkReverseBytes-8 11.2 3.49 -68.84% BenchmarkReverseBytes16-8 6.24 0.93 -85.10% BenchmarkReverseBytes32-8 7.40 1.55 -79.05% BenchmarkReverseBytes64-8 10.5 2.47 -76.48% Reverse-8 72.9ns ± 0% 8.5ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 13.2ns ± 0% 2.2ns ± 0% ~ (p=1.000 n=1+1) Reverse16-8 21.2ns ± 0% 2.9ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 36.3ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 71.3ns ± 0% 6.8ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 11.2ns ± 0% 3.5ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 6.24ns ± 0% 0.93ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 7.40ns ± 0% 1.55ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 10.5ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I8aef1334b84f6cafd25edccad7e6868b37969efb Reviewed-on: https://go-review.googlesource.com/37213 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 12:37:38 -08:00
s += ReverseBytes64(uint64(i))
}
math/bits: faster OnesCount, added respective benchmarks Also: Changed Reverse/ReverseBytes implementations to use the same (smaller) masks as OnesCount. BenchmarkOnesCount-8 37.0 6.26 -83.08% BenchmarkOnesCount8-8 7.24 1.99 -72.51% BenchmarkOnesCount16-8 11.3 2.47 -78.14% BenchmarkOnesCount32-8 18.4 3.02 -83.59% BenchmarkOnesCount64-8 40.0 3.78 -90.55% BenchmarkReverse-8 6.69 6.22 -7.03% BenchmarkReverse8-8 1.64 1.64 +0.00% BenchmarkReverse16-8 2.26 2.18 -3.54% BenchmarkReverse32-8 2.88 2.87 -0.35% BenchmarkReverse64-8 5.64 4.34 -23.05% BenchmarkReverseBytes-8 2.48 2.17 -12.50% BenchmarkReverseBytes16-8 0.63 0.95 +50.79% BenchmarkReverseBytes32-8 1.13 1.24 +9.73% BenchmarkReverseBytes64-8 2.50 2.16 -13.60% OnesCount-8 37.0ns ± 0% 6.3ns ± 0% ~ (p=1.000 n=1+1) OnesCount8-8 7.24ns ± 0% 1.99ns ± 0% ~ (p=1.000 n=1+1) OnesCount16-8 11.3ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) OnesCount32-8 18.4ns ± 0% 3.0ns ± 0% ~ (p=1.000 n=1+1) OnesCount64-8 40.0ns ± 0% 3.8ns ± 0% ~ (p=1.000 n=1+1) Reverse-8 6.69ns ± 0% 6.22ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 1.64ns ± 0% 1.64ns ± 0% ~ (all samples are equal) Reverse16-8 2.26ns ± 0% 2.18ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 2.88ns ± 0% 2.87ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 5.64ns ± 0% 4.34ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 2.48ns ± 0% 2.17ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 0.63ns ± 0% 0.95ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 1.13ns ± 0% 1.24ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 2.50ns ± 0% 2.16ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I591b0ffc83fc3a42828256b6e5030f32c64f9497 Reviewed-on: https://go-review.googlesource.com/37218 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-02-17 14:20:11 -08:00
Unused = int(s)
}
// TestLen checks Len8, Len16, Len32, Len64 and Len against the reference
// table tab.
//
// Every 8-bit pattern i is shifted left by k = 0..55 bits to form x. The
// expected length is 0 for x == 0 and otherwise the bit length of i plus
// the shift. Each sized variant is exercised only when x fits in its
// operand width; Len is exercised at the width matching UintSize.
func TestLen(t *testing.T) {
	for i := 0; i < 256; i++ {
		// Bit length of the 8-bit value i. Named bitLen rather than len
		// so the builtin len is not shadowed.
		bitLen := 8 - tab[i].nlz
		for k := 0; k < 64-8; k++ {
			x := uint64(i) << uint(k)
			want := 0
			if x != 0 {
				want = bitLen + k
			}

			if x <= 1<<8-1 {
				got := Len8(uint8(x))
				if got != want {
					t.Fatalf("Len8(%#02x) == %d; want %d", x, got, want)
				}
			}

			if x <= 1<<16-1 {
				got := Len16(uint16(x))
				if got != want {
					t.Fatalf("Len16(%#04x) == %d; want %d", x, got, want)
				}
			}

			if x <= 1<<32-1 {
				got := Len32(uint32(x))
				if got != want {
					t.Fatalf("Len32(%#08x) == %d; want %d", x, got, want)
				}
				if UintSize == 32 {
					got := Len(uint(x))
					if got != want {
						t.Fatalf("Len(%#08x) == %d; want %d", x, got, want)
					}
				}
			}

			// Every uint64 value fits in 64 bits, so the 64-bit checks
			// need no range guard.
			got := Len64(x)
			if got != want {
				t.Fatalf("Len64(%#016x) == %d; want %d", x, got, want)
			}
			if UintSize == 64 {
				got := Len(uint(x))
				if got != want {
					t.Fatalf("Len(%#016x) == %d; want %d", x, got, want)
				}
			}
		}
	}
}
// ----------------------------------------------------------------------------
// Testing support
// entry records the reference bit counts for a single uint8 value.
type entry = struct {
	nlz int // number of leading zero bits (8 for the value 0)
	ntz int // number of trailing zero bits (8 for the value 0)
	pop int // number of one bits
}

// tab holds the reference entry for every uint8 value, indexed by value.
var tab [256]entry

// init fills tab using simple bit-at-a-time scans.
func init() {
	for v := range tab {
		if v == 0 {
			// No bit is set, so the scans below would never find one;
			// record the results for zero directly.
			tab[v] = entry{nlz: 8, ntz: 8, pop: 0}
			continue
		}
		e := &tab[v] // zero-valued, so the counters start at 0

		// Leading zeros: walk a probe down from bit 7 to the first set bit.
		for probe := 0x80; v&probe == 0; probe >>= 1 {
			e.nlz++
		}

		// Trailing zeros: walk a probe up from bit 0 to the first set bit.
		for probe := 1; v&probe == 0; probe <<= 1 {
			e.ntz++
		}

		// Population count: add the low bit, then shift it out.
		for rest := v; rest != 0; rest >>= 1 {
			e.pop += rest & 1
		}
	}
}