mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
bytes: add optimized countByte for amd64
Use SSE/AVX2 when counting a single byte. Inspired from runtime indexbyte implementation. Benchmark against previous implementation, where 1 byte in every 8 is the one we are looking for: * On a machine without AVX2 name old time/op new time/op delta CountSingle/10-4 61.8ns ±10% 15.6ns ±11% -74.83% (p=0.000 n=10+10) CountSingle/32-4 100ns ± 4% 17ns ±10% -82.54% (p=0.000 n=10+9) CountSingle/4K-4 9.66µs ± 3% 0.37µs ± 6% -96.21% (p=0.000 n=10+10) CountSingle/4M-4 11.0ms ± 6% 0.4ms ± 4% -96.04% (p=0.000 n=10+10) CountSingle/64M-4 194ms ± 8% 8ms ± 2% -95.64% (p=0.000 n=10+10) name old speed new speed delta CountSingle/10-4 162MB/s ±10% 645MB/s ±10% +297.00% (p=0.000 n=10+10) CountSingle/32-4 321MB/s ± 5% 1844MB/s ± 9% +474.79% (p=0.000 n=10+9) CountSingle/4K-4 424MB/s ± 3% 11169MB/s ± 6% +2533.10% (p=0.000 n=10+10) CountSingle/4M-4 381MB/s ± 7% 9609MB/s ± 4% +2421.88% (p=0.000 n=10+10) CountSingle/64M-4 346MB/s ± 7% 7924MB/s ± 2% +2188.78% (p=0.000 n=10+10) * On a machine with AVX2 name old time/op new time/op delta CountSingle/10-8 37.1ns ± 3% 8.2ns ± 1% -77.80% (p=0.000 n=10+10) CountSingle/32-8 66.1ns ± 3% 9.8ns ± 2% -85.23% (p=0.000 n=10+10) CountSingle/4K-8 7.36µs ± 3% 0.11µs ± 1% -98.54% (p=0.000 n=10+10) CountSingle/4M-8 7.46ms ± 2% 0.15ms ± 2% -97.95% (p=0.000 n=10+9) CountSingle/64M-8 124ms ± 2% 6ms ± 4% -95.09% (p=0.000 n=10+10) name old speed new speed delta CountSingle/10-8 269MB/s ± 3% 1213MB/s ± 1% +350.32% (p=0.000 n=10+10) CountSingle/32-8 484MB/s ± 4% 3277MB/s ± 2% +576.66% (p=0.000 n=10+10) CountSingle/4K-8 556MB/s ± 3% 37933MB/s ± 1% +6718.36% (p=0.000 n=10+10) CountSingle/4M-8 562MB/s ± 2% 27444MB/s ± 3% +4783.43% (p=0.000 n=10+9) CountSingle/64M-8 543MB/s ± 2% 11054MB/s ± 3% +1935.81% (p=0.000 n=10+10) Fixes #19411 Change-Id: Ieaf20b1fabccabe767c55c66e242e86f3617f883 Reviewed-on: https://go-review.googlesource.com/38258 Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall 
<khr@golang.org>
This commit is contained in:
parent
0ebaca6ba2
commit
01cd22c687
10 changed files with 317 additions and 4 deletions
|
|
@ -46,9 +46,8 @@ func explode(s []byte, n int) [][]byte {
|
||||||
return a[0:na]
|
return a[0:na]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count counts the number of non-overlapping instances of sep in s.
|
// countGeneric actually implements Count.
|
||||||
// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
|
func countGeneric(s, sep []byte) int {
|
||||||
func Count(s, sep []byte) int {
|
|
||||||
n := 0
|
n := 0
|
||||||
// special case
|
// special case
|
||||||
if len(sep) == 0 {
|
if len(sep) == 0 {
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ package bytes
|
||||||
// indexShortStr requires 2 <= len(c) <= shortStringLen
|
// indexShortStr requires 2 <= len(c) <= shortStringLen
|
||||||
func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s
|
func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s
|
||||||
func supportAVX2() bool // ../runtime/asm_$GOARCH.s
|
func supportAVX2() bool // ../runtime/asm_$GOARCH.s
|
||||||
|
func supportPOPCNT() bool // ../runtime/asm_$GOARCH.s
|
||||||
|
|
||||||
var shortStringLen int
|
var shortStringLen int
|
||||||
|
|
||||||
|
|
@ -94,6 +95,18 @@ func Index(s, sep []byte) int {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Special case for when we must count occurrences of a single byte.
|
||||||
|
func countByte(s []byte, c byte) int
|
||||||
|
|
||||||
|
// Count counts the number of non-overlapping instances of sep in s.
|
||||||
|
// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
|
||||||
|
func Count(s, sep []byte) int {
|
||||||
|
if len(sep) == 1 && supportPOPCNT() {
|
||||||
|
return countByte(s, sep[0])
|
||||||
|
}
|
||||||
|
return countGeneric(s, sep)
|
||||||
|
}
|
||||||
|
|
||||||
// primeRK is the prime base used in Rabin-Karp algorithm.
|
// primeRK is the prime base used in Rabin-Karp algorithm.
|
||||||
const primeRK = 16777619
|
const primeRK = 16777619
|
||||||
|
|
||||||
|
|
|
||||||
183
src/bytes/bytes_amd64.s
Normal file
183
src/bytes/bytes_amd64.s
Normal file
|
|
@ -0,0 +1,183 @@
|
||||||
|
// Copyright 2017 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
// countByte returns the number of bytes in s that are equal to c.
//
// We use:
//   SI: data
//   BX: data len
//   AL: byte sought
//   R12: running count (paths that loop)
// This requires the POPCNT instruction.
TEXT ·countByte(SB),NOSPLIT,$0-40
	MOVQ s+0(FP), SI
	MOVQ s_len+8(FP), BX
	MOVB c+24(FP), AL

	// Shuffle X0 around so that each byte contains
	// the character we're looking for.
	MOVD AX, X0
	PUNPCKLBW X0, X0
	PUNPCKLBW X0, X0
	PSHUFL $0, X0, X0

	CMPQ BX, $16
	JLT small

	MOVQ $0, R12 // Accumulator

	MOVQ SI, DI

	CMPQ BX, $32
	JA avx2
sse:
	LEAQ -16(SI)(BX*1), AX // AX = address of last 16 bytes
	JMP sseloopentry

sseloop:
	// Move the next 16-byte chunk of the data into X1.
	MOVOU (DI), X1
	// Compare bytes in X0 to X1.
	PCMPEQB X0, X1
	// Take the top bit of each byte in X1 and put the result in DX.
	PMOVMSKB X1, DX
	// Count number of matching bytes
	POPCNTL DX, DX
	// Accumulate into R12
	ADDQ DX, R12
	// Advance to next block.
	ADDQ $16, DI
sseloopentry:
	CMPQ DI, AX
	JBE sseloop

	// Get the number of bytes to consider in the last 16 bytes
	ANDQ $15, BX
	JZ end

	// Create mask to ignore overlap between previous 16 byte block
	// and the next: keep only the top (len mod 16) bits of the
	// 16-bit match mask.
	MOVQ $16, CX
	SUBQ BX, CX
	MOVQ $0xFFFF, R10
	SARQ CL, R10
	SALQ CL, R10

	// Process the last 16-byte chunk. This chunk may overlap with the
	// chunks we've already searched so we need to mask part of it.
	MOVOU (AX), X1
	PCMPEQB X0, X1
	PMOVMSKB X1, DX
	// Apply mask
	ANDQ R10, DX
	POPCNTL DX, DX
	ADDQ DX, R12
end:
	MOVQ R12, ret+32(FP)
	RET

// handle for lengths < 16
small:
	TESTQ BX, BX
	JEQ endzero

	// Check if we'll load across a page boundary.
	LEAQ 16(SI), AX
	TESTW $0xff0, AX
	JEQ endofpage

	// We must ignore high bytes as they aren't part of our slice.
	// Create mask with the low len bits set.
	MOVB BX, CX
	MOVQ $1, R10
	SALQ CL, R10
	SUBQ $1, R10

	// Load data
	MOVOU (SI), X1
	// Compare target byte with each byte in data.
	PCMPEQB X0, X1
	// Move result bits to integer register.
	PMOVMSKB X1, DX
	// Apply mask
	ANDQ R10, DX
	POPCNTL DX, DX
	// Directly return DX, we don't need to accumulate
	// since we have <16 bytes.
	MOVQ DX, ret+32(FP)
	RET
endzero:
	MOVQ $0, ret+32(FP)
	RET

endofpage:
	// We must ignore low bytes as they aren't part of our slice.
	// Create mask with the top len bits (of 16) set.
	MOVQ $16, CX
	SUBQ BX, CX
	MOVQ $0xFFFF, R10
	SARQ CL, R10
	SALQ CL, R10

	// Load data into the high end of X1: the 16 bytes that end
	// exactly at the end of the slice.
	MOVOU -16(SI)(BX*1), X1
	// Compare target byte with each byte in data.
	PCMPEQB X0, X1
	// Move result bits to integer register.
	PMOVMSKB X1, DX
	// Apply mask
	ANDQ R10, DX
	// Directly return DX, we don't need to accumulate
	// since we have <16 bytes.
	POPCNTL DX, DX
	MOVQ DX, ret+32(FP)
	RET

avx2:
	// Fall back to the SSE loop when the runtime flag is not set.
	CMPB runtime·support_avx2(SB), $1
	JNE sse
	MOVD AX, X0
	LEAQ -32(SI)(BX*1), R11 // R11 = address of last 32 bytes
	VPBROADCASTB X0, Y1
avx2_loop:
	VMOVDQU (DI), Y2
	VPCMPEQB Y1, Y2, Y3
	VPMOVMSKB Y3, DX
	POPCNTL DX, DX
	ADDQ DX, R12
	ADDQ $32, DI
	// DI and R11 are addresses, so compare them as unsigned values,
	// the same way the SSE loop does.
	CMPQ DI, R11
	JBE avx2_loop

	// If the length is a multiple of 32 the loop above has already
	// covered every byte, so skip the overlapping tail. (On loop exit
	// DI is always past R11, so comparing those two for equality can
	// never detect this case.)
	ANDQ $31, BX
	JZ endavx

	// Load address of the last 32 bytes.
	// There is an overlap with the previous block.
	MOVQ R11, DI
	VMOVDQU (DI), Y2
	VPCMPEQB Y1, Y2, Y3
	VPMOVMSKB Y3, DX
	// Exit AVX mode.
	VZEROUPPER

	// Create mask to ignore overlap between previous 32 byte block
	// and the next: keep only the top (len mod 32) bits of the
	// 32-bit match mask. BX already holds len mod 32.
	MOVQ $32, CX
	SUBQ BX, CX
	MOVQ $0xFFFFFFFF, R10
	SARQ CL, R10
	SALQ CL, R10
	// Apply mask
	ANDQ R10, DX
	POPCNTL DX, DX
	ADDQ DX, R12
	MOVQ R12, ret+32(FP)
	RET
endavx:
	// Exit AVX mode.
	VZEROUPPER
	MOVQ R12, ret+32(FP)
	RET
|
||||||
|
|
@ -39,3 +39,9 @@ func Index(s, sep []byte) int {
|
||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Count counts the number of non-overlapping instances of sep in s.
|
||||||
|
// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
|
||||||
|
func Count(s, sep []byte) int {
|
||||||
|
return countGeneric(s, sep)
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -97,6 +97,12 @@ func Index(s, sep []byte) int {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Count counts the number of non-overlapping instances of sep in s.
|
||||||
|
// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
|
||||||
|
func Count(s, sep []byte) int {
|
||||||
|
return countGeneric(s, sep)
|
||||||
|
}
|
||||||
|
|
||||||
// primeRK is the prime base used in Rabin-Karp algorithm.
|
// primeRK is the prime base used in Rabin-Karp algorithm.
|
||||||
const primeRK = 16777619
|
const primeRK = 16777619
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -396,6 +396,79 @@ func TestIndexRune(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// test count of a single byte across page offsets
|
||||||
|
func TestCountByte(t *testing.T) {
|
||||||
|
b := make([]byte, 5015) // bigger than a page
|
||||||
|
windows := []int{1, 2, 3, 4, 15, 16, 17, 31, 32, 33, 63, 64, 65, 128}
|
||||||
|
testCountWindow := func(i, window int) {
|
||||||
|
for j := 0; j < window; j++ {
|
||||||
|
b[i+j] = byte(100)
|
||||||
|
p := Count(b[i:i+window], []byte{100})
|
||||||
|
if p != j+1 {
|
||||||
|
t.Errorf("TestCountByte.Count(%q, 100) = %d", b[i:i+window], p)
|
||||||
|
}
|
||||||
|
pGeneric := CountGeneric(b[i:i+window], []byte{100})
|
||||||
|
if pGeneric != j+1 {
|
||||||
|
t.Errorf("TestCountByte.CountGeneric(%q, 100) = %d", b[i:i+window], p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
maxWnd := windows[len(windows)-1]
|
||||||
|
|
||||||
|
for i := 0; i <= 2*maxWnd; i++ {
|
||||||
|
for _, window := range windows {
|
||||||
|
if window > len(b[i:]) {
|
||||||
|
window = len(b[i:])
|
||||||
|
}
|
||||||
|
testCountWindow(i, window)
|
||||||
|
for j := 0; j < window; j++ {
|
||||||
|
b[i+j] = byte(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i := 4096 - (maxWnd + 1); i < len(b); i++ {
|
||||||
|
for _, window := range windows {
|
||||||
|
if window > len(b[i:]) {
|
||||||
|
window = len(b[i:])
|
||||||
|
}
|
||||||
|
testCountWindow(i, window)
|
||||||
|
for j := 0; j < window; j++ {
|
||||||
|
b[i+j] = byte(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure we don't count bytes outside our window
|
||||||
|
func TestCountByteNoMatch(t *testing.T) {
|
||||||
|
b := make([]byte, 5015)
|
||||||
|
windows := []int{1, 2, 3, 4, 15, 16, 17, 31, 32, 33, 63, 64, 65, 128}
|
||||||
|
for i := 0; i <= len(b); i++ {
|
||||||
|
for _, window := range windows {
|
||||||
|
if window > len(b[i:]) {
|
||||||
|
window = len(b[i:])
|
||||||
|
}
|
||||||
|
// Fill the window with non-match
|
||||||
|
for j := 0; j < window; j++ {
|
||||||
|
b[i+j] = byte(100)
|
||||||
|
}
|
||||||
|
// Try to find something that doesn't exist
|
||||||
|
p := Count(b[i:i+window], []byte{0})
|
||||||
|
if p != 0 {
|
||||||
|
t.Errorf("TestCountByteNoMatch(%q, 0) = %d", b[i:i+window], p)
|
||||||
|
}
|
||||||
|
pGeneric := CountGeneric(b[i:i+window], []byte{0})
|
||||||
|
if pGeneric != 0 {
|
||||||
|
t.Errorf("TestCountByteNoMatch.CountGeneric(%q, 100) = %d", b[i:i+window], p)
|
||||||
|
}
|
||||||
|
for j := 0; j < window; j++ {
|
||||||
|
b[i+j] = byte(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var bmbuf []byte
|
var bmbuf []byte
|
||||||
|
|
||||||
func valName(x int) string {
|
func valName(x int) string {
|
||||||
|
|
@ -589,6 +662,26 @@ func BenchmarkCountEasy(b *testing.B) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkCountSingle(b *testing.B) {
|
||||||
|
benchBytes(b, indexSizes, func(b *testing.B, n int) {
|
||||||
|
buf := bmbuf[0:n]
|
||||||
|
step := 8
|
||||||
|
for i := 0; i < len(buf); i += step {
|
||||||
|
buf[i] = 1
|
||||||
|
}
|
||||||
|
expect := (len(buf) + (step - 1)) / step
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
j := Count(buf, []byte{1})
|
||||||
|
if j != expect {
|
||||||
|
b.Fatal("bad count", j, expect)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i := 0; i < len(buf); i++ {
|
||||||
|
buf[i] = 0
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
type ExplodeTest struct {
|
type ExplodeTest struct {
|
||||||
s string
|
s string
|
||||||
n int
|
n int
|
||||||
|
|
|
||||||
|
|
@ -7,3 +7,4 @@ package bytes
|
||||||
// Export func for testing
|
// Export func for testing
|
||||||
var IndexBytePortable = indexBytePortable
|
var IndexBytePortable = indexBytePortable
|
||||||
var EqualPortable = equalPortable
|
var EqualPortable = equalPortable
|
||||||
|
var CountGeneric = countGeneric
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ runtime/asm_amd64.s: [GOARCH] cannot check cross-package assembly function: Comp
|
||||||
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: indexShortStr is in package bytes
|
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: indexShortStr is in package bytes
|
||||||
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: supportAVX2 is in package strings
|
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: supportAVX2 is in package strings
|
||||||
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: supportAVX2 is in package bytes
|
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: supportAVX2 is in package bytes
|
||||||
|
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: supportPOPCNT is in package bytes
|
||||||
|
|
||||||
// Intentionally missing declarations. These are special assembly routines.
|
// Intentionally missing declarations. These are special assembly routines.
|
||||||
// Some are jumped into from other routines, with values in specific registers.
|
// Some are jumped into from other routines, with values in specific registers.
|
||||||
|
|
|
||||||
|
|
@ -91,8 +91,13 @@ testbmi1:
|
||||||
testbmi2:
|
testbmi2:
|
||||||
MOVB $0, runtime·support_bmi2(SB)
|
MOVB $0, runtime·support_bmi2(SB)
|
||||||
TESTL $(1<<8), runtime·cpuid_ebx7(SB) // check for BMI2 bit
|
TESTL $(1<<8), runtime·cpuid_ebx7(SB) // check for BMI2 bit
|
||||||
JEQ nocpuinfo
|
JEQ testpopcnt
|
||||||
MOVB $1, runtime·support_bmi2(SB)
|
MOVB $1, runtime·support_bmi2(SB)
|
||||||
|
testpopcnt:
|
||||||
|
MOVB $0, runtime·support_popcnt(SB)
|
||||||
|
TESTL $(1<<23), runtime·cpuid_ecx(SB) // check for POPCNT bit
|
||||||
|
JEQ nocpuinfo
|
||||||
|
MOVB $1, runtime·support_popcnt(SB)
|
||||||
nocpuinfo:
|
nocpuinfo:
|
||||||
|
|
||||||
// if there is an _cgo_init, call it.
|
// if there is an _cgo_init, call it.
|
||||||
|
|
@ -1697,6 +1702,11 @@ TEXT bytes·supportAVX2(SB),NOSPLIT,$0-1
|
||||||
MOVB AX, ret+0(FP)
|
MOVB AX, ret+0(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
// bytes·supportPOPCNT copies the runtime·support_popcnt flag into its
// one-byte result so that package bytes can read it.
TEXT bytes·supportPOPCNT(SB),NOSPLIT,$0-1
|
||||||
|
// Load the byte-sized flag, zero-extended, into AX.
MOVBLZX runtime·support_popcnt(SB), AX
|
||||||
|
// Store the low byte of AX as the return value.
MOVB AX, ret+0(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
|
TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
|
||||||
MOVQ s+0(FP), DI
|
MOVQ s+0(FP), DI
|
||||||
// We want len in DX and AX, because PCMPESTRI implicitly consumes them
|
// We want len in DX and AX, because PCMPESTRI implicitly consumes them
|
||||||
|
|
|
||||||
|
|
@ -728,6 +728,7 @@ var (
|
||||||
support_avx2 bool
|
support_avx2 bool
|
||||||
support_bmi1 bool
|
support_bmi1 bool
|
||||||
support_bmi2 bool
|
support_bmi2 bool
|
||||||
|
support_popcnt bool
|
||||||
|
|
||||||
goarm uint8 // set by cmd/link on arm systems
|
goarm uint8 // set by cmd/link on arm systems
|
||||||
framepointer_enabled bool // set by cmd/link
|
framepointer_enabled bool // set by cmd/link
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue