go/src/internal/cpu/cpu_x86.go

// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build 386 || amd64

package cpu

const CacheLinePadSize = 64

// cpuid is implemented in cpu_x86.s.
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)

// xgetbv with ecx = 0 is implemented in cpu_x86.s.
func xgetbv() (eax, edx uint32)

// getGOAMD64level is implemented in cpu_x86.s. Returns number in [1,4].
func getGOAMD64level() int32

const (
	// ecx bits
	cpuid_SSE3      = 1 << 0
	cpuid_PCLMULQDQ = 1 << 1
	cpuid_SSSE3     = 1 << 9
	cpuid_GFNI      = 1 << 8
	cpuid_FMA       = 1 << 12
	cpuid_SSE41     = 1 << 19
	cpuid_SSE42     = 1 << 20
	cpuid_POPCNT    = 1 << 23
	cpuid_AES       = 1 << 25
	cpuid_OSXSAVE   = 1 << 27
	cpuid_AVX       = 1 << 28

	// ebx bits
	cpuid_BMI1     = 1 << 3
	cpuid_AVX2     = 1 << 5
	cpuid_BMI2     = 1 << 8
	cpuid_ERMS     = 1 << 9
	cpuid_AVX512F  = 1 << 16
	cpuid_AVX512DQ = 1 << 17
	cpuid_ADX      = 1 << 19
	cpuid_AVX512CD = 1 << 28
	cpuid_SHA      = 1 << 29
	cpuid_AVX512BW = 1 << 30
	cpuid_AVX512VL = 1 << 31
	// edx bits
	cpuid_FSRM = 1 << 4
	// edx bits for CPUID 0x80000001
	cpuid_RDTSCP = 1 << 27
)

var maxExtendedFunctionInformation uint32

func doinit() {
	options = []option{
		{Name: "adx", Feature: &X86.HasADX},
		{Name: "aes", Feature: &X86.HasAES},
		{Name: "erms", Feature: &X86.HasERMS},
		{Name: "fsrm", Feature: &X86.HasFSRM},
		{Name: "pclmulqdq", Feature: &X86.HasPCLMULQDQ},
		{Name: "rdtscp", Feature: &X86.HasRDTSCP},
		{Name: "sha", Feature: &X86.HasSHA},
	}
	level := getGOAMD64level()
	if level < 2 {
		// These options are required at level 2. At lower levels
		// they can be turned off.
		options = append(options,
			option{Name: "popcnt", Feature: &X86.HasPOPCNT},
			option{Name: "sse3", Feature: &X86.HasSSE3},
			option{Name: "sse41", Feature: &X86.HasSSE41},
			option{Name: "sse42", Feature: &X86.HasSSE42},
			option{Name: "ssse3", Feature: &X86.HasSSSE3})
	}
	if level < 3 {
		// These options are required at level 3. At lower levels
		// they can be turned off.
		options = append(options,
			option{Name: "avx", Feature: &X86.HasAVX},
			option{Name: "avx2", Feature: &X86.HasAVX2},
			option{Name: "bmi1", Feature: &X86.HasBMI1},
			option{Name: "bmi2", Feature: &X86.HasBMI2},
			option{Name: "fma", Feature: &X86.HasFMA})
	}
	if level < 4 {
		// These options are required at level 4. At lower levels
		// they can be turned off.
		options = append(options,
			option{Name: "avx512f", Feature: &X86.HasAVX512F},
			option{Name: "avx512cd", Feature: &X86.HasAVX512CD},
			option{Name: "avx512bw", Feature: &X86.HasAVX512BW},
			option{Name: "avx512dq", Feature: &X86.HasAVX512DQ},
			option{Name: "avx512vl", Feature: &X86.HasAVX512VL},
		)
	}

	maxID, _, _, _ := cpuid(0, 0)

	if maxID < 1 {
		return
	}

	maxExtendedFunctionInformation, _, _, _ = cpuid(0x80000000, 0)

	_, _, ecx1, _ := cpuid(1, 0)

	X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
	X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
	X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3)
	X86.HasSSE41 = isSet(ecx1, cpuid_SSE41)
	X86.HasSSE42 = isSet(ecx1, cpuid_SSE42)
	X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT)
	X86.HasAES = isSet(ecx1, cpuid_AES)

	// OSXSAVE can be false when using older Operating Systems
	// or when explicitly disabled on newer Operating Systems by
	// e.g. setting the xsavedisable boot option on Windows 10.
	X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE)

	// The FMA instruction set extension only has VEX prefixed instructions.
	// VEX prefixed instructions require OSXSAVE to be enabled.
	// See Intel 64 and IA-32 Architecture Software Developer’s Manual Volume 2
	// Section 2.4 "AVX and SSE Instruction Exception Specification"
	X86.HasFMA = isSet(ecx1, cpuid_FMA) && X86.HasOSXSAVE

	osSupportsAVX := false
	osSupportsAVX512 := false
	// For XGETBV, OSXSAVE bit is required and sufficient.
	if X86.HasOSXSAVE {
		eax, _ := xgetbv()
		// Check if XMM and YMM registers have OS support.
		osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2)

		// AVX512 detection does not work on Darwin,
		// see https://github.com/golang/go/issues/49233
		//
		// Check if opmask, ZMMhi256 and Hi16_ZMM have OS support.
		osSupportsAVX512 = osSupportsAVX && isSet(eax, 1<<5) && isSet(eax, 1<<6) && isSet(eax, 1<<7)
	}

	X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX

	if maxID < 7 {
		return
	}

	_, ebx7, ecx7, edx7 := cpuid(7, 0)
	X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
	X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
	X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
	X86.HasERMS = isSet(ebx7, cpuid_ERMS)
	X86.HasADX = isSet(ebx7, cpuid_ADX)
	X86.HasSHA = isSet(ebx7, cpuid_SHA)

	X86.HasAVX512F = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512
	if X86.HasAVX512F {
		X86.HasAVX512CD = isSet(ebx7, cpuid_AVX512CD)
		X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW)
		X86.HasAVX512DQ = isSet(ebx7, cpuid_AVX512DQ)
		X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL)
	}

	X86.HasFSRM = isSet(edx7, cpuid_FSRM)
	X86.HasGFNI = isSet(ecx7, cpuid_GFNI)

	var maxExtendedInformation uint32
	maxExtendedInformation, _, _, _ = cpuid(0x80000000, 0)

	if maxExtendedInformation < 0x80000001 {
		return
	}

	_, _, _, edxExt1 := cpuid(0x80000001, 0)
	X86.HasRDTSCP = isSet(edxExt1, cpuid_RDTSCP)

	doDerived = func() {
		// Rather than carefully gating on fundamental AVX-512 features, we have
		// a virtual "AVX512" feature that captures F+CD+BW+DQ+VL. BW, DQ, and
		// VL have a huge effect on which AVX-512 instructions are available,
		// and these have all been supported on everything except the earliest
		// Phi chips with AVX-512. No CPU has had CD without F, so we include
		// it. GOAMD64=v4 also implies exactly this set, and these are all
		// included in AVX10.1.
		X86.HasAVX512 = X86.HasAVX512F && X86.HasAVX512CD && X86.HasAVX512BW && X86.HasAVX512DQ && X86.HasAVX512VL
		X86.HasAVX512GFNI = X86.HasAVX512 && X86.HasGFNI
	}
}

func isSet(hwc uint32, value uint32) bool {
	return hwc&value != 0
}

// Name returns the CPU name given by the vendor.
// If the CPU name can not be determined an
// empty string is returned.
func Name() string {
	if maxExtendedFunctionInformation < 0x80000004 {
		return ""
	}

	data := make([]byte, 0, 3*4*4)

	var eax, ebx, ecx, edx uint32
	eax, ebx, ecx, edx = cpuid(0x80000002, 0)
	data = appendBytes(data, eax, ebx, ecx, edx)
	eax, ebx, ecx, edx = cpuid(0x80000003, 0)
	data = appendBytes(data, eax, ebx, ecx, edx)
	eax, ebx, ecx, edx = cpuid(0x80000004, 0)
	data = appendBytes(data, eax, ebx, ecx, edx)

	// Trim leading spaces.
	for len(data) > 0 && data[0] == ' ' {
		data = data[1:]
	}

	// Trim tail after and including the first null byte.
	for i, c := range data {
		if c == '\x00' {
			data = data[:i]
			break
		}
	}

	return string(data)
}

func appendBytes(b []byte, args ...uint32) []byte {
	for _, arg := range args {
		b = append(b,
			byte((arg >> 0)),
			byte((arg >> 8)),
			byte((arg >> 16)),
			byte((arg >> 24)))
	}
	return b
}
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
+								// Copyright 2017 The Go Authors. All rights reserved.
 								// Use of this source code is governed by a BSD-style
 								// license that can be found in the LICENSE file.
-												all: go fmt std cmd (but revert vendor)

Make all our package sources use Go 1.17 gofmt format
(adding //go:build lines).

Part of //go:build change (#41184).
See https://golang.org/design/draft-gobuild

Change-Id: Ia0534360e4957e58cd9a18429c39d0e32a6addb4
Reviewed-on: https://go-review.googlesource.com/c/go/+/294430
Trust: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>

											
										
										
											2021-02-19 18:35:10 -05:00
+								//go:build 386 || amd64
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
 								package cpu
-												internal/cpu: add a CacheLinePadSize constant

The new constant CacheLinePadSize can be used to compute best effort
alignment of structs to cache lines.

e.g. the runtime can use this in the locktab definition:
var locktab [57]struct {
        l   spinlock
        pad [cpu.CacheLinePadSize - unsafe.Sizeof(spinlock{})]byte
}

Change-Id: I86f6fbfc5ee7436f742776a7d4a99a1d54ffccc8
Reviewed-on: https://go-review.googlesource.com/131237
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2018-08-24 17:07:20 +02:00
+								const CacheLinePadSize = 64
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
 								// cpuid is implemented in cpu_x86.s.
 								func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
 								// xgetbv with ecx = 0 is implemented in cpu_x86.s.
 								func xgetbv() (eax, edx uint32)
-												internal/cpu: disallow disabling options that are required for microarch

e.g., if GOAMD64=v3, don't allow GODEBUG=cpu.XXX=off for XXX which
are required for v3.

Change-Id: Ib58a4c8b13c5464ba476448ba44bbb261218787c
Reviewed-on: https://go-review.googlesource.com/c/go/+/391694
Trust: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>

											
										
										
											2022-03-10 15:26:22 -08:00
+								// getGOAMD64level is implemented in cpu_x86.s. Returns number in [1,4].
 								func getGOAMD64level() int32
-												internal/cpu: align capability definitions for x86 with other architectures

Use constant masks and align the definition of isSet with
arm64 and ppc64x.

Change-Id: I0c6eae30da5e3ce797cde0dab4a39855d4d245d9
Reviewed-on: https://go-review.googlesource.com/94759
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-02-04 20:39:39 +01:00
+								const (
 									// ecx bits
 									cpuid_SSE3      = 1 << 0
 									cpuid_PCLMULQDQ = 1 << 1
 									cpuid_SSSE3     = 1 << 9
-												[dev.simd] internal/cpu: add GFNI feature check

This CL amends HasAVX512 flag with GFNI check.

This is needed because our SIMD API supports Galois Field operations.

Change-Id: I3e957b7b2215d2b7b6b8a7a0ca3e2e60d453b2e5
Reviewed-on: https://go-review.googlesource.com/c/go/+/685295
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

											
										
										
											2025-07-01 18:00:33 +00:00
+									cpuid_GFNI      = 1 << 8
-												internal/cpu: align capability definitions for x86 with other architectures

Use constant masks and align the definition of isSet with
arm64 and ppc64x.

Change-Id: I0c6eae30da5e3ce797cde0dab4a39855d4d245d9
Reviewed-on: https://go-review.googlesource.com/94759
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-02-04 20:39:39 +01:00
+									cpuid_FMA       = 1 << 12
 									cpuid_SSE41     = 1 << 19
 									cpuid_SSE42     = 1 << 20
 									cpuid_POPCNT    = 1 << 23
 									cpuid_AES       = 1 << 25
 									cpuid_OSXSAVE   = 1 << 27
 									cpuid_AVX       = 1 << 28
 									// ebx bits
-												internal/cpu: detect support of AVX512

Extracts changes from that were submitted in other CLs to enable AVX512
detection, notably:
- https://go-review.googlesource.com/c/go/+/271521
- https://go-review.googlesource.com/c/go/+/379394
- https://go-review.googlesource.com/c/go/+/502476

This change adds properties to the cpu.X86 fields to enable runtime
detection of AVX512, and the hasAVX512F, hasAVX512BW, and hasAVX512VL
macros to support bypassing runtime checks in assembly code when
GOAMD64=v4 is set.

Change-Id: Ia7c3f22f1e66bf1de575aba522cb0d0a55ce791f
Reviewed-on: https://go-review.googlesource.com/c/go/+/536257
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Martin Möhrmann <martin@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
Commit-Queue: Martin Möhrmann <martin@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>

											
										
										
											2023-10-18 19:21:55 +00:00
+									cpuid_BMI1     = 1 << 3
 									cpuid_AVX2     = 1 << 5
 									cpuid_BMI2     = 1 << 8
 									cpuid_ERMS     = 1 << 9
 									cpuid_AVX512F  = 1 << 16
-												[dev.simd] internal/cpu: add AVX-512-CD and DQ, and derived "basic AVX-512"

This adds detection for the CD and DQ sub-features of x86 AVX-512.

Building on these, we also add a "derived" AVX-512 feature that
bundles together the basic usable subset of subfeatures. Despite the F
in AVX-512-F standing for "foundation", AVX-512-F+BW+DQ+VL together
really form the basic usable subset of AVX-512 functionality. These
have also all been supported together by almost every CPU, and are
guaranteed by GOAMD64=v4, so there's little point in separating them
out.

Change-Id: I34356502bd1853ba2372e48db0b10d55cffe07a1
Reviewed-on: https://go-review.googlesource.com/c/go/+/680899
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

											
										
										
											2025-06-12 15:24:22 -04:00
+									cpuid_AVX512DQ = 1 << 17
-												internal/cpu: detect support of AVX512

Extracts changes from that were submitted in other CLs to enable AVX512
detection, notably:
- https://go-review.googlesource.com/c/go/+/271521
- https://go-review.googlesource.com/c/go/+/379394
- https://go-review.googlesource.com/c/go/+/502476

This change adds properties to the cpu.X86 fields to enable runtime
detection of AVX512, and the hasAVX512F, hasAVX512BW, and hasAVX512VL
macros to support bypassing runtime checks in assembly code when
GOAMD64=v4 is set.

Change-Id: Ia7c3f22f1e66bf1de575aba522cb0d0a55ce791f
Reviewed-on: https://go-review.googlesource.com/c/go/+/536257
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Martin Möhrmann <martin@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
Commit-Queue: Martin Möhrmann <martin@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>

											
										
										
											2023-10-18 19:21:55 +00:00
+									cpuid_ADX      = 1 << 19
-												[dev.simd] internal/cpu: add AVX-512-CD and DQ, and derived "basic AVX-512"

This adds detection for the CD and DQ sub-features of x86 AVX-512.

Building on these, we also add a "derived" AVX-512 feature that
bundles together the basic usable subset of subfeatures. Despite the F
in AVX-512-F standing for "foundation", AVX-512-F+BW+DQ+VL together
really form the basic usable subset of AVX-512 functionality. These
have also all been supported together by almost every CPU, and are
guaranteed by GOAMD64=v4, so there's little point in separating them
out.

Change-Id: I34356502bd1853ba2372e48db0b10d55cffe07a1
Reviewed-on: https://go-review.googlesource.com/c/go/+/680899
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

											
										
										
											2025-06-12 15:24:22 -04:00
+									cpuid_AVX512CD = 1 << 28
-												internal/cpu: detect support of AVX512

Extracts changes from that were submitted in other CLs to enable AVX512
detection, notably:
- https://go-review.googlesource.com/c/go/+/271521
- https://go-review.googlesource.com/c/go/+/379394
- https://go-review.googlesource.com/c/go/+/502476

This change adds properties to the cpu.X86 fields to enable runtime
detection of AVX512, and the hasAVX512F, hasAVX512BW, and hasAVX512VL
macros to support bypassing runtime checks in assembly code when
GOAMD64=v4 is set.

Change-Id: Ia7c3f22f1e66bf1de575aba522cb0d0a55ce791f
Reviewed-on: https://go-review.googlesource.com/c/go/+/536257
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Martin Möhrmann <martin@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
Commit-Queue: Martin Möhrmann <martin@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>

											
										
										
											2023-10-18 19:21:55 +00:00
+									cpuid_SHA      = 1 << 29
 									cpuid_AVX512BW = 1 << 30
 									cpuid_AVX512VL = 1 << 31
-												runtime: add ERMS-based memmove support for modern CPU platforms

The current memmove implementation uses REP MOVSB to copy data larger than
2KB when the useAVXmemmove global variable is false and the CPU supports
the ERMS feature.

This feature is currently only enabled on CPUs in the Sandy Bridge (Client)
, Sandy Bridge (Server), Ivy Bridge (Client), and Ivy Bridge (Server)
microarchitectures.

For modern Intel CPU microarchitectures that support the ERMS feature, such
as Ice Lake (Server), Sapphire Rapids , REP MOVSB achieves better
performance than the AVX-based copy currently implemented in memmove.

Benchstat result:

goos: linux
goarch: amd64
pkg: runtime
cpu: Intel(R) Xeon(R) Gold 6348 CPU @ 2.60GHz
               │  ./old.txt  │              ./new.txt              │
               │   sec/op    │   sec/op     vs base                │
Memmove/2048-2   25.24n ± 0%   24.27n ± 0%   -3.84% (p=0.000 n=10)
Memmove/4096-2   44.87n ± 0%   33.16n ± 1%  -26.11% (p=0.000 n=10)
geomean          33.65n        28.37n       -15.71%

               │  ./old.txt   │               ./new.txt               │
               │     B/s      │      B/s       vs base                │
Memmove/2048-2   75.56Gi ± 0%    78.59Gi ± 0%   +4.02% (p=0.000 n=10)
Memmove/4096-2   85.01Gi ± 0%   115.05Gi ± 1%  +35.34% (p=0.000 n=10)
geomean          80.14Gi         95.09Gi       +18.65%

Fixes #66958

Change-Id: I1fafd1b51a16752f83ac15047cf3b29422a79d5d
GitHub-Last-Rev: 89cf5af32b1b41e1499282058656a8a5c7aed359
GitHub-Pull-Request: golang/go#66959
Reviewed-on: https://go-review.googlesource.com/c/go/+/580735
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>

											
										
										
											2024-07-02 04:02:12 +00:00
+									// edx bits
 									cpuid_FSRM = 1 << 4
-												runtime: use RDTSCP for instruction stream serialized read of TSC

To measure all instructions having been completed before reading
the time stamp counter with RDTSC an instruction sequence that
has instruction stream serializing properties which guarantee
waiting until all previous instructions have been executed is
needed. This does not necessary mean to wait for all stores to
be globally visible.

This CL aims to remove vendor specific logic for determining the
instruction sequence with CPU feature flag checks that are
CPU vendor independent.

For intel LFENCE has the wanted properties at least
since it was introduced together with SSE2 support.

On AMD instruction stream serializing LFENCE is supported by setting
an MSR C001_1029[1]=1 on AMD family 10h/12h/14h/15h/16h/17h processors.
AMD family 0Fh/11h processors support LFENCE as serializing always.
AMD plans support for this MSR and access to this bit for all future processors.
Source: https://developer.amd.com/wp-content/resources/Managing-Speculation-on-AMD-Processors.pdf

Reading the MSR to determine LFENCE properties is not always possible
or reliable (hypervisors). The Linux kernel is relying on serializing
LFENCE on AMD CPUs since a commit in July 2019: https://lkml.org/lkml/2019/7/22/295
and the MSR C001_1029 to enable serialization has been set by default
with the Spectre v1 mitigations.

Using an MFENCE on AMD is waiting on previous instructions having been executed
but in addition also flushes store buffers.

To align the serialization properties without runtime detection
of CPU manufacturers we can use the newer RDTSCP instruction which
waits until all previous instructions have been executed.

RDTSCP is available on Intel since around 2008 and on AMD CPUs since
around 2006. Support for RDTSCP can be checked independently
of manufacturer by checking CPUID bits.

Using RDTSCP is the default in Linux to read TSC in program order
when the instruction is available.
https://github.com/torvalds/linux/blob/e22ce8eb631bdc47a4a4ea7ecf4e4ba499db4f93/arch/x86/include/asm/msr.h#L231

Change-Id: Ifa841843b9abb2816f8f0754a163ebf01385306d
Reviewed-on: https://go-review.googlesource.com/c/go/+/344429
Reviewed-by: Keith Randall <khr@golang.org>
Trust: Martin Möhrmann <martin@golang.org>
Run-TryBot: Martin Möhrmann <martin@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>

											
										
										
											2021-08-23 13:53:22 +02:00
+									// edx bits for CPUID 0x80000001
 									cpuid_RDTSCP = 1 << 27
-												internal/cpu: align capability definitions for x86 with other architectures

Use constant masks and align the definition of isSet with
arm64 and ppc64x.

Change-Id: I0c6eae30da5e3ce797cde0dab4a39855d4d245d9
Reviewed-on: https://go-review.googlesource.com/94759
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-02-04 20:39:39 +01:00
+								)
-												testing: print cpu type as label for benchmarks

Supports 386 and amd64 architectures on all operating systems.

Example output:
$ go test -bench=.*
goos: darwin
goarch: amd64
pkg: strconv
cpu: Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz
BenchmarkAtof64Decimal-4        	24431032	        46.8 ns/op
...

As the displayed CPU information is only used for information
purposes it is lazily initialized when needed using the new
internal/sysinfo package.

This allows internal/cpu to stay without dependencies and avoid
initialization costs when the CPU information is not needed as
the new code to query the CPU name in internal/cpu can be
dead code eliminated if not used.

Fixes #39214

Change-Id: I77ae5c5d2fed6b28fa78dd45075f9f0a6a7f1bfd
Reviewed-on: https://go-review.googlesource.com/c/go/+/263804
Trust: Martin Möhrmann <moehrmann@google.com>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2020-10-20 09:56:14 +02:00
+								var maxExtendedFunctionInformation uint32
-												internal/cpu,runtime: call cpu.initialize before alginit

runtime.alginit needs runtime/support_{aes,ssse3,sse41} feature flag
to init aeshash function but internal/cpu.init not be called yet.
This CL will call internal/cpu.initialize before runtime.alginit, so
that we can move all cpu features related code to internal/cpu.

Change-Id: I00b8e403ace3553f8c707563d95f27dade0bc853
Reviewed-on: https://go-review.googlesource.com/104636
Reviewed-by: Tobias Klauser <tobias.klauser@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2018-04-10 22:33:03 +08:00
+								func doinit() {
-												internal/cpu: add experiment to disable CPU features with GODEBUGCPU

Needs the go compiler to be build with GOEXPERIMENT=debugcpu to be active.

The GODEBUGCPU environment variable can be used to disable usage of
specific processor features in the Go standard library.
This is useful for testing and benchmarking different code paths that
are guarded by internal/cpu variable checks.

Use of processor features can not be enabled through GODEBUGCPU.

To disable usage of AVX and SSE41 cpu features on GOARCH amd64 use:
GODEBUGCPU=avx=0,sse41=0

The special "all" option can be used to disable all options:
GODEBUGCPU=all=0

Updates #12805
Updates #15403

Change-Id: I699c2e6f74d98472b6fb4b1e5ffbf29b15697aab
Reviewed-on: https://go-review.googlesource.com/91737
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-01-26 12:14:27 +01:00
+									options = []option{
-												internal/cpu: add invalid option warnings and support to enable cpu features

This CL adds the ability to enable the cpu feature FEATURE by specifying
FEATURE=on in GODEBUGCPU. Syntax support to enable cpu features is useful
in combination with a preceeding all=off to disable all but some specific
cpu features. Example:

GODEBUGCPU=all=off,sse3=on

This CL implements printing of warnings for invalid GODEBUGCPU settings:
- requests enabling features that are not supported with the current CPU
- specifying values different than 'on' or 'off' for a feature
- settings for unkown cpu feature names

Updates #27218

Change-Id: Ic13e5c4c35426a390c50eaa4bd2a408ef2ee21be
Reviewed-on: https://go-review.googlesource.com/c/141800
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2018-10-12 19:17:21 +02:00
+										{Name: "adx", Feature: &X86.HasADX},
 										{Name: "aes", Feature: &X86.HasAES},
 										{Name: "erms", Feature: &X86.HasERMS},
-												runtime: add ERMS-based memmove support for modern CPU platforms

The current memmove implementation uses REP MOVSB to copy data larger than
2KB when the useAVXmemmove global variable is false and the CPU supports
the ERMS feature.

This feature is currently only enabled on CPUs in the Sandy Bridge (Client)
, Sandy Bridge (Server), Ivy Bridge (Client), and Ivy Bridge (Server)
microarchitectures.

For modern Intel CPU microarchitectures that support the ERMS feature, such
as Ice Lake (Server), Sapphire Rapids , REP MOVSB achieves better
performance than the AVX-based copy currently implemented in memmove.

Benchstat result:

goos: linux
goarch: amd64
pkg: runtime
cpu: Intel(R) Xeon(R) Gold 6348 CPU @ 2.60GHz
               │  ./old.txt  │              ./new.txt              │
               │   sec/op    │   sec/op     vs base                │
Memmove/2048-2   25.24n ± 0%   24.27n ± 0%   -3.84% (p=0.000 n=10)
Memmove/4096-2   44.87n ± 0%   33.16n ± 1%  -26.11% (p=0.000 n=10)
geomean          33.65n        28.37n       -15.71%

               │  ./old.txt   │               ./new.txt               │
               │     B/s      │      B/s       vs base                │
Memmove/2048-2   75.56Gi ± 0%    78.59Gi ± 0%   +4.02% (p=0.000 n=10)
Memmove/4096-2   85.01Gi ± 0%   115.05Gi ± 1%  +35.34% (p=0.000 n=10)
geomean          80.14Gi         95.09Gi       +18.65%

Fixes #66958

Change-Id: I1fafd1b51a16752f83ac15047cf3b29422a79d5d
GitHub-Last-Rev: 89cf5af32b1b41e1499282058656a8a5c7aed359
GitHub-Pull-Request: golang/go#66959
Reviewed-on: https://go-review.googlesource.com/c/go/+/580735
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>

											
										
										
											2024-07-02 04:02:12 +00:00
+										{Name: "fsrm", Feature: &X86.HasFSRM},
-												internal/cpu: add invalid option warnings and support to enable cpu features

This CL adds the ability to enable the cpu feature FEATURE by specifying
FEATURE=on in GODEBUGCPU. Syntax support to enable cpu features is useful
in combination with a preceeding all=off to disable all but some specific
cpu features. Example:

GODEBUGCPU=all=off,sse3=on

This CL implements printing of warnings for invalid GODEBUGCPU settings:
- requests enabling features that are not supported with the current CPU
- specifying values different than 'on' or 'off' for a feature
- settings for unkown cpu feature names

Updates #27218

Change-Id: Ic13e5c4c35426a390c50eaa4bd2a408ef2ee21be
Reviewed-on: https://go-review.googlesource.com/c/141800
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2018-10-12 19:17:21 +02:00
+										{Name: "pclmulqdq", Feature: &X86.HasPCLMULQDQ},
-												runtime: use RDTSCP for instruction stream serialized read of TSC

To measure all instructions having been completed before reading
the time stamp counter with RDTSC an instruction sequence that
has instruction stream serializing properties which guarantee
waiting until all previous instructions have been executed is
needed. This does not necessary mean to wait for all stores to
be globally visible.

This CL aims to remove vendor specific logic for determining the
instruction sequence with CPU feature flag checks that are
CPU vendor independent.

For intel LFENCE has the wanted properties at least
since it was introduced together with SSE2 support.

On AMD instruction stream serializing LFENCE is supported by setting
an MSR C001_1029[1]=1 on AMD family 10h/12h/14h/15h/16h/17h processors.
AMD family 0Fh/11h processors support LFENCE as serializing always.
AMD plans support for this MSR and access to this bit for all future processors.
Source: https://developer.amd.com/wp-content/resources/Managing-Speculation-on-AMD-Processors.pdf

Reading the MSR to determine LFENCE properties is not always possible
or reliable (hypervisors). The Linux kernel is relying on serializing
LFENCE on AMD CPUs since a commit in July 2019: https://lkml.org/lkml/2019/7/22/295
and the MSR C001_1029 to enable serialization has been set by default
with the Spectre v1 mitigations.

Using an MFENCE on AMD is waiting on previous instructions having been executed
but in addition also flushes store buffers.

To align the serialization properties without runtime detection
of CPU manufacturers we can use the newer RDTSCP instruction which
waits until all previous instructions have been executed.

RDTSCP is available on Intel since around 2008 and on AMD CPUs since
around 2006. Support for RDTSCP can be checked independently
of manufacturer by checking CPUID bits.

Using RDTSCP is the default in Linux to read TSC in program order
when the instruction is available.
https://github.com/torvalds/linux/blob/e22ce8eb631bdc47a4a4ea7ecf4e4ba499db4f93/arch/x86/include/asm/msr.h#L231

Change-Id: Ifa841843b9abb2816f8f0754a163ebf01385306d
Reviewed-on: https://go-review.googlesource.com/c/go/+/344429
Reviewed-by: Keith Randall <khr@golang.org>
Trust: Martin Möhrmann <martin@golang.org>
Run-TryBot: Martin Möhrmann <martin@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>

											
										
										
											2021-08-23 13:53:22 +02:00
+										{Name: "rdtscp", Feature: &X86.HasRDTSCP},
-												internal/cpu: detect sha-ni instruction support for AMD64

    addresses proposal #53084
    required by sha-256 change list developed for #50543

Change-Id: I5454d746fce069a7a4993d70dc5b0a5544f8eeaf
Reviewed-on: https://go-review.googlesource.com/c/go/+/408794
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Keith Randall <khr@google.com>

											
										
										
											2022-05-25 20:03:48 -04:00
+										{Name: "sha", Feature: &X86.HasSHA},
-												internal/cpu: disallow disabling options that are required for microarch

e.g., if GOAMD64=v3, don't allow GODEBUG=cpu.XXX=off for XXX which
are required for v3.

Change-Id: Ib58a4c8b13c5464ba476448ba44bbb261218787c
Reviewed-on: https://go-review.googlesource.com/c/go/+/391694
Trust: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>

											
										
										
											2022-03-10 15:26:22 -08:00
+									}
 									level := getGOAMD64level()
 									if level < 2 {
 										// These options are required at level 2. At lower levels
 										// they can be turned off.
 										options = append(options,
 											option{Name: "popcnt", Feature: &X86.HasPOPCNT},
 											option{Name: "sse3", Feature: &X86.HasSSE3},
 											option{Name: "sse41", Feature: &X86.HasSSE41},
 											option{Name: "sse42", Feature: &X86.HasSSE42},
 											option{Name: "ssse3", Feature: &X86.HasSSSE3})
 									}
 									if level < 3 {
 										// These options are required at level 3. At lower levels
 										// they can be turned off.
 										options = append(options,
 											option{Name: "avx", Feature: &X86.HasAVX},
 											option{Name: "avx2", Feature: &X86.HasAVX2},
 											option{Name: "bmi1", Feature: &X86.HasBMI1},
 											option{Name: "bmi2", Feature: &X86.HasBMI2},
 											option{Name: "fma", Feature: &X86.HasFMA})
-												internal/cpu: add experiment to disable CPU features with GODEBUGCPU

Needs the go compiler to be build with GOEXPERIMENT=debugcpu to be active.

The GODEBUGCPU environment variable can be used to disable usage of
specific processor features in the Go standard library.
This is useful for testing and benchmarking different code paths that
are guarded by internal/cpu variable checks.

Use of processor features can not be enabled through GODEBUGCPU.

To disable usage of AVX and SSE41 cpu features on GOARCH amd64 use:
GODEBUGCPU=avx=0,sse41=0

The special "all" option can be used to disable all options:
GODEBUGCPU=all=0

Updates #12805
Updates #15403

Change-Id: I699c2e6f74d98472b6fb4b1e5ffbf29b15697aab
Reviewed-on: https://go-review.googlesource.com/91737
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-01-26 12:14:27 +01:00
+									}
-												internal/cpu: detect support of AVX512

Extracts changes from that were submitted in other CLs to enable AVX512
detection, notably:
- https://go-review.googlesource.com/c/go/+/271521
- https://go-review.googlesource.com/c/go/+/379394
- https://go-review.googlesource.com/c/go/+/502476

This change adds properties to the cpu.X86 fields to enable runtime
detection of AVX512, and the hasAVX512F, hasAVX512BW, and hasAVX512VL
macros to support bypassing runtime checks in assembly code when
GOAMD64=v4 is set.

Change-Id: Ia7c3f22f1e66bf1de575aba522cb0d0a55ce791f
Reviewed-on: https://go-review.googlesource.com/c/go/+/536257
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Martin Möhrmann <martin@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
Commit-Queue: Martin Möhrmann <martin@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>

											
										
										
											2023-10-18 19:21:55 +00:00
+									if level < 4 {
 										// These options are required at level 4. At lower levels
 										// they can be turned off.
 										options = append(options,
 											option{Name: "avx512f", Feature: &X86.HasAVX512F},
-												[dev.simd] internal/cpu: add AVX-512-CD and DQ, and derived "basic AVX-512"

This adds detection for the CD and DQ sub-features of x86 AVX-512.

Building on these, we also add a "derived" AVX-512 feature that
bundles together the basic usable subset of subfeatures. Despite the F
in AVX-512-F standing for "foundation", AVX-512-F+BW+DQ+VL together
really form the basic usable subset of AVX-512 functionality. These
have also all been supported together by almost every CPU, and are
guaranteed by GOAMD64=v4, so there's little point in separating them
out.

Change-Id: I34356502bd1853ba2372e48db0b10d55cffe07a1
Reviewed-on: https://go-review.googlesource.com/c/go/+/680899
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

											
										
										
											2025-06-12 15:24:22 -04:00
+											option{Name: "avx512cd", Feature: &X86.HasAVX512CD},
-												internal/cpu: detect support of AVX512

Extracts changes from that were submitted in other CLs to enable AVX512
detection, notably:
- https://go-review.googlesource.com/c/go/+/271521
- https://go-review.googlesource.com/c/go/+/379394
- https://go-review.googlesource.com/c/go/+/502476

This change adds properties to the cpu.X86 fields to enable runtime
detection of AVX512, and the hasAVX512F, hasAVX512BW, and hasAVX512VL
macros to support bypassing runtime checks in assembly code when
GOAMD64=v4 is set.

Change-Id: Ia7c3f22f1e66bf1de575aba522cb0d0a55ce791f
Reviewed-on: https://go-review.googlesource.com/c/go/+/536257
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Martin Möhrmann <martin@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
Commit-Queue: Martin Möhrmann <martin@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>

											
										
										
											2023-10-18 19:21:55 +00:00
+											option{Name: "avx512bw", Feature: &X86.HasAVX512BW},
-												[dev.simd] internal/cpu: add AVX-512-CD and DQ, and derived "basic AVX-512"

This adds detection for the CD and DQ sub-features of x86 AVX-512.

Building on these, we also add a "derived" AVX-512 feature that
bundles together the basic usable subset of subfeatures. Despite the F
in AVX-512-F standing for "foundation", AVX-512-F+BW+DQ+VL together
really form the basic usable subset of AVX-512 functionality. These
have also all been supported together by almost every CPU, and are
guaranteed by GOAMD64=v4, so there's little point in separating them
out.

Change-Id: I34356502bd1853ba2372e48db0b10d55cffe07a1
Reviewed-on: https://go-review.googlesource.com/c/go/+/680899
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

											
										
										
											2025-06-12 15:24:22 -04:00
+											option{Name: "avx512dq", Feature: &X86.HasAVX512DQ},
-												internal/cpu: detect support of AVX512

Extracts changes from that were submitted in other CLs to enable AVX512
detection, notably:
- https://go-review.googlesource.com/c/go/+/271521
- https://go-review.googlesource.com/c/go/+/379394
- https://go-review.googlesource.com/c/go/+/502476

This change adds properties to the cpu.X86 fields to enable runtime
detection of AVX512, and the hasAVX512F, hasAVX512BW, and hasAVX512VL
macros to support bypassing runtime checks in assembly code when
GOAMD64=v4 is set.

Change-Id: Ia7c3f22f1e66bf1de575aba522cb0d0a55ce791f
Reviewed-on: https://go-review.googlesource.com/c/go/+/536257
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Martin Möhrmann <martin@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
Commit-Queue: Martin Möhrmann <martin@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>

											
										
										
											2023-10-18 19:21:55 +00:00
+											option{Name: "avx512vl", Feature: &X86.HasAVX512VL},
 										)
 									}
-												internal/cpu: add experiment to disable CPU features with GODEBUGCPU

Needs the go compiler to be build with GOEXPERIMENT=debugcpu to be active.

The GODEBUGCPU environment variable can be used to disable usage of
specific processor features in the Go standard library.
This is useful for testing and benchmarking different code paths that
are guarded by internal/cpu variable checks.

Use of processor features can not be enabled through GODEBUGCPU.

To disable usage of AVX and SSE41 cpu features on GOARCH amd64 use:
GODEBUGCPU=avx=0,sse41=0

The special "all" option can be used to disable all options:
GODEBUGCPU=all=0

Updates #12805
Updates #15403

Change-Id: I699c2e6f74d98472b6fb4b1e5ffbf29b15697aab
Reviewed-on: https://go-review.googlesource.com/91737
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-01-26 12:14:27 +01:00
-												internal/cpu: fix style nit in variable name

Consistent with similar change of style in the crypto repository:
http://golang.org/cl/43511

Change-Id: Ib158c52a2649dcbbe9eb92f2bdb9d289e0dcc7bf
Reviewed-on: https://go-review.googlesource.com/53474
Reviewed-by: Han-Wen Nienhuys <hanwen@google.com>
Reviewed-by: Avelino <t@avelino.xxx>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2017-08-06 16:15:05 +02:00
+									maxID, _, _, _ := cpuid(0, 0)
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
-												internal/cpu: fix style nit in variable name

Consistent with similar change of style in the crypto repository:
http://golang.org/cl/43511

Change-Id: Ib158c52a2649dcbbe9eb92f2bdb9d289e0dcc7bf
Reviewed-on: https://go-review.googlesource.com/53474
Reviewed-by: Han-Wen Nienhuys <hanwen@google.com>
Reviewed-by: Avelino <t@avelino.xxx>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2017-08-06 16:15:05 +02:00
+									if maxID < 1 {
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
+										return
 									}
-												testing: print cpu type as label for benchmarks

Supports 386 and amd64 architectures on all operating systems.

Example output:
$ go test -bench=.*
goos: darwin
goarch: amd64
pkg: strconv
cpu: Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz
BenchmarkAtof64Decimal-4        	24431032	        46.8 ns/op
...

As the displayed CPU information is only used for information
purposes it is lazily initialized when needed using the new
internal/sysinfo package.

This allows internal/cpu to stay without dependencies and avoid
initialization costs when the CPU information is not needed as
the new code to query the CPU name in internal/cpu can be
dead code eliminated if not used.

Fixes #39214

Change-Id: I77ae5c5d2fed6b28fa78dd45075f9f0a6a7f1bfd
Reviewed-on: https://go-review.googlesource.com/c/go/+/263804
Trust: Martin Möhrmann <moehrmann@google.com>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2020-10-20 09:56:14 +02:00
+									maxExtendedFunctionInformation, _, _, _ = cpuid(0x80000000, 0)
-												all: replace runtime SSE2 detection with GO386 setting

When GO386=sse2 we can assume sse2 to be present without
a runtime check. If GO386=softfloat is set we can avoid
the usage of SSE2 even if detected.

This might cause a memcpy, memclr and bytealg slowdown of Go
binaries compiled with softfloat on machines that support
SSE2. Such setups are rare and should use GO386=sse2 instead
if performance matters.

On targets that support SSE2 we avoid the runtime overhead of
dynamic cpu feature dispatch.

The removal of runtime sse2 checks also allows to simplify
internal/cpu further by removing handling of the required
feature option as a followup after this CL.

Change-Id: I90a853a8853a405cb665497c6d1a86556947ba17
Reviewed-on: https://go-review.googlesource.com/c/go/+/344350
Trust: Martin Möhrmann <martin@golang.org>
Run-TryBot: Martin Möhrmann <martin@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2021-08-23 11:34:51 +02:00
+									_, _, ecx1, _ := cpuid(1, 0)
-												internal/cpu: align capability definitions for x86 with other architectures

Use constant masks and align the definition of isSet with
arm64 and ppc64x.

Change-Id: I0c6eae30da5e3ce797cde0dab4a39855d4d245d9
Reviewed-on: https://go-review.googlesource.com/94759
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-02-04 20:39:39 +01:00
 									X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
 									X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
 									X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3)
 									X86.HasSSE41 = isSet(ecx1, cpuid_SSE41)
 									X86.HasSSE42 = isSet(ecx1, cpuid_SSE42)
 									X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT)
 									X86.HasAES = isSet(ecx1, cpuid_AES)
-												internal/cpu: disable FMA when OSXSAVE is not enabled on x86

All instructions in the FMA extension on x86 are VEX prefixed.
VEX prefixed instructions generally require OSXSAVE to be enabled.

The execution of FMA instructions emitted by the Go compiler on amd64
will generate an invalid opcode exception if OSXSAVE is not enabled.

Fixes #41022

Change-Id: I49881630e7195c804110a2bd81b5bec8cac31ba8
Reviewed-on: https://go-review.googlesource.com/c/go/+/274479
Trust: Martin Möhrmann <moehrmann@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2020-12-03 16:41:57 +01:00
 									// OSXSAVE can be false when using older Operating Systems
 									// or when explicitly disabled on newer Operating Systems by
 									// e.g. setting the xsavedisable boot option on Windows 10.
-												internal/cpu: align capability definitions for x86 with other architectures

Use constant masks and align the definition of isSet with
arm64 and ppc64x.

Change-Id: I0c6eae30da5e3ce797cde0dab4a39855d4d245d9
Reviewed-on: https://go-review.googlesource.com/94759
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-02-04 20:39:39 +01:00
+									X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE)
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
-												internal/cpu: disable FMA when OSXSAVE is not enabled on x86

All instructions in the FMA extension on x86 are VEX prefixed.
VEX prefixed instructions generally require OSXSAVE to be enabled.

The execution of FMA instructions emitted by the Go compiler on amd64
will generate an invalid opcode exception if OSXSAVE is not enabled.

Fixes #41022

Change-Id: I49881630e7195c804110a2bd81b5bec8cac31ba8
Reviewed-on: https://go-review.googlesource.com/c/go/+/274479
Trust: Martin Möhrmann <moehrmann@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2020-12-03 16:41:57 +01:00
+									// The FMA instruction set extension only has VEX prefixed instructions.
 									// VEX prefixed instructions require OSXSAVE to be enabled.
 									// See Intel 64 and IA-32 Architecture Software Developer’s Manual Volume 2
 									// Section 2.4 "AVX and SSE Instruction Exception Specification"
 									X86.HasFMA = isSet(ecx1, cpuid_FMA) && X86.HasOSXSAVE
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
+									osSupportsAVX := false
-												internal/cpu: detect support of AVX512

Extracts changes from that were submitted in other CLs to enable AVX512
detection, notably:
- https://go-review.googlesource.com/c/go/+/271521
- https://go-review.googlesource.com/c/go/+/379394
- https://go-review.googlesource.com/c/go/+/502476

This change adds properties to the cpu.X86 fields to enable runtime
detection of AVX512, and the hasAVX512F, hasAVX512BW, and hasAVX512VL
macros to support bypassing runtime checks in assembly code when
GOAMD64=v4 is set.

Change-Id: Ia7c3f22f1e66bf1de575aba522cb0d0a55ce791f
Reviewed-on: https://go-review.googlesource.com/c/go/+/536257
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Martin Möhrmann <martin@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
Commit-Queue: Martin Möhrmann <martin@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>

											
										
										
											2023-10-18 19:21:55 +00:00
+									osSupportsAVX512 := false
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
+									// For XGETBV, OSXSAVE bit is required and sufficient.
 									if X86.HasOSXSAVE {
 										eax, _ := xgetbv()
 										// Check if XMM and YMM registers have OS support.
-												internal/cpu: align capability definitions for x86 with other architectures

Use constant masks and align the definition of isSet with
arm64 and ppc64x.

Change-Id: I0c6eae30da5e3ce797cde0dab4a39855d4d245d9
Reviewed-on: https://go-review.googlesource.com/94759
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-02-04 20:39:39 +01:00
+										osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2)
-												internal/cpu: detect support of AVX512

Extracts changes from that were submitted in other CLs to enable AVX512
detection, notably:
- https://go-review.googlesource.com/c/go/+/271521
- https://go-review.googlesource.com/c/go/+/379394
- https://go-review.googlesource.com/c/go/+/502476

This change adds properties to the cpu.X86 fields to enable runtime
detection of AVX512, and the hasAVX512F, hasAVX512BW, and hasAVX512VL
macros to support bypassing runtime checks in assembly code when
GOAMD64=v4 is set.

Change-Id: Ia7c3f22f1e66bf1de575aba522cb0d0a55ce791f
Reviewed-on: https://go-review.googlesource.com/c/go/+/536257
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Martin Möhrmann <martin@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
Commit-Queue: Martin Möhrmann <martin@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>

											
										
										
											2023-10-18 19:21:55 +00:00
 										// AVX512 detection does not work on Darwin,
 										// see https://github.com/golang/go/issues/49233
 										//
 										// Check if opmask, ZMMhi256 and Hi16_ZMM have OS support.
 										osSupportsAVX512 = osSupportsAVX && isSet(eax, 1<<5) && isSet(eax, 1<<6) && isSet(eax, 1<<7)
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
+									}
-												internal/cpu: align capability definitions for x86 with other architectures

Use constant masks and align the definition of isSet with
arm64 and ppc64x.

Change-Id: I0c6eae30da5e3ce797cde0dab4a39855d4d245d9
Reviewed-on: https://go-review.googlesource.com/94759
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-02-04 20:39:39 +01:00
+									X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
-												internal/cpu: fix style nit in variable name

Consistent with similar change of style in the crypto repository:
http://golang.org/cl/43511

Change-Id: Ib158c52a2649dcbbe9eb92f2bdb9d289e0dcc7bf
Reviewed-on: https://go-review.googlesource.com/53474
Reviewed-by: Han-Wen Nienhuys <hanwen@google.com>
Reviewed-by: Avelino <t@avelino.xxx>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2017-08-06 16:15:05 +02:00
+									if maxID < 7 {
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
+										return
 									}
-												[dev.simd] internal/cpu: add GFNI feature check

This CL amends HasAVX512 flag with GFNI check.

This is needed because our SIMD API supports Galois Field operations.

Change-Id: I3e957b7b2215d2b7b6b8a7a0ca3e2e60d453b2e5
Reviewed-on: https://go-review.googlesource.com/c/go/+/685295
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

											
										
										
											2025-07-01 18:00:33 +00:00
+									_, ebx7, ecx7, edx7 := cpuid(7, 0)
-												internal/cpu: align capability definitions for x86 with other architectures

Use constant masks and align the definition of isSet with
arm64 and ppc64x.

Change-Id: I0c6eae30da5e3ce797cde0dab4a39855d4d245d9
Reviewed-on: https://go-review.googlesource.com/94759
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-02-04 20:39:39 +01:00
+									X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
 									X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
 									X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
 									X86.HasERMS = isSet(ebx7, cpuid_ERMS)
 									X86.HasADX = isSet(ebx7, cpuid_ADX)
-												internal/cpu: detect sha-ni instruction support for AMD64

    addresses proposal #53084
    required by sha-256 change list developed for #50543

Change-Id: I5454d746fce069a7a4993d70dc5b0a5544f8eeaf
Reviewed-on: https://go-review.googlesource.com/c/go/+/408794
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Keith Randall <khr@google.com>

											
										
										
											2022-05-25 20:03:48 -04:00
+									X86.HasSHA = isSet(ebx7, cpuid_SHA)
-												runtime: use RDTSCP for instruction stream serialized read of TSC

To measure all instructions having been completed before reading
the time stamp counter with RDTSC an instruction sequence that
has instruction stream serializing properties which guarantee
waiting until all previous instructions have been executed is
needed. This does not necessary mean to wait for all stores to
be globally visible.

This CL aims to remove vendor specific logic for determining the
instruction sequence with CPU feature flag checks that are
CPU vendor independent.

For intel LFENCE has the wanted properties at least
since it was introduced together with SSE2 support.

On AMD instruction stream serializing LFENCE is supported by setting
an MSR C001_1029[1]=1 on AMD family 10h/12h/14h/15h/16h/17h processors.
AMD family 0Fh/11h processors support LFENCE as serializing always.
AMD plans support for this MSR and access to this bit for all future processors.
Source: https://developer.amd.com/wp-content/resources/Managing-Speculation-on-AMD-Processors.pdf

Reading the MSR to determine LFENCE properties is not always possible
or reliable (hypervisors). The Linux kernel is relying on serializing
LFENCE on AMD CPUs since a commit in July 2019: https://lkml.org/lkml/2019/7/22/295
and the MSR C001_1029 to enable serialization has been set by default
with the Spectre v1 mitigations.

Using an MFENCE on AMD is waiting on previous instructions having been executed
but in addition also flushes store buffers.

To align the serialization properties without runtime detection
of CPU manufacturers we can use the newer RDTSCP instruction which
waits until all previous instructions have been executed.

RDTSCP is available on Intel since around 2008 and on AMD CPUs since
around 2006. Support for RDTSCP can be checked independently
of manufacturer by checking CPUID bits.

Using RDTSCP is the default in Linux to read TSC in program order
when the instruction is available.
https://github.com/torvalds/linux/blob/e22ce8eb631bdc47a4a4ea7ecf4e4ba499db4f93/arch/x86/include/asm/msr.h#L231

Change-Id: Ifa841843b9abb2816f8f0754a163ebf01385306d
Reviewed-on: https://go-review.googlesource.com/c/go/+/344429
Reviewed-by: Keith Randall <khr@golang.org>
Trust: Martin Möhrmann <martin@golang.org>
Run-TryBot: Martin Möhrmann <martin@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>

											
										
										
											2021-08-23 13:53:22 +02:00
-												internal/cpu: detect support of AVX512

Extracts changes from that were submitted in other CLs to enable AVX512
detection, notably:
- https://go-review.googlesource.com/c/go/+/271521
- https://go-review.googlesource.com/c/go/+/379394
- https://go-review.googlesource.com/c/go/+/502476

This change adds properties to the cpu.X86 fields to enable runtime
detection of AVX512, and the hasAVX512F, hasAVX512BW, and hasAVX512VL
macros to support bypassing runtime checks in assembly code when
GOAMD64=v4 is set.

Change-Id: Ia7c3f22f1e66bf1de575aba522cb0d0a55ce791f
Reviewed-on: https://go-review.googlesource.com/c/go/+/536257
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Martin Möhrmann <martin@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
Commit-Queue: Martin Möhrmann <martin@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>

											
										
										
											2023-10-18 19:21:55 +00:00
+									X86.HasAVX512F = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512
 									if X86.HasAVX512F {
-												[dev.simd] internal/cpu: add AVX-512-CD and DQ, and derived "basic AVX-512"

This adds detection for the CD and DQ sub-features of x86 AVX-512.

Building on these, we also add a "derived" AVX-512 feature that
bundles together the basic usable subset of subfeatures. Despite the F
in AVX-512-F standing for "foundation", AVX-512-F+BW+DQ+VL together
really form the basic usable subset of AVX-512 functionality. These
have also all been supported together by almost every CPU, and are
guaranteed by GOAMD64=v4, so there's little point in separating them
out.

Change-Id: I34356502bd1853ba2372e48db0b10d55cffe07a1
Reviewed-on: https://go-review.googlesource.com/c/go/+/680899
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

											
										
										
											2025-06-12 15:24:22 -04:00
+										X86.HasAVX512CD = isSet(ebx7, cpuid_AVX512CD)
-												internal/cpu: detect support of AVX512

Extracts changes from that were submitted in other CLs to enable AVX512
detection, notably:
- https://go-review.googlesource.com/c/go/+/271521
- https://go-review.googlesource.com/c/go/+/379394
- https://go-review.googlesource.com/c/go/+/502476

This change adds properties to the cpu.X86 fields to enable runtime
detection of AVX512, and the hasAVX512F, hasAVX512BW, and hasAVX512VL
macros to support bypassing runtime checks in assembly code when
GOAMD64=v4 is set.

Change-Id: Ia7c3f22f1e66bf1de575aba522cb0d0a55ce791f
Reviewed-on: https://go-review.googlesource.com/c/go/+/536257
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Martin Möhrmann <martin@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
Commit-Queue: Martin Möhrmann <martin@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>

											
										
										
											2023-10-18 19:21:55 +00:00
+										X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW)
-												[dev.simd] internal/cpu: add AVX-512-CD and DQ, and derived "basic AVX-512"

This adds detection for the CD and DQ sub-features of x86 AVX-512.

Building on these, we also add a "derived" AVX-512 feature that
bundles together the basic usable subset of subfeatures. Despite the F
in AVX-512-F standing for "foundation", AVX-512-F+BW+DQ+VL together
really form the basic usable subset of AVX-512 functionality. These
have also all been supported together by almost every CPU, and are
guaranteed by GOAMD64=v4, so there's little point in separating them
out.

Change-Id: I34356502bd1853ba2372e48db0b10d55cffe07a1
Reviewed-on: https://go-review.googlesource.com/c/go/+/680899
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

											
										
										
											2025-06-12 15:24:22 -04:00
+										X86.HasAVX512DQ = isSet(ebx7, cpuid_AVX512DQ)
-												internal/cpu: detect support of AVX512

Extracts changes from that were submitted in other CLs to enable AVX512
detection, notably:
- https://go-review.googlesource.com/c/go/+/271521
- https://go-review.googlesource.com/c/go/+/379394
- https://go-review.googlesource.com/c/go/+/502476

This change adds properties to the cpu.X86 fields to enable runtime
detection of AVX512, and the hasAVX512F, hasAVX512BW, and hasAVX512VL
macros to support bypassing runtime checks in assembly code when
GOAMD64=v4 is set.

Change-Id: Ia7c3f22f1e66bf1de575aba522cb0d0a55ce791f
Reviewed-on: https://go-review.googlesource.com/c/go/+/536257
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <martin@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Martin Möhrmann <martin@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
Commit-Queue: Martin Möhrmann <martin@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>

											
										
										
											2023-10-18 19:21:55 +00:00
+										X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL)
 									}
-												runtime: add ERMS-based memmove support for modern CPU platforms

The current memmove implementation uses REP MOVSB to copy data larger than
2KB when the useAVXmemmove global variable is false and the CPU supports
the ERMS feature.

This feature is currently only enabled on CPUs in the Sandy Bridge (Client)
, Sandy Bridge (Server), Ivy Bridge (Client), and Ivy Bridge (Server)
microarchitectures.

For modern Intel CPU microarchitectures that support the ERMS feature, such
as Ice Lake (Server), Sapphire Rapids , REP MOVSB achieves better
performance than the AVX-based copy currently implemented in memmove.

Benchstat result:

goos: linux
goarch: amd64
pkg: runtime
cpu: Intel(R) Xeon(R) Gold 6348 CPU @ 2.60GHz
               │  ./old.txt  │              ./new.txt              │
               │   sec/op    │   sec/op     vs base                │
Memmove/2048-2   25.24n ± 0%   24.27n ± 0%   -3.84% (p=0.000 n=10)
Memmove/4096-2   44.87n ± 0%   33.16n ± 1%  -26.11% (p=0.000 n=10)
geomean          33.65n        28.37n       -15.71%

               │  ./old.txt   │               ./new.txt               │
               │     B/s      │      B/s       vs base                │
Memmove/2048-2   75.56Gi ± 0%    78.59Gi ± 0%   +4.02% (p=0.000 n=10)
Memmove/4096-2   85.01Gi ± 0%   115.05Gi ± 1%  +35.34% (p=0.000 n=10)
geomean          80.14Gi         95.09Gi       +18.65%

Fixes #66958

Change-Id: I1fafd1b51a16752f83ac15047cf3b29422a79d5d
GitHub-Last-Rev: 89cf5af32b1b41e1499282058656a8a5c7aed359
GitHub-Pull-Request: golang/go#66959
Reviewed-on: https://go-review.googlesource.com/c/go/+/580735
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>

											
										
										
											2024-07-02 04:02:12 +00:00
+									X86.HasFSRM = isSet(edx7, cpuid_FSRM)
-												[dev.simd] internal/cpu: add GFNI feature check

This CL amends HasAVX512 flag with GFNI check.

This is needed because our SIMD API supports Galois Field operations.

Change-Id: I3e957b7b2215d2b7b6b8a7a0ca3e2e60d453b2e5
Reviewed-on: https://go-review.googlesource.com/c/go/+/685295
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

											
										
										
											2025-07-01 18:00:33 +00:00
+									X86.HasGFNI = isSet(ecx7, cpuid_GFNI)
-												runtime: add ERMS-based memmove support for modern CPU platforms

The current memmove implementation uses REP MOVSB to copy data larger than
2KB when the useAVXmemmove global variable is false and the CPU supports
the ERMS feature.

This feature is currently only enabled on CPUs in the Sandy Bridge (Client)
, Sandy Bridge (Server), Ivy Bridge (Client), and Ivy Bridge (Server)
microarchitectures.

For modern Intel CPU microarchitectures that support the ERMS feature, such
as Ice Lake (Server), Sapphire Rapids , REP MOVSB achieves better
performance than the AVX-based copy currently implemented in memmove.

Benchstat result:

goos: linux
goarch: amd64
pkg: runtime
cpu: Intel(R) Xeon(R) Gold 6348 CPU @ 2.60GHz
               │  ./old.txt  │              ./new.txt              │
               │   sec/op    │   sec/op     vs base                │
Memmove/2048-2   25.24n ± 0%   24.27n ± 0%   -3.84% (p=0.000 n=10)
Memmove/4096-2   44.87n ± 0%   33.16n ± 1%  -26.11% (p=0.000 n=10)
geomean          33.65n        28.37n       -15.71%

               │  ./old.txt   │               ./new.txt               │
               │     B/s      │      B/s       vs base                │
Memmove/2048-2   75.56Gi ± 0%    78.59Gi ± 0%   +4.02% (p=0.000 n=10)
Memmove/4096-2   85.01Gi ± 0%   115.05Gi ± 1%  +35.34% (p=0.000 n=10)
geomean          80.14Gi         95.09Gi       +18.65%

Fixes #66958

Change-Id: I1fafd1b51a16752f83ac15047cf3b29422a79d5d
GitHub-Last-Rev: 89cf5af32b1b41e1499282058656a8a5c7aed359
GitHub-Pull-Request: golang/go#66959
Reviewed-on: https://go-review.googlesource.com/c/go/+/580735
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>

											
										
										
											2024-07-02 04:02:12 +00:00
-												runtime: use RDTSCP for instruction stream serialized read of TSC

To measure all instructions having been completed before reading
the time stamp counter with RDTSC an instruction sequence that
has instruction stream serializing properties which guarantee
waiting until all previous instructions have been executed is
needed. This does not necessary mean to wait for all stores to
be globally visible.

This CL aims to remove vendor specific logic for determining the
instruction sequence with CPU feature flag checks that are
CPU vendor independent.

For intel LFENCE has the wanted properties at least
since it was introduced together with SSE2 support.

On AMD instruction stream serializing LFENCE is supported by setting
an MSR C001_1029[1]=1 on AMD family 10h/12h/14h/15h/16h/17h processors.
AMD family 0Fh/11h processors support LFENCE as serializing always.
AMD plans support for this MSR and access to this bit for all future processors.
Source: https://developer.amd.com/wp-content/resources/Managing-Speculation-on-AMD-Processors.pdf

Reading the MSR to determine LFENCE properties is not always possible
or reliable (hypervisors). The Linux kernel is relying on serializing
LFENCE on AMD CPUs since a commit in July 2019: https://lkml.org/lkml/2019/7/22/295
and the MSR C001_1029 to enable serialization has been set by default
with the Spectre v1 mitigations.

Using an MFENCE on AMD is waiting on previous instructions having been executed
but in addition also flushes store buffers.

To align the serialization properties without runtime detection
of CPU manufacturers we can use the newer RDTSCP instruction which
waits until all previous instructions have been executed.

RDTSCP is available on Intel since around 2008 and on AMD CPUs since
around 2006. Support for RDTSCP can be checked independently
of manufacturer by checking CPUID bits.

Using RDTSCP is the default in Linux to read TSC in program order
when the instruction is available.
https://github.com/torvalds/linux/blob/e22ce8eb631bdc47a4a4ea7ecf4e4ba499db4f93/arch/x86/include/asm/msr.h#L231

Change-Id: Ifa841843b9abb2816f8f0754a163ebf01385306d
Reviewed-on: https://go-review.googlesource.com/c/go/+/344429
Reviewed-by: Keith Randall <khr@golang.org>
Trust: Martin Möhrmann <martin@golang.org>
Run-TryBot: Martin Möhrmann <martin@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>

											
										
										
											2021-08-23 13:53:22 +02:00
+									var maxExtendedInformation uint32
 									maxExtendedInformation, _, _, _ = cpuid(0x80000000, 0)
 									if maxExtendedInformation < 0x80000001 {
 										return
 									}
 									_, _, _, edxExt1 := cpuid(0x80000001, 0)
 									X86.HasRDTSCP = isSet(edxExt1, cpuid_RDTSCP)
-												[dev.simd] internal/cpu: add AVX-512-CD and DQ, and derived "basic AVX-512"

This adds detection for the CD and DQ sub-features of x86 AVX-512.

Building on these, we also add a "derived" AVX-512 feature that
bundles together the basic usable subset of subfeatures. Despite the F
in AVX-512-F standing for "foundation", AVX-512-F+BW+DQ+VL together
really form the basic usable subset of AVX-512 functionality. These
have also all been supported together by almost every CPU, and are
guaranteed by GOAMD64=v4, so there's little point in separating them
out.

Change-Id: I34356502bd1853ba2372e48db0b10d55cffe07a1
Reviewed-on: https://go-review.googlesource.com/c/go/+/680899
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

											
										
										
											2025-06-12 15:24:22 -04:00
 									doDerived = func() {
 										// Rather than carefully gating on fundamental AVX-512 features, we have
 										// a virtual "AVX512" feature that captures F+CD+BW+DQ+VL. BW, DQ, and
 										// VL have a huge effect on which AVX-512 instructions are available,
 										// and these have all been supported on everything except the earliest
 										// Phi chips with AVX-512. No CPU has had CD without F, so we include
 										// it. GOAMD64=v4 also implies exactly this set, and these are all
 										// included in AVX10.1.
 										X86.HasAVX512 = X86.HasAVX512F && X86.HasAVX512CD && X86.HasAVX512BW && X86.HasAVX512DQ && X86.HasAVX512VL
-												[dev.simd] internal/cpu: add GFNI feature check

This CL amends HasAVX512 flag with GFNI check.

This is needed because our SIMD API supports Galois Field operations.

Change-Id: I3e957b7b2215d2b7b6b8a7a0ca3e2e60d453b2e5
Reviewed-on: https://go-review.googlesource.com/c/go/+/685295
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

											
										
										
											2025-07-01 18:00:33 +00:00
+										X86.HasAVX512GFNI = X86.HasAVX512 && X86.HasGFNI
-												[dev.simd] internal/cpu: add AVX-512-CD and DQ, and derived "basic AVX-512"

This adds detection for the CD and DQ sub-features of x86 AVX-512.

Building on these, we also add a "derived" AVX-512 feature that
bundles together the basic usable subset of subfeatures. Despite the F
in AVX-512-F standing for "foundation", AVX-512-F+BW+DQ+VL together
really form the basic usable subset of AVX-512 functionality. These
have also all been supported together by almost every CPU, and are
guaranteed by GOAMD64=v4, so there's little point in separating them
out.

Change-Id: I34356502bd1853ba2372e48db0b10d55cffe07a1
Reviewed-on: https://go-review.googlesource.com/c/go/+/680899
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

											
										
										
											2025-06-12 15:24:22 -04:00
+									}
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
+								}
-												internal/cpu: align capability definitions for x86 with other architectures

Use constant masks and align the definition of isSet with
arm64 and ppc64x.

Change-Id: I0c6eae30da5e3ce797cde0dab4a39855d4d245d9
Reviewed-on: https://go-review.googlesource.com/94759
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

											
										
										
											2018-02-04 20:39:39 +01:00
+								func isSet(hwc uint32, value uint32) bool {
 									return hwc&value != 0
-												internal/cpu: new package to detect cpu features

Implements detection of x86 cpu features that
are used in the go standard library.

Changes all standard library packages to use the new cpu package
instead of using runtime internal variables to check x86 cpu features.

Updates: #15403

Change-Id: I2999a10cb4d9ec4863ffbed72f4e021a1dbc4bb9
Reviewed-on: https://go-review.googlesource.com/41476
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

											
										
										
											2017-04-03 22:38:09 +02:00
+								}
-												testing: print cpu type as label for benchmarks

Supports 386 and amd64 architectures on all operating systems.

Example output:
$ go test -bench=.*
goos: darwin
goarch: amd64
pkg: strconv
cpu: Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz
BenchmarkAtof64Decimal-4        	24431032	        46.8 ns/op
...

As the displayed CPU information is only used for information
purposes it is lazily initialized when needed using the new
internal/sysinfo package.

This allows internal/cpu to stay without dependencies and avoid
initialization costs when the CPU information is not needed as
the new code to query the CPU name in internal/cpu can be
dead code eliminated if not used.

Fixes #39214

Change-Id: I77ae5c5d2fed6b28fa78dd45075f9f0a6a7f1bfd
Reviewed-on: https://go-review.googlesource.com/c/go/+/263804
Trust: Martin Möhrmann <moehrmann@google.com>
Reviewed-by: Keith Randall <khr@golang.org>

											
										
										
											2020-10-20 09:56:14 +02:00
 								// Name returns the CPU name given by the vendor.
 								// If the CPU name can not be determined an
 								// empty string is returned.
 								func Name() string {
 									if maxExtendedFunctionInformation < 0x80000004 {
 										return ""
 									}
 									data := make([]byte, 0, 3*4*4)
 									var eax, ebx, ecx, edx uint32
 									eax, ebx, ecx, edx = cpuid(0x80000002, 0)
 									data = appendBytes(data, eax, ebx, ecx, edx)
 									eax, ebx, ecx, edx = cpuid(0x80000003, 0)
 									data = appendBytes(data, eax, ebx, ecx, edx)
 									eax, ebx, ecx, edx = cpuid(0x80000004, 0)
 									data = appendBytes(data, eax, ebx, ecx, edx)
 									// Trim leading spaces.
 									for len(data) > 0 && data[0] == ' ' {
 										data = data[1:]
 									}
 									// Trim tail after and including the first null byte.
 									for i, c := range data {
 										if c == '\x00' {
 											data = data[:i]
 											break
 										}
 									}
 									return string(data)
 								}
 								func appendBytes(b []byte, args ...uint32) []byte {
 									for _, arg := range args {
 										b = append(b,
 											byte((arg >> 0)),
 											byte((arg >> 8)),
 											byte((arg >> 16)),
 											byte((arg >> 24)))
 									}
 									return b
 								}