[dev.simd] cmd/compile, simd: add ClearAVXUpperBits

Intended for transitioning from AVX to SSE, this helps early
adopters with benchmarking. The compiler should take care of that
one day.

Change-Id: I9d7413f22f30f8dc0c632e8e806386d9ca8e8308
Reviewed-on: https://go-review.googlesource.com/c/go/+/701199
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Cherry Mui 2025-09-04 17:15:14 -04:00
parent 7c8b9115bc
commit 356c48d8e9
5 changed files with 56 additions and 6 deletions

View file

@ -1397,8 +1397,8 @@ func init() {
{name: "VMOVSSconst", reg: fp01, asm: "VMOVSS", aux: "Float32", rematerializeable: true}, {name: "VMOVSSconst", reg: fp01, asm: "VMOVSS", aux: "Float32", rematerializeable: true},
{name: "VMOVSDconst", reg: fp01, asm: "VMOVSD", aux: "Float64", rematerializeable: true}, {name: "VMOVSDconst", reg: fp01, asm: "VMOVSD", aux: "Float64", rematerializeable: true},
{name: "VZEROUPPER", argLength: 0, asm: "VZEROUPPER"}, {name: "VZEROUPPER", argLength: 1, reg: regInfo{clobbers: v}, asm: "VZEROUPPER"}, // arg=mem, returns mem
{name: "VZEROALL", argLength: 0, asm: "VZEROALL"}, {name: "VZEROALL", argLength: 1, reg: regInfo{clobbers: v}, asm: "VZEROALL"}, // arg=mem, returns mem
{name: "KMOVQload", argLength: 2, reg: kload, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, {name: "KMOVQload", argLength: 2, reg: kload, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
{name: "KMOVQstore", argLength: 3, reg: kstore, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, {name: "KMOVQstore", argLength: 3, reg: kstore, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"},

View file

@ -19070,15 +19070,19 @@ var opcodeTable = [...]opInfo{
}, },
{ {
name: "VZEROUPPER", name: "VZEROUPPER",
argLen: 0, argLen: 1,
asm: x86.AVZEROUPPER, asm: x86.AVZEROUPPER,
reg: regInfo{}, reg: regInfo{
clobbers: 2147418112, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
}, },
{ {
name: "VZEROALL", name: "VZEROALL",
argLen: 0, argLen: 1,
asm: x86.AVZEROALL, asm: x86.AVZEROALL,
reg: regInfo{}, reg: regInfo{
clobbers: 2147418112, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
}, },
{ {
name: "KMOVQload", name: "KMOVQload",

View file

@ -1607,6 +1607,13 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
if buildcfg.Experiment.SIMD { if buildcfg.Experiment.SIMD {
// Only enable intrinsics, if SIMD experiment. // Only enable intrinsics, if SIMD experiment.
simdIntrinsics(addF) simdIntrinsics(addF)
// simd.ClearAVXUpperBits is lowered to a single VZEROUPPER op on AMD64.
// The op takes the current memory state as its only argument and its
// result (of memory type) becomes the new memory state; presumably this
// is what keeps the instruction ordered relative to surrounding memory
// operations and prevents it from being removed as dead code.
addF("simd", "ClearAVXUpperBits",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
s.vars[memVar] = s.newValue1(ssa.OpAMD64VZEROUPPER, types.TypeMem, s.mem())
// ClearAVXUpperBits has no result, so no SSA value is returned.
return nil
},
sys.AMD64)
} }
} }

17
src/simd/extra_amd64.go Normal file
View file

@ -0,0 +1,17 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build goexperiment.simd && amd64
package simd
// ClearAVXUpperBits clears the high bits of Y0-Y15 and Z0-Z15 registers.
// It is intended for transitioning from AVX to SSE, eliminating the
// performance penalties caused by false dependencies.
//
// The function has no Go body: the compiler recognizes calls to it and
// emits the instruction directly. It is only available on amd64 when
// building with GOEXPERIMENT=simd.
//
// Calling it while SIMD values are still live is intended to be safe,
// although usually one should only call it once the AVX work is done.
//
// Note: in the future the compiler may automatically generate the
// instruction, making this function unnecessary.
//
// Asm: VZEROUPPER, CPU Feature: AVX
func ClearAVXUpperBits()

View file

@ -518,3 +518,25 @@ func TestFlattenedTranspose(t *testing.T) {
checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4}) checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4})
} }
// TestClearAVXUpperBits verifies that calling ClearAVXUpperBits while SIMD
// values are still live does not corrupt them. Real code would normally
// not clear the upper bits in the middle of vector work.
func TestClearAVXUpperBits(t *testing.T) {
	if !simd.HasAVX2() {
		t.Skip("Test requires HasAVX2, not available on this hardware")
		return
	}

	sum, diff := make([]int64, 4), make([]int64, 4)

	// Both operands are loaded before the clear and reused after it, so
	// they must survive the instruction.
	a := simd.LoadInt64x4Slice([]int64{10, 20, 30, 40})
	b := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})

	added := a.Add(b)
	added.StoreSlice(sum)

	simd.ClearAVXUpperBits()

	subtracted := a.Sub(b)
	subtracted.StoreSlice(diff)

	checkSlices[int64](t, sum, []int64{11, 22, 33, 44})
	checkSlices[int64](t, diff, []int64{9, 18, 27, 36})
}