From 356c48d8e95dae2b9baa72d715c973a65938a35d Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Thu, 4 Sep 2025 17:15:14 -0400 Subject: [PATCH] [dev.simd] cmd/compile, simd: add ClearAVXUpperBits Intended for transitioning from AVX to SSE, this helps early adopters benchmarking. The compiler should take care of that, one day. Change-Id: I9d7413f22f30f8dc0c632e8e806386d9ca8e8308 Reviewed-on: https://go-review.googlesource.com/c/go/+/701199 LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao Reviewed-by: David Chase --- src/cmd/compile/internal/ssa/_gen/AMD64Ops.go | 4 ++-- src/cmd/compile/internal/ssa/opGen.go | 12 ++++++---- src/cmd/compile/internal/ssagen/intrinsics.go | 7 ++++++ src/simd/extra_amd64.go | 17 ++++++++++++++ src/simd/internal/simd_test/simd_test.go | 22 +++++++++++++++++++ 5 files changed, 56 insertions(+), 6 deletions(-) create mode 100644 src/simd/extra_amd64.go diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go index 96001e203f1..ff6235839be 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go @@ -1397,8 +1397,8 @@ func init() { {name: "VMOVSSconst", reg: fp01, asm: "VMOVSS", aux: "Float32", rematerializeable: true}, {name: "VMOVSDconst", reg: fp01, asm: "VMOVSD", aux: "Float64", rematerializeable: true}, - {name: "VZEROUPPER", argLength: 0, asm: "VZEROUPPER"}, - {name: "VZEROALL", argLength: 0, asm: "VZEROALL"}, + {name: "VZEROUPPER", argLength: 1, reg: regInfo{clobbers: v}, asm: "VZEROUPPER"}, // arg=mem, returns mem + {name: "VZEROALL", argLength: 1, reg: regInfo{clobbers: v}, asm: "VZEROALL"}, // arg=mem, returns mem {name: "KMOVQload", argLength: 2, reg: kload, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, {name: "KMOVQstore", argLength: 3, reg: kstore, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, diff --git a/src/cmd/compile/internal/ssa/opGen.go 
b/src/cmd/compile/internal/ssa/opGen.go index e7f06fccf7e..9fc60598656 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -19070,15 +19070,19 @@ var opcodeTable = [...]opInfo{ }, { name: "VZEROUPPER", - argLen: 0, + argLen: 1, asm: x86.AVZEROUPPER, - reg: regInfo{}, + reg: regInfo{ + clobbers: 2147418112, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, }, { name: "VZEROALL", - argLen: 0, + argLen: 1, asm: x86.AVZEROALL, - reg: regInfo{}, + reg: regInfo{ + clobbers: 2147418112, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, }, { name: "KMOVQload", diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go index f5b5b9bb7cd..4d1b762f7d4 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics.go +++ b/src/cmd/compile/internal/ssagen/intrinsics.go @@ -1607,6 +1607,13 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { if buildcfg.Experiment.SIMD { // Only enable intrinsics, if SIMD experiment. simdIntrinsics(addF) + + addF("simd", "ClearAVXUpperBits", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + s.vars[memVar] = s.newValue1(ssa.OpAMD64VZEROUPPER, types.TypeMem, s.mem()) + return nil + }, + sys.AMD64) } } diff --git a/src/simd/extra_amd64.go b/src/simd/extra_amd64.go new file mode 100644 index 00000000000..6d09f04bbbf --- /dev/null +++ b/src/simd/extra_amd64.go @@ -0,0 +1,17 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.simd && amd64 + +package simd + +// ClearAVXUpperBits clears the high bits of Y0-Y15 and Z0-Z15 registers. +// It is intended for transitioning from AVX to SSE, eliminating the +// performance penalties caused by false dependencies. +// +// Note: in the future the compiler may automatically generate the +// instruction, making this function unnecessary. 
+// +// Asm: VZEROUPPER, CPU Feature: AVX +func ClearAVXUpperBits() diff --git a/src/simd/internal/simd_test/simd_test.go b/src/simd/internal/simd_test/simd_test.go index 98cfd55ac5c..1d4311d75c5 100644 --- a/src/simd/internal/simd_test/simd_test.go +++ b/src/simd/internal/simd_test/simd_test.go @@ -518,3 +518,25 @@ func TestFlattenedTranspose(t *testing.T) { checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4}) } + +func TestClearAVXUpperBits(t *testing.T) { + // Test that ClearAVXUpperBits is safe even if SIMD values (x, y) are live across + // the call: x.Sub(y) must still be correct. Usually one should not do this. + if !simd.HasAVX2() { + t.Skip("Test requires HasAVX2, not available on this hardware") + return + } + + r := make([]int64, 4) + s := make([]int64, 4) + + x := simd.LoadInt64x4Slice([]int64{10, 20, 30, 40}) + y := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4}) + + x.Add(y).StoreSlice(r) + simd.ClearAVXUpperBits() + x.Sub(y).StoreSlice(s) + + checkSlices[int64](t, r, []int64{11, 22, 33, 44}) + checkSlices[int64](t, s, []int64{9, 18, 27, 36}) +}