mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile, simd: add ClearAVXUpperBits
Intended for transitioning from AVX to SSE, this helps early adopters benchmarking. The compiler should take care of that, one day. Change-Id: I9d7413f22f30f8dc0c632e8e806386d9ca8e8308 Reviewed-on: https://go-review.googlesource.com/c/go/+/701199 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Junyang Shao <shaojunyang@google.com> Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
7c8b9115bc
commit
356c48d8e9
5 changed files with 56 additions and 6 deletions
|
|
@ -1397,8 +1397,8 @@ func init() {
|
|||
{name: "VMOVSSconst", reg: fp01, asm: "VMOVSS", aux: "Float32", rematerializeable: true},
|
||||
{name: "VMOVSDconst", reg: fp01, asm: "VMOVSD", aux: "Float64", rematerializeable: true},
|
||||
|
||||
{name: "VZEROUPPER", argLength: 0, asm: "VZEROUPPER"},
|
||||
{name: "VZEROALL", argLength: 0, asm: "VZEROALL"},
|
||||
{name: "VZEROUPPER", argLength: 1, reg: regInfo{clobbers: v}, asm: "VZEROUPPER"}, // arg=mem, returns mem
|
||||
{name: "VZEROALL", argLength: 1, reg: regInfo{clobbers: v}, asm: "VZEROALL"}, // arg=mem, returns mem
|
||||
|
||||
{name: "KMOVQload", argLength: 2, reg: kload, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
|
||||
{name: "KMOVQstore", argLength: 3, reg: kstore, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"},
|
||||
|
|
|
|||
|
|
@ -19070,15 +19070,19 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
{
|
||||
name: "VZEROUPPER",
|
||||
argLen: 0,
|
||||
argLen: 1,
|
||||
asm: x86.AVZEROUPPER,
|
||||
reg: regInfo{},
|
||||
reg: regInfo{
|
||||
clobbers: 2147418112, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VZEROALL",
|
||||
argLen: 0,
|
||||
argLen: 1,
|
||||
asm: x86.AVZEROALL,
|
||||
reg: regInfo{},
|
||||
reg: regInfo{
|
||||
clobbers: 2147418112, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "KMOVQload",
|
||||
|
|
|
|||
|
|
@ -1607,6 +1607,13 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
|
|||
if buildcfg.Experiment.SIMD {
|
||||
// Only enable the SIMD intrinsics if the SIMD experiment is enabled.
|
||||
simdIntrinsics(addF)
|
||||
|
||||
addF("simd", "ClearAVXUpperBits",
|
||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
s.vars[memVar] = s.newValue1(ssa.OpAMD64VZEROUPPER, types.TypeMem, s.mem())
|
||||
return nil
|
||||
},
|
||||
sys.AMD64)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
17
src/simd/extra_amd64.go
Normal file
17
src/simd/extra_amd64.go
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build goexperiment.simd && amd64
|
||||
|
||||
package simd
|
||||
|
||||
// ClearAVXUpperBits clears bits 128 and above of the Y0-Y15 and Z0-Z15 registers.
|
||||
// It is intended for transitioning from AVX to SSE, eliminating the
|
||||
// performance penalties caused by false dependencies.
|
||||
//
|
||||
// Note: in the future the compiler may automatically generate the
|
||||
// instruction, making this function unnecessary.
|
||||
//
|
||||
// Asm: VZEROUPPER, CPU Feature: AVX
|
||||
func ClearAVXUpperBits()
|
||||
|
|
@ -518,3 +518,25 @@ func TestFlattenedTranspose(t *testing.T) {
|
|||
checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4})
|
||||
|
||||
}
|
||||
|
||||
func TestClearAVXUpperBits(t *testing.T) {
|
||||
// Test that ClearAVXUpperBits is safe even if there are live SIMD values
|
||||
// (although usually one should not do this).
|
||||
if !simd.HasAVX2() {
|
||||
t.Skip("Test requires HasAVX2, not available on this hardware")
|
||||
return
|
||||
}
|
||||
|
||||
r := make([]int64, 4)
|
||||
s := make([]int64, 4)
|
||||
|
||||
x := simd.LoadInt64x4Slice([]int64{10, 20, 30, 40})
|
||||
y := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
|
||||
|
||||
x.Add(y).StoreSlice(r)
|
||||
simd.ClearAVXUpperBits()
|
||||
x.Sub(y).StoreSlice(s)
|
||||
|
||||
checkSlices[int64](t, r, []int64{11, 22, 33, 44})
|
||||
checkSlices[int64](t, s, []int64{9, 18, 27, 36})
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue