go/test/codegen/simd_arm64.go
Cherry Mui a2b3c73f75 simd/archsimd: correct ARM64 IfElse semantics
ARM64's IfElse behavior is reversed from other platforms. Reverse
it. Internally, its bitSelect is also the reverse of Wasm's
BitSelect. Reverse the ARM64 one to match.

Make Masked and IfElse tests portable.

Change-Id: Icd2dbcb3383b2be642fd6fc7115ef1cbef0f9b78
Reviewed-on: https://go-review.googlesource.com/c/go/+/793361
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
2026-06-23 20:49:14 -07:00

138 lines
4.1 KiB
Go

// asmcheck
// Copyright 2026 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// These tests check ARM64 SIMD code generation and peephole optimizations.
//go:build goexperiment.simd && arm64
package codegen
import (
"simd/archsimd"
)
// forceSpill is an empty function that is never inlined, so each use of it
// remains a real call in the generated code. spillAroundCall calls it to put a
// call between receiving a vector argument and returning it.
//
//go:noinline
func forceSpill() {
}
// spillAroundCall returns its vector argument unchanged after calling
// forceSpill, so a is live across a call that is not inlined.
func spillAroundCall(a archsimd.Int8x16) archsimd.Int8x16 {
forceSpill()
// The check below sits directly above the return statement and expects FMOVQ
// there. Presumably these are the spill and reload of a around the call
// (verify against the compiler output).
// arm64:`FMOVQ` `FMOVQ`
return a
}
// Package-level sinks, one per vector type. Test functions in this file store
// their results here rather than returning them, presumably so that the
// computed vectors count as used (TODO confirm). The code visible in this file
// stores into sinkU8, sinkI8, sinkU32, sinkF32 and sinkF64.
var (
sinkU8 archsimd.Uint8x16
sinkI8 archsimd.Int8x16
sinkU16 archsimd.Uint16x8
sinkU32 archsimd.Uint32x4
sinkU64 archsimd.Uint64x2
sinkF32 archsimd.Float32x4
sinkF64 archsimd.Float64x2
)
// broadcastConstImmFold broadcasts a different 8-bit value in each case of a
// switch on k and stores the resulting vector in sinkU8 or sinkI8.
//
// For the constant cases the checks expect a VMOVI with an immediate and no
// VDUP. The immediate listed for a negative int8 constant is its unsigned byte
// value (-128 as 128, -1 as 255, -2 as 254). The default case broadcasts
// int8(k), which is not a constant, and its check expects a VMOV from R0 into
// byte lane 0 together with a VDUP.
func broadcastConstImmFold(k int) {
switch k {
case 0:
// arm64:`VMOVI [$]0,` -`VDUP`
sinkU8 = archsimd.BroadcastUint8x16(0)
case 1:
// arm64:`VMOVI [$]1,` -`VDUP`
sinkU8 = archsimd.BroadcastUint8x16(1)
case 127:
// arm64:`VMOVI [$]127,` -`VDUP`
sinkI8 = archsimd.BroadcastInt8x16(127)
case 128:
// arm64:`VMOVI [$]128,` -`VDUP`
sinkU8 = archsimd.BroadcastUint8x16(128)
// Signed -128 shares its byte pattern with the unsigned 128 case above, so
// the same immediate is expected.
case -128:
// arm64:`VMOVI [$]128,` -`VDUP`
sinkI8 = archsimd.BroadcastInt8x16(-128)
case 255:
// arm64:`VMOVI [$]255,` -`VDUP`
sinkU8 = archsimd.BroadcastUint8x16(255)
// Signed -1 shares its byte pattern with the unsigned 255 case above.
case -1:
// arm64:`VMOVI [$]255,` -`VDUP`
sinkI8 = archsimd.BroadcastInt8x16(-1)
// Signed -2 is 254 when read as an unsigned byte.
case -2:
// arm64:`VMOVI [$]254,` -`VDUP`
sinkI8 = archsimd.BroadcastInt8x16(-2)
// Here the value is only known at run time, so no immediate form is expected.
// NOTE(review) the R0 in the check assumes k is still in its incoming
// argument register at this point; confirm if the ABI or the switch changes.
default:
// arm64:`VMOV R0, V\d+.B\[0\]` `VDUP`
sinkI8 = archsimd.BroadcastInt8x16(int8(k))
}
}
// shiftAllImmFold, when k is 100, applies ShiftAllLeft(100) to sinkI8 and
// ShiftAllRight(100) to sinkU8, storing each result back into its sink. For
// any other k it does nothing.
//
// Both checks expect the shift count to be materialised with VMOVI and no
// VDUP. The left shift expects immediate 100 with VSSHL. The right shift
// expects immediate 156 with VUSHL; 156 is -100 read as an unsigned byte, so
// the right shift is presumably emitted as a left shift by a negated count
// (TODO confirm against the lowering rules).
func shiftAllImmFold(k int) {
switch k {
case 100:
// arm64:`VMOVI [$]100,` `VSSHL` -`VDUP`
sinkI8 = sinkI8.ShiftAllLeft(100)
// arm64:`VMOVI [$]156,` `VUSHL` -`VDUP`
sinkU8 = sinkU8.ShiftAllRight(100)
}
}
// setHiUint32 stores loToHiUint32Vec(x, lo) in sinkU32. The check expects a
// VMOV from doubleword lane 0 of V1 to doubleword lane 1 of V0 on the line
// that calls the helper.
// NOTE(review) the register names assume x and lo arrive in V0 and V1 and that
// the helper call is inlined; confirm if either changes.
func setHiUint32(x, lo archsimd.Uint32x4) {
// arm64:`VMOV V1.D\[0\], V0.D\[1\]`
sinkU32 = loToHiUint32Vec(x, lo)
}
// setHiFloat64 writes element 0 of lo into element 1 of x and stores the
// result in sinkF64. The check expects this GetElem/SetElem pair to be a VMOV
// from doubleword lane 0 of V1 to doubleword lane 1 of V0.
// NOTE(review) the register names assume x and lo arrive in V0 and V1; confirm.
func setHiFloat64(x, lo archsimd.Float64x2) {
// arm64:`VMOV V1.D\[0\], V0.D\[1\]`
sinkF64 = x.SetElem(1, lo.GetElem(0))
}
// getHiFloat32 stores x.HiToLo() in sinkF32. The check expects a VDUP whose
// source is doubleword lane 1 of V0.
// NOTE(review) V0 assumes x is still in its incoming argument register; confirm.
func getHiFloat32(x archsimd.Float32x4) {
// arm64:`VDUP V0.D\[1\],`
sinkF32 = x.HiToLo()
}
// getHiFloat64 stores x.HiToLo() in sinkF64. The check expects a VDUP whose
// source is doubleword lane 1 of V0.
// NOTE(review) V0 assumes x is still in its incoming argument register; confirm.
func getHiFloat64(x archsimd.Float64x2) {
// arm64:`VDUP V0.D\[1\],`
sinkF64 = x.HiToLo()
}
// foldGetHiSetHiMuls computes two widening products, a.MulWidenLo(b) and the
// same operation on a.HiToLo() and b.HiToLo(), truncates both back to 16-bit
// lanes, and returns loToHiUint16Vec(narrowLo, narrowHi).
//
// The checks expect the two HiToLo calls to fold into a single VUMULL2 (no
// VDUP on that line), the first truncation to be a VXTN, and the second
// truncation plus the final combine to become one VXTN2 on the return line.
// NOTE(review) the V0/V1 register names in the checks assume a and b stay in
// their incoming argument registers; confirm.
func foldGetHiSetHiMuls(a, b archsimd.Uint16x8) archsimd.Uint16x8 {
wLo := a.MulWidenLo(b) // arm64: `VUMULL V0.H4, V1.H4, V[0-9].S4`
wHi := a.HiToLo().MulWidenLo(b.HiToLo()) // arm64: `VUMULL2 V1.H8, V0.H8, V[0-9].S4` -`VDUP`
narrowLo := wLo.TruncToUint16() // arm64: `VXTN V[0-9]+.S4, V0.H4`
narrowHi := wHi.TruncToUint16() // folded into next line
return loToHiUint16Vec(narrowLo, narrowHi) // arm64: `VXTN2 V[0-9]+.S4, V0.H8`
}
// carrylessMultiplies returns the XOR of two CarrylessMultiplyEven results:
// one computed from x and y as given, the other from x.HiToLo() and
// y.HiToLo().
//
// The check on the first line expects VPMULL and forbids VPMULL2. The check on
// the second line expects VPMULL2 and forbids a plain VPMULL, i.e. the HiToLo
// calls are expected to fold into the high-half form of the instruction.
func carrylessMultiplies(x, y archsimd.Uint64x2) archsimd.Uint64x2 {
lo := x.CarrylessMultiplyEven(y) // arm64:`VPMULL V` -`VPMULL2`
hi := x.HiToLo().CarrylessMultiplyEven(y.HiToLo()) // arm64:`VPMULL2 V` -`VPMULL `
return lo.Xor(hi)
}
// mergeWithNotMask checks how IfElse is lowered when its mask is negated.
//
// First, x.IfElse(mask.Not(), y) is stored in sinkI8; its check expects VBIT
// and forbids both VBIF and VNOT. Then eq and ne are computed from the same
// operands f1 and f2. The check on the ne line forbids any instruction being
// attributed to it, and the two IfElse calls are expected to lower to VBIF
// (with eq) and VBIT (with ne). The sum of the two selections is stored in
// sinkF32.
func mergeWithNotMask(x, y archsimd.Int8x16, mask archsimd.Mask8x16, f1, f2 archsimd.Float32x4) {
// arm64:`VBIT` -`VBIF` -`VNOT`
sinkI8 = x.IfElse(mask.Not(), y)
// arm64: `VFCMEQ`
eq := f1.Equal(f2)
// The next line `ne` should be CSEd with `eq` above
ne := f1.NotEqual(f2) // arm64: -`.*`
feq := f1.IfElse(eq, f2) // arm64:`VBIF`
fne := f1.IfElse(ne, f2) // arm64:`VBIT`
sinkF32 = fne.Add(feq)
}
// loToHiUint32Vec returns a vector with the lower 64 bits of x preserved and
// the upper 64 bits replaced with the lower 64 bits of lo.
// It routes through Float64x2 to stay in the FP/SIMD register file,
// avoiding a round-trip through a GP register.
func loToHiUint32Vec(x, lo archsimd.Uint32x4) archsimd.Uint32x4 {
return x.ReshapeToUint64s().BitsToFloat64().SetElem(1, lo.ReshapeToUint64s().BitsToFloat64().GetElem(0)).ToBits().ReshapeToUint32s()
}
// loToHiUint16Vec returns a vector with the lower 64 bits of x preserved and
// the upper 64 bits replaced with the lower 64 bits of lo.
// It routes through Float64x2 to stay in the FP/SIMD register file,
// avoiding a round-trip through a GP register.
func loToHiUint16Vec(x, lo archsimd.Uint16x8) archsimd.Uint16x8 {
return x.ReshapeToUint64s().BitsToFloat64().SetElem(1, lo.ReshapeToUint64s().BitsToFloat64().GetElem(0)).ToBits().ReshapeToUint16s()
}