mirror of
https://github.com/golang/go.git
synced 2026-06-26 10:50:23 +00:00
ARM64's IfElse behavior is reversed from other platforms. Reverse it. Internally, its bitSelect is also the reverse of Wasm's BitSelect. Reverse the ARM64 one to match. Make Masked and IfElse tests portable. Change-Id: Icd2dbcb3383b2be642fd6fc7115ef1cbef0f9b78 Reviewed-on: https://go-review.googlesource.com/c/go/+/793361 Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
138 lines
4.1 KiB
Go
138 lines
4.1 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2026 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// These tests check ARM64 SIMD code generation and peephole optimizations.
|
|
|
|
//go:build goexperiment.simd && arm64
|
|
|
|
package codegen
|
|
|
|
import (
|
|
"simd/archsimd"
|
|
)
|
|
|
|
// forceSpill is an empty function that the compiler is told not to inline,
// so a call to it remains a real call in the generated code. Tests call it
// to have a value live across a call (presumably forcing that value to be
// spilled and reloaded).
//
//go:noinline
func forceSpill() {}
|
|
|
|
func spillAroundCall(a archsimd.Int8x16) archsimd.Int8x16 {
|
|
forceSpill()
|
|
// arm64:`FMOVQ` `FMOVQ`
|
|
return a
|
|
}
|
|
|
|
var (
|
|
sinkU8 archsimd.Uint8x16
|
|
sinkI8 archsimd.Int8x16
|
|
sinkU16 archsimd.Uint16x8
|
|
sinkU32 archsimd.Uint32x4
|
|
sinkU64 archsimd.Uint64x2
|
|
sinkF32 archsimd.Float32x4
|
|
sinkF64 archsimd.Float64x2
|
|
)
|
|
|
|
func broadcastConstImmFold(k int) {
|
|
switch k {
|
|
case 0:
|
|
// arm64:`VMOVI [$]0,` -`VDUP`
|
|
sinkU8 = archsimd.BroadcastUint8x16(0)
|
|
case 1:
|
|
// arm64:`VMOVI [$]1,` -`VDUP`
|
|
sinkU8 = archsimd.BroadcastUint8x16(1)
|
|
case 127:
|
|
// arm64:`VMOVI [$]127,` -`VDUP`
|
|
sinkI8 = archsimd.BroadcastInt8x16(127)
|
|
case 128:
|
|
// arm64:`VMOVI [$]128,` -`VDUP`
|
|
sinkU8 = archsimd.BroadcastUint8x16(128)
|
|
case -128:
|
|
// arm64:`VMOVI [$]128,` -`VDUP`
|
|
sinkI8 = archsimd.BroadcastInt8x16(-128)
|
|
case 255:
|
|
// arm64:`VMOVI [$]255,` -`VDUP`
|
|
sinkU8 = archsimd.BroadcastUint8x16(255)
|
|
case -1:
|
|
// arm64:`VMOVI [$]255,` -`VDUP`
|
|
sinkI8 = archsimd.BroadcastInt8x16(-1)
|
|
case -2:
|
|
// arm64:`VMOVI [$]254,` -`VDUP`
|
|
sinkI8 = archsimd.BroadcastInt8x16(-2)
|
|
default:
|
|
// arm64:`VMOV R0, V\d+.B\[0\]` `VDUP`
|
|
sinkI8 = archsimd.BroadcastInt8x16(int8(k))
|
|
}
|
|
}
|
|
|
|
func shiftAllImmFold(k int) {
|
|
switch k {
|
|
case 100:
|
|
// arm64:`VMOVI [$]100,` `VSSHL` -`VDUP`
|
|
sinkI8 = sinkI8.ShiftAllLeft(100)
|
|
// arm64:`VMOVI [$]156,` `VUSHL` -`VDUP`
|
|
sinkU8 = sinkU8.ShiftAllRight(100)
|
|
}
|
|
}
|
|
|
|
func setHiUint32(x, lo archsimd.Uint32x4) {
|
|
// arm64:`VMOV V1.D\[0\], V0.D\[1\]`
|
|
sinkU32 = loToHiUint32Vec(x, lo)
|
|
}
|
|
|
|
func setHiFloat64(x, lo archsimd.Float64x2) {
|
|
// arm64:`VMOV V1.D\[0\], V0.D\[1\]`
|
|
sinkF64 = x.SetElem(1, lo.GetElem(0))
|
|
}
|
|
|
|
func getHiFloat32(x archsimd.Float32x4) {
|
|
// arm64:`VDUP V0.D\[1\],`
|
|
sinkF32 = x.HiToLo()
|
|
}
|
|
|
|
func getHiFloat64(x archsimd.Float64x2) {
|
|
// arm64:`VDUP V0.D\[1\],`
|
|
sinkF64 = x.HiToLo()
|
|
}
|
|
|
|
func foldGetHiSetHiMuls(a, b archsimd.Uint16x8) archsimd.Uint16x8 {
|
|
wLo := a.MulWidenLo(b) // arm64: `VUMULL V0.H4, V1.H4, V[0-9].S4`
|
|
wHi := a.HiToLo().MulWidenLo(b.HiToLo()) // arm64: `VUMULL2 V1.H8, V0.H8, V[0-9].S4` -`VDUP`
|
|
narrowLo := wLo.TruncToUint16() // arm64: `VXTN V[0-9]+.S4, V0.H4`
|
|
narrowHi := wHi.TruncToUint16() // folded into next line
|
|
return loToHiUint16Vec(narrowLo, narrowHi) // arm64: `VXTN2 V[0-9]+.S4, V0.H8`
|
|
}
|
|
|
|
func carrylessMultiplies(x, y archsimd.Uint64x2) archsimd.Uint64x2 {
|
|
lo := x.CarrylessMultiplyEven(y) // arm64:`VPMULL V` -`VPMULL2`
|
|
hi := x.HiToLo().CarrylessMultiplyEven(y.HiToLo()) // arm64:`VPMULL2 V` -`VPMULL `
|
|
return lo.Xor(hi)
|
|
}
|
|
|
|
func mergeWithNotMask(x, y archsimd.Int8x16, mask archsimd.Mask8x16, f1, f2 archsimd.Float32x4) {
|
|
// arm64:`VBIT` -`VBIF` -`VNOT`
|
|
sinkI8 = x.IfElse(mask.Not(), y)
|
|
// arm64: `VFCMEQ`
|
|
eq := f1.Equal(f2)
|
|
// The next line `ne` should be CSEd with `eq` above
|
|
ne := f1.NotEqual(f2) // arm64: -`.*`
|
|
feq := f1.IfElse(eq, f2) // arm64:`VBIF`
|
|
fne := f1.IfElse(ne, f2) // arm64:`VBIT`
|
|
sinkF32 = fne.Add(feq)
|
|
}
|
|
|
|
// loToHiUint32Vec returns a vector with the lower 64 bits of x preserved and
|
|
// the upper 64 bits replaced with the lower 64 bits of lo.
|
|
// It routes through Float64x2 to stay in the FP/SIMD register file,
|
|
// avoiding a round-trip through a GP register.
|
|
func loToHiUint32Vec(x, lo archsimd.Uint32x4) archsimd.Uint32x4 {
|
|
return x.ReshapeToUint64s().BitsToFloat64().SetElem(1, lo.ReshapeToUint64s().BitsToFloat64().GetElem(0)).ToBits().ReshapeToUint32s()
|
|
}
|
|
|
|
// loToHiUint16Vec returns a vector with the lower 64 bits of x preserved and
|
|
// the upper 64 bits replaced with the lower 64 bits of lo.
|
|
// It routes through Float64x2 to stay in the FP/SIMD register file,
|
|
// avoiding a round-trip through a GP register.
|
|
func loToHiUint16Vec(x, lo archsimd.Uint16x8) archsimd.Uint16x8 {
|
|
return x.ReshapeToUint64s().BitsToFloat64().SetElem(1, lo.ReshapeToUint64s().BitsToFloat64().GetElem(0)).ToBits().ReshapeToUint16s()
|
|
}
|