mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile, simd: add 64-bit select-from-pair methods
these are in the same style as the 32-bit select-from-pair, including the grouped variant. This does not quite capture the full awesome power of VSHUFPD where it can select differently in each group; that will be some other method, that is more complex. Change-Id: I807ddd7c1256103b5b0d7c5d60bd70b185e3aaf0 Reviewed-on: https://go-review.googlesource.com/c/go/+/705695 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
parent
25c36b95d1
commit
ea3b2ecd28
4 changed files with 820 additions and 352 deletions
|
|
@ -1632,12 +1632,12 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
|
|||
addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
|
||||
|
||||
sfp := func(method string, hwop ssa.Op, vectype *types.Type) {
|
||||
sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) {
|
||||
addF("simd", method,
|
||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
x, a, b, c, d, y := args[0], args[1], args[2], args[3], args[4], args[5]
|
||||
if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 && c.Op == ssa.OpConst8 && d.Op == ssa.OpConst8 {
|
||||
return selectFromPair(x, a, b, c, d, y, s, hwop, vectype)
|
||||
return select4FromPair(x, a, b, c, d, y, s, hwop, vectype)
|
||||
} else {
|
||||
return s.callResult(n, callNormal)
|
||||
}
|
||||
|
|
@ -1645,25 +1645,64 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
|
|||
sys.AMD64)
|
||||
}
|
||||
|
||||
sfp("Int32x4.SelectFromPair", ssa.OpconcatSelectedConstantInt32x4, types.TypeVec128)
|
||||
sfp("Uint32x4.SelectFromPair", ssa.OpconcatSelectedConstantUint32x4, types.TypeVec128)
|
||||
sfp("Float32x4.SelectFromPair", ssa.OpconcatSelectedConstantFloat32x4, types.TypeVec128)
|
||||
sfp4("Int32x4.SelectFromPair", ssa.OpconcatSelectedConstantInt32x4, types.TypeVec128)
|
||||
sfp4("Uint32x4.SelectFromPair", ssa.OpconcatSelectedConstantUint32x4, types.TypeVec128)
|
||||
sfp4("Float32x4.SelectFromPair", ssa.OpconcatSelectedConstantFloat32x4, types.TypeVec128)
|
||||
|
||||
sfp("Int32x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt32x8, types.TypeVec256)
|
||||
sfp("Uint32x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint32x8, types.TypeVec256)
|
||||
sfp("Float32x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat32x8, types.TypeVec256)
|
||||
sfp4("Int32x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt32x8, types.TypeVec256)
|
||||
sfp4("Uint32x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint32x8, types.TypeVec256)
|
||||
sfp4("Float32x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat32x8, types.TypeVec256)
|
||||
|
||||
sfp("Int32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt32x16, types.TypeVec512)
|
||||
sfp("Uint32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint32x16, types.TypeVec512)
|
||||
sfp("Float32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat32x16, types.TypeVec512)
|
||||
sfp4("Int32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt32x16, types.TypeVec512)
|
||||
sfp4("Uint32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint32x16, types.TypeVec512)
|
||||
sfp4("Float32x16.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat32x16, types.TypeVec512)
|
||||
|
||||
sfp2 := func(method string, hwop ssa.Op, vectype *types.Type, cscimm func(i, j uint8) int64) {
|
||||
addF("simd", method,
|
||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
x, a, b, y := args[0], args[1], args[2], args[3]
|
||||
if a.Op == ssa.OpConst8 && b.Op == ssa.OpConst8 {
|
||||
return select2FromPair(x, a, b, y, s, hwop, vectype, cscimm)
|
||||
} else {
|
||||
return s.callResult(n, callNormal)
|
||||
}
|
||||
},
|
||||
sys.AMD64)
|
||||
}
|
||||
|
||||
sfp2("Uint64x2.SelectFromPair", ssa.OpconcatSelectedConstantUint64x2, types.TypeVec128, cscimm2)
|
||||
sfp2("Int64x2.SelectFromPair", ssa.OpconcatSelectedConstantInt64x2, types.TypeVec128, cscimm2)
|
||||
sfp2("Float64x2.SelectFromPair", ssa.OpconcatSelectedConstantFloat64x2, types.TypeVec128, cscimm2)
|
||||
|
||||
sfp2("Uint64x4.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint64x4, types.TypeVec256, cscimm2g2)
|
||||
sfp2("Int64x4.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt64x4, types.TypeVec256, cscimm2g2)
|
||||
sfp2("Float64x4.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat64x4, types.TypeVec256, cscimm2g2)
|
||||
|
||||
sfp2("Uint64x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedUint64x8, types.TypeVec512, cscimm2g4)
|
||||
sfp2("Int64x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedInt64x8, types.TypeVec512, cscimm2g4)
|
||||
sfp2("Float64x8.SelectFromPairGrouped", ssa.OpconcatSelectedConstantGroupedFloat64x8, types.TypeVec512, cscimm2g4)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func cscimm(a, b, c, d uint8) int64 {
|
||||
func cscimm4(a, b, c, d uint8) int64 {
|
||||
return se(a + b<<2 + c<<4 + d<<6)
|
||||
}
|
||||
|
||||
func cscimm2(a, b uint8) int64 {
|
||||
return se(a + b<<1)
|
||||
}
|
||||
|
||||
func cscimm2g2(a, b uint8) int64 {
|
||||
g := cscimm2(a, b)
|
||||
return int64(int8(g + g<<2))
|
||||
}
|
||||
|
||||
func cscimm2g4(a, b uint8) int64 {
|
||||
g := cscimm2g2(a, b)
|
||||
return int64(int8(g + g<<4))
|
||||
}
|
||||
|
||||
const (
|
||||
_LLLL = iota
|
||||
_HLLL
|
||||
|
|
@ -1683,7 +1722,32 @@ const (
|
|||
_HHHH
|
||||
)
|
||||
|
||||
func selectFromPair(x, _a, _b, _c, _d, y *ssa.Value, s *state, op ssa.Op, t *types.Type) *ssa.Value {
|
||||
const (
|
||||
_LL = iota
|
||||
_HL
|
||||
_LH
|
||||
_HH
|
||||
)
|
||||
|
||||
func select2FromPair(x, _a, _b, y *ssa.Value, s *state, op ssa.Op, t *types.Type, csc func(a, b uint8) int64) *ssa.Value {
|
||||
a, b := uint8(_a.AuxInt8()), uint8(_b.AuxInt8())
|
||||
pattern := (a&2)>>1 + (b & 2)
|
||||
a, b = a&1, b&1
|
||||
|
||||
switch pattern {
|
||||
case _LL:
|
||||
return s.newValue2I(op, t, csc(a, b), x, x)
|
||||
case _HH:
|
||||
return s.newValue2I(op, t, csc(a, b), y, y)
|
||||
case _LH:
|
||||
return s.newValue2I(op, t, csc(a, b), x, y)
|
||||
case _HL:
|
||||
return s.newValue2I(op, t, csc(a, b), y, x)
|
||||
}
|
||||
panic("The preceding switch should have been exhaustive")
|
||||
}
|
||||
|
||||
func select4FromPair(x, _a, _b, _c, _d, y *ssa.Value, s *state, op ssa.Op, t *types.Type) *ssa.Value {
|
||||
a, b, c, d := uint8(_a.AuxInt8()), uint8(_b.AuxInt8()), uint8(_c.AuxInt8()), uint8(_d.AuxInt8())
|
||||
pattern := a>>2 + (b&4)>>1 + (c & 4) + (d&4)<<1
|
||||
|
||||
|
|
@ -1692,54 +1756,54 @@ func selectFromPair(x, _a, _b, _c, _d, y *ssa.Value, s *state, op ssa.Op, t *typ
|
|||
switch pattern {
|
||||
case _LLLL:
|
||||
// TODO DETECT 0,1,2,3, 0,0,0,0
|
||||
return s.newValue2I(op, t, cscimm(a, b, c, d), x, x)
|
||||
return s.newValue2I(op, t, cscimm4(a, b, c, d), x, x)
|
||||
case _HHHH:
|
||||
// TODO DETECT 0,1,2,3, 0,0,0,0
|
||||
return s.newValue2I(op, t, cscimm(a, b, c, d), y, y)
|
||||
return s.newValue2I(op, t, cscimm4(a, b, c, d), y, y)
|
||||
case _LLHH:
|
||||
return s.newValue2I(op, t, cscimm(a, b, c, d), x, y)
|
||||
return s.newValue2I(op, t, cscimm4(a, b, c, d), x, y)
|
||||
case _HHLL:
|
||||
return s.newValue2I(op, t, cscimm(a, b, c, d), y, x)
|
||||
return s.newValue2I(op, t, cscimm4(a, b, c, d), y, x)
|
||||
|
||||
case _HLLL:
|
||||
z := s.newValue2I(op, t, cscimm(a, a, b, b), y, x)
|
||||
return s.newValue2I(op, t, cscimm(0, 2, c, d), z, x)
|
||||
z := s.newValue2I(op, t, cscimm4(a, a, b, b), y, x)
|
||||
return s.newValue2I(op, t, cscimm4(0, 2, c, d), z, x)
|
||||
case _LHLL:
|
||||
z := s.newValue2I(op, t, cscimm(a, a, b, b), x, y)
|
||||
return s.newValue2I(op, t, cscimm(0, 2, c, d), z, x)
|
||||
z := s.newValue2I(op, t, cscimm4(a, a, b, b), x, y)
|
||||
return s.newValue2I(op, t, cscimm4(0, 2, c, d), z, x)
|
||||
case _HLHH:
|
||||
z := s.newValue2I(op, t, cscimm(a, a, b, b), y, x)
|
||||
return s.newValue2I(op, t, cscimm(0, 2, c, d), z, y)
|
||||
z := s.newValue2I(op, t, cscimm4(a, a, b, b), y, x)
|
||||
return s.newValue2I(op, t, cscimm4(0, 2, c, d), z, y)
|
||||
case _LHHH:
|
||||
z := s.newValue2I(op, t, cscimm(a, a, b, b), x, y)
|
||||
return s.newValue2I(op, t, cscimm(0, 2, c, d), z, y)
|
||||
z := s.newValue2I(op, t, cscimm4(a, a, b, b), x, y)
|
||||
return s.newValue2I(op, t, cscimm4(0, 2, c, d), z, y)
|
||||
|
||||
case _LLLH:
|
||||
z := s.newValue2I(op, t, cscimm(c, c, d, d), x, y)
|
||||
return s.newValue2I(op, t, cscimm(a, b, 0, 2), x, z)
|
||||
z := s.newValue2I(op, t, cscimm4(c, c, d, d), x, y)
|
||||
return s.newValue2I(op, t, cscimm4(a, b, 0, 2), x, z)
|
||||
case _LLHL:
|
||||
z := s.newValue2I(op, t, cscimm(c, c, d, d), y, x)
|
||||
return s.newValue2I(op, t, cscimm(a, b, 0, 2), x, z)
|
||||
z := s.newValue2I(op, t, cscimm4(c, c, d, d), y, x)
|
||||
return s.newValue2I(op, t, cscimm4(a, b, 0, 2), x, z)
|
||||
|
||||
case _HHLH:
|
||||
z := s.newValue2I(op, t, cscimm(c, c, d, d), x, y)
|
||||
return s.newValue2I(op, t, cscimm(a, b, 0, 2), y, z)
|
||||
z := s.newValue2I(op, t, cscimm4(c, c, d, d), x, y)
|
||||
return s.newValue2I(op, t, cscimm4(a, b, 0, 2), y, z)
|
||||
|
||||
case _HHHL:
|
||||
z := s.newValue2I(op, t, cscimm(c, c, d, d), y, x)
|
||||
return s.newValue2I(op, t, cscimm(a, b, 0, 2), y, z)
|
||||
z := s.newValue2I(op, t, cscimm4(c, c, d, d), y, x)
|
||||
return s.newValue2I(op, t, cscimm4(a, b, 0, 2), y, z)
|
||||
|
||||
case _LHLH:
|
||||
z := s.newValue2I(op, t, cscimm(a, c, b, d), x, y)
|
||||
z := s.newValue2I(op, t, cscimm4(a, c, b, d), x, y)
|
||||
return s.newValue2I(op, t, se(0b11_01_10_00), z, z)
|
||||
case _HLHL:
|
||||
z := s.newValue2I(op, t, cscimm(b, d, a, c), x, y)
|
||||
z := s.newValue2I(op, t, cscimm4(b, d, a, c), x, y)
|
||||
return s.newValue2I(op, t, se(0b01_11_00_10), z, z)
|
||||
case _HLLH:
|
||||
z := s.newValue2I(op, t, cscimm(b, c, a, d), x, y)
|
||||
z := s.newValue2I(op, t, cscimm4(b, c, a, d), x, y)
|
||||
return s.newValue2I(op, t, se(0b11_01_00_10), z, z)
|
||||
case _LHHL:
|
||||
z := s.newValue2I(op, t, cscimm(a, d, b, c), x, y)
|
||||
z := s.newValue2I(op, t, cscimm4(a, d, b, c), x, y)
|
||||
return s.newValue2I(op, t, se(0b01_11_10_00), z, z)
|
||||
}
|
||||
panic("The preceding switch should have been exhaustive")
|
||||
|
|
@ -1906,7 +1970,7 @@ func opLen2Imm8_II(op ssa.Op, t *types.Type, _ int) func(s *state, n *ir.CallExp
|
|||
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
if args[1].Op == ssa.OpConst8 && args[2].Op == ssa.OpConst8 && args[1].AuxInt & ^3 == 0 && args[2].AuxInt & ^3 == 0 {
|
||||
i1, i2 := args[1].AuxInt, args[2].AuxInt
|
||||
return s.newValue2I(op, t, i1+i2<<4, args[0], args[3])
|
||||
return s.newValue2I(op, t, int64(int8(i1+i2<<4)), args[0], args[3])
|
||||
}
|
||||
four := s.constInt64(types.Types[types.TUINT8], 4)
|
||||
shifted := s.newValue2(ssa.OpLsh8x8, types.Types[types.TUINT8], args[2], four)
|
||||
|
|
|
|||
|
|
@ -595,7 +595,7 @@ func TestIsZero(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestSelectFromPairConst(t *testing.T) {
|
||||
func TestSelect4FromPairConst(t *testing.T) {
|
||||
x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
|
||||
y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
|
||||
|
||||
|
|
@ -652,7 +652,7 @@ func selectFromPairInt32x4(x simd.Int32x4, a, b, c, d uint8, y simd.Int32x4) sim
|
|||
return x.SelectFromPair(a, b, c, d, y)
|
||||
}
|
||||
|
||||
func TestSelectFromPairVar(t *testing.T) {
|
||||
func TestSelect4FromPairVar(t *testing.T) {
|
||||
x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
|
||||
y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
|
||||
|
||||
|
|
@ -704,7 +704,7 @@ func TestSelectFromPairVar(t *testing.T) {
|
|||
foo(hllh, 4, 0, 1, 5)
|
||||
}
|
||||
|
||||
func TestSelectFromPairConstGroupedFloat32x8(t *testing.T) {
|
||||
func TestSelect4FromPairConstGrouped(t *testing.T) {
|
||||
x := simd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13})
|
||||
y := simd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17})
|
||||
|
||||
|
|
@ -887,5 +887,119 @@ func TestSelect128FromPairVar(t *testing.T) {
|
|||
foo(cd, 2, 3)
|
||||
foo(da, 3, 0)
|
||||
foo(dc, 3, 2)
|
||||
|
||||
}
|
||||
|
||||
func TestSelect2FromPairConst(t *testing.T) {
|
||||
x := simd.LoadUint64x2Slice([]uint64{0, 1})
|
||||
y := simd.LoadUint64x2Slice([]uint64{2, 3})
|
||||
|
||||
ll := x.SelectFromPair(0, 1, y)
|
||||
hh := x.SelectFromPair(3, 2, y)
|
||||
lh := x.SelectFromPair(0, 3, y)
|
||||
hl := x.SelectFromPair(2, 1, y)
|
||||
|
||||
r := make([]uint64, 2, 2)
|
||||
|
||||
foo := func(v simd.Uint64x2, a, b uint64) {
|
||||
v.StoreSlice(r)
|
||||
checkSlices[uint64](t, r, []uint64{a, b})
|
||||
}
|
||||
|
||||
foo(ll, 0, 1)
|
||||
foo(hh, 3, 2)
|
||||
foo(lh, 0, 3)
|
||||
foo(hl, 2, 1)
|
||||
}
|
||||
|
||||
func TestSelect2FromPairConstGroupedUint(t *testing.T) {
|
||||
x := simd.LoadUint64x4Slice([]uint64{0, 1, 10, 11})
|
||||
y := simd.LoadUint64x4Slice([]uint64{2, 3, 12, 13})
|
||||
|
||||
ll := x.SelectFromPairGrouped(0, 1, y)
|
||||
hh := x.SelectFromPairGrouped(3, 2, y)
|
||||
lh := x.SelectFromPairGrouped(0, 3, y)
|
||||
hl := x.SelectFromPairGrouped(2, 1, y)
|
||||
|
||||
r := make([]uint64, 4, 4)
|
||||
|
||||
foo := func(v simd.Uint64x4, a, b uint64) {
|
||||
v.StoreSlice(r)
|
||||
checkSlices[uint64](t, r, []uint64{a, b, a + 10, b + 10})
|
||||
}
|
||||
|
||||
foo(ll, 0, 1)
|
||||
foo(hh, 3, 2)
|
||||
foo(lh, 0, 3)
|
||||
foo(hl, 2, 1)
|
||||
}
|
||||
|
||||
func TestSelect2FromPairConstGroupedFloat(t *testing.T) {
|
||||
x := simd.LoadFloat64x4Slice([]float64{0, 1, 10, 11})
|
||||
y := simd.LoadFloat64x4Slice([]float64{2, 3, 12, 13})
|
||||
|
||||
ll := x.SelectFromPairGrouped(0, 1, y)
|
||||
hh := x.SelectFromPairGrouped(3, 2, y)
|
||||
lh := x.SelectFromPairGrouped(0, 3, y)
|
||||
hl := x.SelectFromPairGrouped(2, 1, y)
|
||||
|
||||
r := make([]float64, 4, 4)
|
||||
|
||||
foo := func(v simd.Float64x4, a, b float64) {
|
||||
v.StoreSlice(r)
|
||||
checkSlices[float64](t, r, []float64{a, b, a + 10, b + 10})
|
||||
}
|
||||
|
||||
foo(ll, 0, 1)
|
||||
foo(hh, 3, 2)
|
||||
foo(lh, 0, 3)
|
||||
foo(hl, 2, 1)
|
||||
}
|
||||
|
||||
func TestSelect2FromPairConstGroupedInt(t *testing.T) {
|
||||
x := simd.LoadInt64x4Slice([]int64{0, 1, 10, 11})
|
||||
y := simd.LoadInt64x4Slice([]int64{2, 3, 12, 13})
|
||||
|
||||
ll := x.SelectFromPairGrouped(0, 1, y)
|
||||
hh := x.SelectFromPairGrouped(3, 2, y)
|
||||
lh := x.SelectFromPairGrouped(0, 3, y)
|
||||
hl := x.SelectFromPairGrouped(2, 1, y)
|
||||
|
||||
r := make([]int64, 4, 4)
|
||||
|
||||
foo := func(v simd.Int64x4, a, b int64) {
|
||||
v.StoreSlice(r)
|
||||
checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10})
|
||||
}
|
||||
|
||||
foo(ll, 0, 1)
|
||||
foo(hh, 3, 2)
|
||||
foo(lh, 0, 3)
|
||||
foo(hl, 2, 1)
|
||||
}
|
||||
|
||||
func TestSelect2FromPairConstGroupedInt512(t *testing.T) {
|
||||
if !simd.HasAVX512() {
|
||||
t.Skip("Test requires HasAVX512, not available on this hardware")
|
||||
return
|
||||
}
|
||||
|
||||
x := simd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31})
|
||||
y := simd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33})
|
||||
|
||||
ll := x.SelectFromPairGrouped(0, 1, y)
|
||||
hh := x.SelectFromPairGrouped(3, 2, y)
|
||||
lh := x.SelectFromPairGrouped(0, 3, y)
|
||||
hl := x.SelectFromPairGrouped(2, 1, y)
|
||||
|
||||
r := make([]int64, 8, 8)
|
||||
|
||||
foo := func(v simd.Int64x8, a, b int64) {
|
||||
v.StoreSlice(r)
|
||||
checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10, a + 20, b + 20, a + 30, b + 30})
|
||||
}
|
||||
|
||||
foo(ll, 0, 1)
|
||||
foo(hh, 3, 2)
|
||||
foo(lh, 0, 3)
|
||||
foo(hl, 2, 1)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,53 +99,53 @@ func select2x4x32(x Int32x4, a, b, c, d uint8, y Int32x4) Int32x4 {
|
|||
|
||||
switch pattern {
|
||||
case _LLLL:
|
||||
return x.concatSelectedConstant(cscimm(a, b, c, d), x)
|
||||
return x.concatSelectedConstant(cscimm4(a, b, c, d), x)
|
||||
case _HHHH:
|
||||
return y.concatSelectedConstant(cscimm(a, b, c, d), y)
|
||||
return y.concatSelectedConstant(cscimm4(a, b, c, d), y)
|
||||
case _LLHH:
|
||||
return x.concatSelectedConstant(cscimm(a, b, c, d), y)
|
||||
return x.concatSelectedConstant(cscimm4(a, b, c, d), y)
|
||||
case _HHLL:
|
||||
return y.concatSelectedConstant(cscimm(a, b, c, d), x)
|
||||
return y.concatSelectedConstant(cscimm4(a, b, c, d), x)
|
||||
|
||||
case _HLLL:
|
||||
z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
|
||||
return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
|
||||
z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
|
||||
return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
|
||||
case _LHLL:
|
||||
z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
|
||||
return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
|
||||
z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
|
||||
return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
|
||||
|
||||
case _HLHH:
|
||||
z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
|
||||
return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
|
||||
z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
|
||||
return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
|
||||
case _LHHH:
|
||||
z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
|
||||
return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
|
||||
z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
|
||||
return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
|
||||
|
||||
case _LLLH:
|
||||
z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
|
||||
return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
|
||||
z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
|
||||
return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
|
||||
case _LLHL:
|
||||
z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
|
||||
return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
|
||||
z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
|
||||
return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
|
||||
case _HHLH:
|
||||
z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
|
||||
return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
|
||||
z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
|
||||
return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
|
||||
case _HHHL:
|
||||
z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
|
||||
return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
|
||||
z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
|
||||
return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
|
||||
|
||||
case _LHLH:
|
||||
z := x.concatSelectedConstant(cscimm(a, c, b, d), y)
|
||||
return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
|
||||
z := x.concatSelectedConstant(cscimm4(a, c, b, d), y)
|
||||
return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
|
||||
case _HLHL:
|
||||
z := x.concatSelectedConstant(cscimm(b, d, a, c), y)
|
||||
return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
|
||||
z := x.concatSelectedConstant(cscimm4(b, d, a, c), y)
|
||||
return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
|
||||
case _HLLH:
|
||||
z := x.concatSelectedConstant(cscimm(b, c, a, d), y)
|
||||
return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
|
||||
z := x.concatSelectedConstant(cscimm4(b, c, a, d), y)
|
||||
return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
|
||||
case _LHHL:
|
||||
z := x.concatSelectedConstant(cscimm(a, d, b, c), y)
|
||||
return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
|
||||
z := x.concatSelectedConstant(cscimm4(a, d, b, c), y)
|
||||
return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
|
||||
}
|
||||
panic("missing case, switch should be exhaustive")
|
||||
}
|
||||
|
|
@ -180,53 +180,53 @@ func select2x8x32Grouped(x Int32x8, a, b, c, d uint8, y Int32x8) Int32x8 {
|
|||
|
||||
switch pattern {
|
||||
case _LLLL:
|
||||
return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
|
||||
return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
|
||||
case _HHHH:
|
||||
return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
|
||||
return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
|
||||
case _LLHH:
|
||||
return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
|
||||
return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
|
||||
case _HHLL:
|
||||
return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
|
||||
return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
|
||||
|
||||
case _HLLL:
|
||||
z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
|
||||
return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
|
||||
z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
|
||||
return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
|
||||
case _LHLL:
|
||||
z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
|
||||
return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
|
||||
z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
|
||||
return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
|
||||
|
||||
case _HLHH:
|
||||
z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
|
||||
return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
|
||||
z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
|
||||
return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
|
||||
case _LHHH:
|
||||
z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
|
||||
return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
|
||||
z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
|
||||
return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
|
||||
|
||||
case _LLLH:
|
||||
z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
|
||||
return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
|
||||
z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
|
||||
return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
|
||||
case _LLHL:
|
||||
z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
|
||||
return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
|
||||
z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
|
||||
return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
|
||||
case _HHLH:
|
||||
z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
|
||||
return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
|
||||
z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
|
||||
return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
|
||||
case _HHHL:
|
||||
z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
|
||||
return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
|
||||
z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
|
||||
return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
|
||||
|
||||
case _LHLH:
|
||||
z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
|
||||
return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
|
||||
z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
|
||||
return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
|
||||
case _HLHL:
|
||||
z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
|
||||
return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
|
||||
z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
|
||||
return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
|
||||
case _HLLH:
|
||||
z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
|
||||
return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
|
||||
z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
|
||||
return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
|
||||
case _LHHL:
|
||||
z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
|
||||
return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
|
||||
z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
|
||||
return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
|
||||
}
|
||||
panic("missing case, switch should be exhaustive")
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue