mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] simd, cmd/compile: add 128 bit select-from-pair
Using this name until a better one appears: x.Select128FromPair(3, 2, y). Includes tests for the constant and variable cases. Checks for unexpected immediates (those using the zeroing flag, which is not supported for this intrinsic) and panics. Change-Id: I9249475d6572968c127b4ee9e00328d717c07578 Reviewed-on: https://go-review.googlesource.com/c/go/+/705496 Reviewed-by: Junyang Shao <shaojunyang@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
f0e281e693
commit
25c36b95d1
18 changed files with 369 additions and 5 deletions
|
|
@ -1053,6 +1053,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
|||
ssa.OpAMD64VGF2P8AFFINEINVQB128,
|
||||
ssa.OpAMD64VGF2P8AFFINEINVQB256,
|
||||
ssa.OpAMD64VGF2P8AFFINEINVQB512,
|
||||
ssa.OpAMD64VPERM2F128256,
|
||||
ssa.OpAMD64VPERM2I128256,
|
||||
ssa.OpAMD64VINSERTF128256,
|
||||
ssa.OpAMD64VINSERTF64X4512,
|
||||
ssa.OpAMD64VINSERTI128256,
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ type symsStruct struct {
|
|||
PanicdottypeI *obj.LSym
|
||||
Panicnildottype *obj.LSym
|
||||
Panicoverflow *obj.LSym
|
||||
PanicSimdImm *obj.LSym
|
||||
Racefuncenter *obj.LSym
|
||||
Racefuncexit *obj.LSym
|
||||
Raceread *obj.LSym
|
||||
|
|
|
|||
|
|
@ -938,6 +938,12 @@
|
|||
(ScaleFloat64x2 ...) => (VSCALEFPD128 ...)
|
||||
(ScaleFloat64x4 ...) => (VSCALEFPD256 ...)
|
||||
(ScaleFloat64x8 ...) => (VSCALEFPD512 ...)
|
||||
(Select128FromPairFloat32x8 ...) => (VPERM2F128256 ...)
|
||||
(Select128FromPairFloat64x4 ...) => (VPERM2F128256 ...)
|
||||
(Select128FromPairInt32x8 ...) => (VPERM2I128256 ...)
|
||||
(Select128FromPairInt64x4 ...) => (VPERM2I128256 ...)
|
||||
(Select128FromPairUint32x8 ...) => (VPERM2I128256 ...)
|
||||
(Select128FromPairUint64x4 ...) => (VPERM2I128256 ...)
|
||||
(SetElemFloat32x4 ...) => (VPINSRD128 ...)
|
||||
(SetElemFloat64x2 ...) => (VPINSRQ128 ...)
|
||||
(SetElemInt8x16 ...) => (VPINSRB128 ...)
|
||||
|
|
|
|||
|
|
@ -1212,6 +1212,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
|||
{name: "VPRORQMasked128", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPRORQMasked256", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPRORQMasked512", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||
{name: "VPERM2F128256", argLength: 2, reg: v21, asm: "VPERM2F128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPERM2I128256", argLength: 2, reg: v21, asm: "VPERM2I128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||
{name: "VPINSRD128", argLength: 2, reg: vgpv, asm: "VPINSRD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPINSRQ128", argLength: 2, reg: vgpv, asm: "VPINSRQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
{name: "VPINSRB128", argLength: 2, reg: vgpv, asm: "VPINSRB", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||
|
|
|
|||
|
|
@ -1199,6 +1199,12 @@ func simdGenericOps() []opData {
|
|||
{name: "RoundToEvenScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "RoundToEvenScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "RoundToEvenScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||
{name: "Select128FromPairFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "Select128FromPairFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "Select128FromPairInt32x8", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "Select128FromPairInt64x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "Select128FromPairUint32x8", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "Select128FromPairUint64x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "SetElemFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "SetElemFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
{name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "UInt8"},
|
||||
|
|
|
|||
|
|
@ -2444,6 +2444,8 @@ const (
|
|||
OpAMD64VPRORQMasked128
|
||||
OpAMD64VPRORQMasked256
|
||||
OpAMD64VPRORQMasked512
|
||||
OpAMD64VPERM2F128256
|
||||
OpAMD64VPERM2I128256
|
||||
OpAMD64VPINSRD128
|
||||
OpAMD64VPINSRQ128
|
||||
OpAMD64VPINSRB128
|
||||
|
|
@ -6594,6 +6596,12 @@ const (
|
|||
OpRoundToEvenScaledResidueFloat64x2
|
||||
OpRoundToEvenScaledResidueFloat64x4
|
||||
OpRoundToEvenScaledResidueFloat64x8
|
||||
OpSelect128FromPairFloat32x8
|
||||
OpSelect128FromPairFloat64x4
|
||||
OpSelect128FromPairInt32x8
|
||||
OpSelect128FromPairInt64x4
|
||||
OpSelect128FromPairUint32x8
|
||||
OpSelect128FromPairUint64x4
|
||||
OpSetElemFloat32x4
|
||||
OpSetElemFloat64x2
|
||||
OpSetElemInt8x16
|
||||
|
|
@ -37656,6 +37664,36 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPERM2F128256",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
asm: x86.AVPERM2F128,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPERM2I128256",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
asm: x86.AVPERM2I128,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
{1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPINSRD128",
|
||||
auxType: auxUInt8,
|
||||
|
|
@ -82360,6 +82398,42 @@ var opcodeTable = [...]opInfo{
|
|||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Select128FromPairFloat32x8",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Select128FromPairFloat64x4",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Select128FromPairInt32x8",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Select128FromPairInt64x4",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Select128FromPairUint32x8",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Select128FromPairUint64x4",
|
||||
auxType: auxUInt8,
|
||||
argLen: 2,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "SetElemFloat32x4",
|
||||
auxType: auxUInt8,
|
||||
|
|
|
|||
|
|
@ -4991,6 +4991,24 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
return rewriteValueAMD64_OpSelect0(v)
|
||||
case OpSelect1:
|
||||
return rewriteValueAMD64_OpSelect1(v)
|
||||
case OpSelect128FromPairFloat32x8:
|
||||
v.Op = OpAMD64VPERM2F128256
|
||||
return true
|
||||
case OpSelect128FromPairFloat64x4:
|
||||
v.Op = OpAMD64VPERM2F128256
|
||||
return true
|
||||
case OpSelect128FromPairInt32x8:
|
||||
v.Op = OpAMD64VPERM2I128256
|
||||
return true
|
||||
case OpSelect128FromPairInt64x4:
|
||||
v.Op = OpAMD64VPERM2I128256
|
||||
return true
|
||||
case OpSelect128FromPairUint32x8:
|
||||
v.Op = OpAMD64VPERM2I128256
|
||||
return true
|
||||
case OpSelect128FromPairUint64x4:
|
||||
v.Op = OpAMD64VPERM2I128256
|
||||
return true
|
||||
case OpSelectN:
|
||||
return rewriteValueAMD64_OpSelectN(v)
|
||||
case OpSetElemFloat32x4:
|
||||
|
|
|
|||
|
|
@ -1842,7 +1842,9 @@ func immJumpTable(s *state, idx *ssa.Value, intrinsicCall *ir.CallExpr, genOp fu
|
|||
for i, t := range targets {
|
||||
s.startBlock(t)
|
||||
genOp(s, i)
|
||||
if t.Kind != ssa.BlockExit {
|
||||
t.AddEdgeTo(bEnd)
|
||||
}
|
||||
s.endBlock()
|
||||
}
|
||||
|
||||
|
|
@ -1899,6 +1901,28 @@ func opLen2Imm8_2I(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.Ca
|
|||
}
|
||||
}
|
||||
|
||||
// Two immediates instead of just 1. Offset is ignored, so it is a _ parameter instead.
|
||||
func opLen2Imm8_II(op ssa.Op, t *types.Type, _ int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
if args[1].Op == ssa.OpConst8 && args[2].Op == ssa.OpConst8 && args[1].AuxInt & ^3 == 0 && args[2].AuxInt & ^3 == 0 {
|
||||
i1, i2 := args[1].AuxInt, args[2].AuxInt
|
||||
return s.newValue2I(op, t, i1+i2<<4, args[0], args[3])
|
||||
}
|
||||
four := s.constInt64(types.Types[types.TUINT8], 4)
|
||||
shifted := s.newValue2(ssa.OpLsh8x8, types.Types[types.TUINT8], args[2], four)
|
||||
combined := s.newValue2(ssa.OpAdd8, types.Types[types.TUINT8], args[1], shifted)
|
||||
return immJumpTable(s, combined, n, func(sNew *state, idx int) {
|
||||
// Encode as int8 due to requirement of AuxInt, check its comment for details.
|
||||
// TODO for "zeroing" values, panic instead.
|
||||
if idx & ^(3+3<<4) == 0 {
|
||||
s.vars[n] = sNew.newValue2I(op, t, int64(int8(idx)), args[0], args[3])
|
||||
} else {
|
||||
sNew.rtcall(ir.Syms.PanicSimdImm, false, nil)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func opLen3Imm8_2I(op ssa.Op, t *types.Type, offset int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
if args[2].Op == ssa.OpConst8 {
|
||||
|
|
|
|||
|
|
@ -950,6 +950,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
addF(simdPackage, "Float64x2.Scale", opLen2(ssa.OpScaleFloat64x2, types.TypeVec128), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.Scale", opLen2(ssa.OpScaleFloat64x4, types.TypeVec256), sys.AMD64)
|
||||
addF(simdPackage, "Float64x8.Scale", opLen2(ssa.OpScaleFloat64x8, types.TypeVec512), sys.AMD64)
|
||||
addF(simdPackage, "Float32x8.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairFloat32x8, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Float64x4.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairFloat64x4, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int32x8.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairInt32x8, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int64x4.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairInt64x4, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint32x8.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairUint32x8, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Uint64x4.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairUint64x4, types.TypeVec256, 0), sys.AMD64)
|
||||
addF(simdPackage, "Float32x4.SetElem", opLen2Imm8(ssa.OpSetElemFloat32x4, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Float64x2.SetElem", opLen2Imm8(ssa.OpSetElemFloat64x2, types.TypeVec128, 0), sys.AMD64)
|
||||
addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64)
|
||||
|
|
|
|||
|
|
@ -141,6 +141,7 @@ func InitConfig() {
|
|||
ir.Syms.Panicnildottype = typecheck.LookupRuntimeFunc("panicnildottype")
|
||||
ir.Syms.Panicoverflow = typecheck.LookupRuntimeFunc("panicoverflow")
|
||||
ir.Syms.Panicshift = typecheck.LookupRuntimeFunc("panicshift")
|
||||
ir.Syms.PanicSimdImm = typecheck.LookupRuntimeFunc("panicSimdImm")
|
||||
ir.Syms.Racefuncenter = typecheck.LookupRuntimeFunc("racefuncenter")
|
||||
ir.Syms.Racefuncexit = typecheck.LookupRuntimeFunc("racefuncexit")
|
||||
ir.Syms.Raceread = typecheck.LookupRuntimeFunc("raceread")
|
||||
|
|
|
|||
|
|
@ -341,6 +341,13 @@ func panicmemAddr(addr uintptr) {
|
|||
panic(errorAddressString{msg: "invalid memory address or nil pointer dereference", addr: addr})
|
||||
}
|
||||
|
||||
var simdImmError = error(errorString("out-of-range immediate for simd intrinsic"))
|
||||
|
||||
func panicSimdImm() {
|
||||
panicCheck2("simd immediate error")
|
||||
panic(simdImmError)
|
||||
}
|
||||
|
||||
// Create a new deferred function fn, which has no arguments and results.
|
||||
// The compiler turns a defer statement into a call to this.
|
||||
func deferproc(fn func()) {
|
||||
|
|
|
|||
|
|
@ -56,6 +56,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
|||
{{end}}
|
||||
{{define "op2Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
|
||||
{{end}}
|
||||
{{define "op2Imm8_II"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen2Imm8_II(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
|
||||
{{end}}
|
||||
{{define "op3Imm8"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
|
||||
{{end}}
|
||||
{{define "op3Imm8_2I"}} addF(simdPackage, "{{(index .In 1).Go}}.{{.Go}}", opLen3Imm8_2I(ssa.Op{{.GenericName}}, {{.SSAType}}, {{(index .In 0).ImmOffset}}), sys.AMD64)
|
||||
|
|
|
|||
|
|
@ -354,6 +354,15 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"
|
|||
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uint8) {{.GoType}}
|
||||
{{end}}
|
||||
|
||||
{{define "op2Imm8_II"}}
|
||||
{{if .Documentation}}{{.Documentation}}
|
||||
//{{end}}
|
||||
// {{.ImmName}} result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||
// {{.ImmName}} should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
||||
//
|
||||
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
|
||||
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
|
||||
{{end}}
|
||||
|
||||
{{define "op3Imm8"}}
|
||||
{{if .Documentation}}{{.Documentation}}
|
||||
|
|
|
|||
|
|
@ -175,3 +175,9 @@
|
|||
// selecting element 1 from y's upper 128 bits (11).
|
||||
// This differs from the same method applied to a 32x8 vector, where
|
||||
// the 8-bit constant performs the same selection on both subvectors.
|
||||
|
||||
- go: Select128FromPair
|
||||
commutative: false
|
||||
documentation: !string |-
|
||||
// NAME selects the low and high 128-bit halves from the 128-bit halves
|
||||
// of its two 256-bit inputs, numbering those halves 0, 1, 2, 3.
|
||||
|
|
|
|||
|
|
@ -721,7 +721,6 @@
|
|||
out:
|
||||
- *v
|
||||
|
||||
|
||||
- go: concatSelectedConstantGrouped
|
||||
asm: VSHUFPD
|
||||
in:
|
||||
|
|
@ -771,3 +770,74 @@
|
|||
inVariant: []
|
||||
out:
|
||||
- *v
|
||||
|
||||
- go: Select128FromPair
|
||||
asm: VPERM2F128
|
||||
operandOrder: II
|
||||
in:
|
||||
- &v
|
||||
go: $t
|
||||
class: vreg
|
||||
base: float
|
||||
bits: 256
|
||||
- *v
|
||||
- class: immediate
|
||||
immOffset: 0
|
||||
name: "lo, hi"
|
||||
inVariant: []
|
||||
out:
|
||||
- *v
|
||||
|
||||
- go: Select128FromPair
|
||||
asm: VPERM2F128
|
||||
operandOrder: II
|
||||
in:
|
||||
- &v
|
||||
go: $t
|
||||
class: vreg
|
||||
base: float
|
||||
bits: 256
|
||||
OverwriteElementBits: 32
|
||||
- *v
|
||||
- class: immediate
|
||||
immOffset: 0
|
||||
name: "lo, hi"
|
||||
inVariant: []
|
||||
out:
|
||||
- *v
|
||||
|
||||
- go: Select128FromPair
|
||||
asm: VPERM2I128
|
||||
operandOrder: II
|
||||
in:
|
||||
- &v
|
||||
go: $t
|
||||
class: vreg
|
||||
base: int|uint
|
||||
bits: 256
|
||||
OverwriteElementBits: 64
|
||||
- *v
|
||||
- class: immediate
|
||||
immOffset: 0
|
||||
name: "lo, hi"
|
||||
inVariant: []
|
||||
out:
|
||||
- *v
|
||||
|
||||
- go: Select128FromPair
|
||||
asm: VPERM2I128
|
||||
operandOrder: II
|
||||
in:
|
||||
- &v
|
||||
go: $t
|
||||
class: vreg
|
||||
base: int|uint
|
||||
bits: 256
|
||||
OverwriteElementBits: 32
|
||||
- *v
|
||||
- class: immediate
|
||||
immOffset: 0
|
||||
name: "lo, hi"
|
||||
inVariant: []
|
||||
out:
|
||||
- *v
|
||||
|
|
|
|||
|
|
@ -106,8 +106,8 @@ func (b *DefBuilder) Add(name string, v *Value) {
|
|||
if b.fields == nil {
|
||||
b.fields = make(map[string]*Value)
|
||||
}
|
||||
if _, ok := b.fields[name]; ok {
|
||||
panic(fmt.Sprintf("duplicate field %q", name))
|
||||
if old, ok := b.fields[name]; ok {
|
||||
panic(fmt.Sprintf("duplicate field %q, added value is %v, old value is %v", name, v, old))
|
||||
}
|
||||
b.fields[name] = v
|
||||
}
|
||||
|
|
|
|||
|
|
@ -815,3 +815,77 @@ func TestSelectFromPairConstGroupedUint32x16(t *testing.T) {
|
|||
foo(lhhl, 0, 4, 5, 1)
|
||||
foo(hllh, 4, 0, 1, 5)
|
||||
}
|
||||
|
||||
func TestSelect128FromPair(t *testing.T) {
|
||||
x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
|
||||
y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
|
||||
|
||||
aa := x.Select128FromPair(0, 0, y)
|
||||
ab := x.Select128FromPair(0, 1, y)
|
||||
bc := x.Select128FromPair(1, 2, y)
|
||||
cd := x.Select128FromPair(2, 3, y)
|
||||
da := x.Select128FromPair(3, 0, y)
|
||||
dc := x.Select128FromPair(3, 2, y)
|
||||
|
||||
r := make([]uint64, 4, 4)
|
||||
|
||||
foo := func(v simd.Uint64x4, a, b uint64) {
|
||||
a, b = 2*a, 2*b
|
||||
v.StoreSlice(r)
|
||||
checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
|
||||
}
|
||||
|
||||
foo(aa, 0, 0)
|
||||
foo(ab, 0, 1)
|
||||
foo(bc, 1, 2)
|
||||
foo(cd, 2, 3)
|
||||
foo(da, 3, 0)
|
||||
foo(dc, 3, 2)
|
||||
}
|
||||
|
||||
func TestSelect128FromPairError(t *testing.T) {
|
||||
x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
|
||||
y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
|
||||
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
t.Logf("Saw expected panic %v", r)
|
||||
}
|
||||
}()
|
||||
_ = x.Select128FromPair(0, 4, y)
|
||||
|
||||
t.Errorf("Should have panicked")
|
||||
}
|
||||
|
||||
//go:noinline
|
||||
func select128FromPair(x simd.Uint64x4, lo, hi uint8, y simd.Uint64x4) simd.Uint64x4 {
|
||||
return x.Select128FromPair(lo, hi, y)
|
||||
}
|
||||
|
||||
func TestSelect128FromPairVar(t *testing.T) {
|
||||
x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
|
||||
y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
|
||||
|
||||
aa := select128FromPair(x, 0, 0, y)
|
||||
ab := select128FromPair(x, 0, 1, y)
|
||||
bc := select128FromPair(x, 1, 2, y)
|
||||
cd := select128FromPair(x, 2, 3, y)
|
||||
da := select128FromPair(x, 3, 0, y)
|
||||
dc := select128FromPair(x, 3, 2, y)
|
||||
|
||||
r := make([]uint64, 4, 4)
|
||||
|
||||
foo := func(v simd.Uint64x4, a, b uint64) {
|
||||
a, b = 2*a, 2*b
|
||||
v.StoreSlice(r)
|
||||
checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
|
||||
}
|
||||
|
||||
foo(aa, 0, 0)
|
||||
foo(ab, 0, 1)
|
||||
foo(bc, 1, 2)
|
||||
foo(cd, 2, 3)
|
||||
foo(da, 3, 0)
|
||||
foo(dc, 3, 2)
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5576,6 +5576,62 @@ func (x Float64x4) Scale(y Float64x4) Float64x4
|
|||
// Asm: VSCALEFPD, CPU Feature: AVX512
|
||||
func (x Float64x8) Scale(y Float64x8) Float64x8
|
||||
|
||||
/* Select128FromPair */
|
||||
|
||||
// Select128FromPair selects the low and high 128-bit halves from the 128-bit halves
|
||||
// of its two 256-bit inputs, numbering those halves 0, 1, 2, 3.
|
||||
//
|
||||
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||
// lo, hi should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
||||
//
|
||||
// Asm: VPERM2F128, CPU Feature: AVX
|
||||
func (x Float32x8) Select128FromPair(lo, hi uint8, y Float32x8) Float32x8
|
||||
|
||||
// Select128FromPair selects the low and high 128-bit halves from the 128-bit halves
|
||||
// of its two 256-bit inputs, numbering those halves 0, 1, 2, 3.
|
||||
//
|
||||
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||
// lo, hi should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
||||
//
|
||||
// Asm: VPERM2F128, CPU Feature: AVX
|
||||
func (x Float64x4) Select128FromPair(lo, hi uint8, y Float64x4) Float64x4
|
||||
|
||||
// Select128FromPair selects the low and high 128-bit halves from the 128-bit halves
|
||||
// of its two 256-bit inputs, numbering those halves 0, 1, 2, 3.
|
||||
//
|
||||
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||
// lo, hi should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
||||
//
|
||||
// Asm: VPERM2I128, CPU Feature: AVX2
|
||||
func (x Int32x8) Select128FromPair(lo, hi uint8, y Int32x8) Int32x8
|
||||
|
||||
// Select128FromPair selects the low and high 128-bit halves from the 128-bit halves
|
||||
// of its two 256-bit inputs, numbering those halves 0, 1, 2, 3.
|
||||
//
|
||||
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||
// lo, hi should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
||||
//
|
||||
// Asm: VPERM2I128, CPU Feature: AVX2
|
||||
func (x Int64x4) Select128FromPair(lo, hi uint8, y Int64x4) Int64x4
|
||||
|
||||
// Select128FromPair selects the low and high 128-bit halves from the 128-bit halves
|
||||
// of its two 256-bit inputs, numbering those halves 0, 1, 2, 3.
|
||||
//
|
||||
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||
// lo, hi should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
||||
//
|
||||
// Asm: VPERM2I128, CPU Feature: AVX2
|
||||
func (x Uint32x8) Select128FromPair(lo, hi uint8, y Uint32x8) Uint32x8
|
||||
|
||||
// Select128FromPair selects the low and high 128-bit halves from the 128-bit halves
|
||||
// of its two 256-bit inputs, numbering those halves 0, 1, 2, 3.
|
||||
//
|
||||
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||
// lo, hi should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
||||
//
|
||||
// Asm: VPERM2I128, CPU Feature: AVX2
|
||||
func (x Uint64x4) Select128FromPair(lo, hi uint8, y Uint64x4) Uint64x4
|
||||
|
||||
/* SetElem */
|
||||
|
||||
// SetElem sets a single constant-indexed element's value.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue