mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] simd, cmd/compile: add more element types for Select128FromPair
Also includes a comment cleanup pass. Fixed NAME processing for additional documentation. Change-Id: Ide5b60c17ddbf3c6eafd20147981c59493fc8133 Reviewed-on: https://go-review.googlesource.com/c/go/+/722180 Reviewed-by: Junyang Shao <shaojunyang@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
4d26d66a49
commit
74ebdd28d1
11 changed files with 248 additions and 28 deletions
|
|
@ -941,8 +941,12 @@
|
||||||
(ScaleFloat64x8 ...) => (VSCALEFPD512 ...)
|
(ScaleFloat64x8 ...) => (VSCALEFPD512 ...)
|
||||||
(Select128FromPairFloat32x8 ...) => (VPERM2F128256 ...)
|
(Select128FromPairFloat32x8 ...) => (VPERM2F128256 ...)
|
||||||
(Select128FromPairFloat64x4 ...) => (VPERM2F128256 ...)
|
(Select128FromPairFloat64x4 ...) => (VPERM2F128256 ...)
|
||||||
|
(Select128FromPairInt8x32 ...) => (VPERM2I128256 ...)
|
||||||
|
(Select128FromPairInt16x16 ...) => (VPERM2I128256 ...)
|
||||||
(Select128FromPairInt32x8 ...) => (VPERM2I128256 ...)
|
(Select128FromPairInt32x8 ...) => (VPERM2I128256 ...)
|
||||||
(Select128FromPairInt64x4 ...) => (VPERM2I128256 ...)
|
(Select128FromPairInt64x4 ...) => (VPERM2I128256 ...)
|
||||||
|
(Select128FromPairUint8x32 ...) => (VPERM2I128256 ...)
|
||||||
|
(Select128FromPairUint16x16 ...) => (VPERM2I128256 ...)
|
||||||
(Select128FromPairUint32x8 ...) => (VPERM2I128256 ...)
|
(Select128FromPairUint32x8 ...) => (VPERM2I128256 ...)
|
||||||
(Select128FromPairUint64x4 ...) => (VPERM2I128256 ...)
|
(Select128FromPairUint64x4 ...) => (VPERM2I128256 ...)
|
||||||
(SetElemFloat32x4 ...) => (VPINSRD128 ...)
|
(SetElemFloat32x4 ...) => (VPINSRD128 ...)
|
||||||
|
|
|
||||||
|
|
@ -1192,8 +1192,12 @@ func simdGenericOps() []opData {
|
||||||
{name: "SHA1FourRoundsUint32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "SHA1FourRoundsUint32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "Select128FromPairFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "Select128FromPairFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "Select128FromPairFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "Select128FromPairFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "Select128FromPairInt8x32", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "Select128FromPairInt16x16", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "Select128FromPairInt32x8", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "Select128FromPairInt32x8", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "Select128FromPairInt64x4", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "Select128FromPairInt64x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "Select128FromPairUint8x32", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "Select128FromPairUint16x16", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "Select128FromPairUint32x8", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "Select128FromPairUint32x8", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "Select128FromPairUint64x4", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "Select128FromPairUint64x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "SetElemFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "SetElemFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
|
|
|
||||||
|
|
@ -7151,8 +7151,12 @@ const (
|
||||||
OpSHA1FourRoundsUint32x4
|
OpSHA1FourRoundsUint32x4
|
||||||
OpSelect128FromPairFloat32x8
|
OpSelect128FromPairFloat32x8
|
||||||
OpSelect128FromPairFloat64x4
|
OpSelect128FromPairFloat64x4
|
||||||
|
OpSelect128FromPairInt8x32
|
||||||
|
OpSelect128FromPairInt16x16
|
||||||
OpSelect128FromPairInt32x8
|
OpSelect128FromPairInt32x8
|
||||||
OpSelect128FromPairInt64x4
|
OpSelect128FromPairInt64x4
|
||||||
|
OpSelect128FromPairUint8x32
|
||||||
|
OpSelect128FromPairUint16x16
|
||||||
OpSelect128FromPairUint32x8
|
OpSelect128FromPairUint32x8
|
||||||
OpSelect128FromPairUint64x4
|
OpSelect128FromPairUint64x4
|
||||||
OpSetElemFloat32x4
|
OpSetElemFloat32x4
|
||||||
|
|
@ -92250,6 +92254,18 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "Select128FromPairInt8x32",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Select128FromPairInt16x16",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "Select128FromPairInt32x8",
|
name: "Select128FromPairInt32x8",
|
||||||
auxType: auxUInt8,
|
auxType: auxUInt8,
|
||||||
|
|
@ -92262,6 +92278,18 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "Select128FromPairUint8x32",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Select128FromPairUint16x16",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 2,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "Select128FromPairUint32x8",
|
name: "Select128FromPairUint32x8",
|
||||||
auxType: auxUInt8,
|
auxType: auxUInt8,
|
||||||
|
|
|
||||||
|
|
@ -5017,18 +5017,30 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
case OpSelect128FromPairFloat64x4:
|
case OpSelect128FromPairFloat64x4:
|
||||||
v.Op = OpAMD64VPERM2F128256
|
v.Op = OpAMD64VPERM2F128256
|
||||||
return true
|
return true
|
||||||
|
case OpSelect128FromPairInt16x16:
|
||||||
|
v.Op = OpAMD64VPERM2I128256
|
||||||
|
return true
|
||||||
case OpSelect128FromPairInt32x8:
|
case OpSelect128FromPairInt32x8:
|
||||||
v.Op = OpAMD64VPERM2I128256
|
v.Op = OpAMD64VPERM2I128256
|
||||||
return true
|
return true
|
||||||
case OpSelect128FromPairInt64x4:
|
case OpSelect128FromPairInt64x4:
|
||||||
v.Op = OpAMD64VPERM2I128256
|
v.Op = OpAMD64VPERM2I128256
|
||||||
return true
|
return true
|
||||||
|
case OpSelect128FromPairInt8x32:
|
||||||
|
v.Op = OpAMD64VPERM2I128256
|
||||||
|
return true
|
||||||
|
case OpSelect128FromPairUint16x16:
|
||||||
|
v.Op = OpAMD64VPERM2I128256
|
||||||
|
return true
|
||||||
case OpSelect128FromPairUint32x8:
|
case OpSelect128FromPairUint32x8:
|
||||||
v.Op = OpAMD64VPERM2I128256
|
v.Op = OpAMD64VPERM2I128256
|
||||||
return true
|
return true
|
||||||
case OpSelect128FromPairUint64x4:
|
case OpSelect128FromPairUint64x4:
|
||||||
v.Op = OpAMD64VPERM2I128256
|
v.Op = OpAMD64VPERM2I128256
|
||||||
return true
|
return true
|
||||||
|
case OpSelect128FromPairUint8x32:
|
||||||
|
v.Op = OpAMD64VPERM2I128256
|
||||||
|
return true
|
||||||
case OpSelectN:
|
case OpSelectN:
|
||||||
return rewriteValueAMD64_OpSelectN(v)
|
return rewriteValueAMD64_OpSelectN(v)
|
||||||
case OpSetElemFloat32x4:
|
case OpSetElemFloat32x4:
|
||||||
|
|
|
||||||
|
|
@ -953,8 +953,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Float64x8.Scale", opLen2(ssa.OpScaleFloat64x8, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Float64x8.Scale", opLen2(ssa.OpScaleFloat64x8, types.TypeVec512), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x8.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairFloat32x8, types.TypeVec256, 0), sys.AMD64)
|
addF(simdPackage, "Float32x8.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairFloat32x8, types.TypeVec256, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Float64x4.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairFloat64x4, types.TypeVec256, 0), sys.AMD64)
|
addF(simdPackage, "Float64x4.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairFloat64x4, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int8x32.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairInt8x32, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Int16x16.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairInt16x16, types.TypeVec256, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x8.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairInt32x8, types.TypeVec256, 0), sys.AMD64)
|
addF(simdPackage, "Int32x8.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairInt32x8, types.TypeVec256, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Int64x4.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairInt64x4, types.TypeVec256, 0), sys.AMD64)
|
addF(simdPackage, "Int64x4.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairInt64x4, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint8x32.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairUint8x32, types.TypeVec256, 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Uint16x16.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairUint16x16, types.TypeVec256, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Uint32x8.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairUint32x8, types.TypeVec256, 0), sys.AMD64)
|
addF(simdPackage, "Uint32x8.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairUint32x8, types.TypeVec256, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Uint64x4.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairUint64x4, types.TypeVec256, 0), sys.AMD64)
|
addF(simdPackage, "Uint64x4.Select128FromPair", opLen2Imm8_II(ssa.OpSelect128FromPairUint64x4, types.TypeVec256, 0), sys.AMD64)
|
||||||
addF(simdPackage, "Float32x4.SetElem", opLen2Imm8(ssa.OpSetElemFloat32x4, types.TypeVec128, 0), sys.AMD64)
|
addF(simdPackage, "Float32x4.SetElem", opLen2Imm8(ssa.OpSetElemFloat32x4, types.TypeVec128, 0), sys.AMD64)
|
||||||
|
|
|
||||||
|
|
@ -351,7 +351,7 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.Op2NameAndType "y"}}, {{.ImmName}} uin
|
||||||
{{if .Documentation}}{{.Documentation}}
|
{{if .Documentation}}{{.Documentation}}
|
||||||
//{{end}}
|
//{{end}}
|
||||||
// {{.ImmName}} result in better performance when they are constants, non-constant values will be translated into a jump table.
|
// {{.ImmName}} result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||||
// {{.ImmName}} should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
// {{.ImmName}} should be between 0 and 3, inclusive; other values may result in a runtime panic.
|
||||||
//
|
//
|
||||||
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
|
// Asm: {{.Asm}}, CPU Feature: {{.CPUFeature}}
|
||||||
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
|
func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"}}) {{.GoType}}
|
||||||
|
|
|
||||||
|
|
@ -98,6 +98,8 @@ func (o *Operation) SkipMaskedMethod() bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var reForName = regexp.MustCompile(`\bNAME\b`)
|
||||||
|
|
||||||
func (o *Operation) DecodeUnified(v *unify.Value) error {
|
func (o *Operation) DecodeUnified(v *unify.Value) error {
|
||||||
if err := v.Decode(&o.rawOperation); err != nil {
|
if err := v.Decode(&o.rawOperation); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
@ -117,7 +119,7 @@ func (o *Operation) DecodeUnified(v *unify.Value) error {
|
||||||
} else {
|
} else {
|
||||||
o.Documentation = "// UNDOCUMENTED"
|
o.Documentation = "// UNDOCUMENTED"
|
||||||
}
|
}
|
||||||
o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go)
|
o.Documentation = reForName.ReplaceAllString(o.Documentation, o.Go)
|
||||||
if isMasked {
|
if isMasked {
|
||||||
o.Documentation += "\n//\n// This operation is applied selectively under a write mask."
|
o.Documentation += "\n//\n// This operation is applied selectively under a write mask."
|
||||||
// Suppress generic op and method declaration for exported methods, if a mask is present.
|
// Suppress generic op and method declaration for exported methods, if a mask is present.
|
||||||
|
|
@ -128,7 +130,7 @@ func (o *Operation) DecodeUnified(v *unify.Value) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if o.rawOperation.AddDoc != nil {
|
if o.rawOperation.AddDoc != nil {
|
||||||
o.Documentation += "\n" + *o.rawOperation.AddDoc
|
o.Documentation += "\n" + reForName.ReplaceAllString(*o.rawOperation.AddDoc, o.Go)
|
||||||
}
|
}
|
||||||
|
|
||||||
o.In = append(o.rawOperation.In, o.rawOperation.InVariant...)
|
o.In = append(o.rawOperation.In, o.rawOperation.InVariant...)
|
||||||
|
|
|
||||||
|
|
@ -135,7 +135,7 @@
|
||||||
// NAME concatenates selected elements from x and y into the lower and upper
|
// NAME concatenates selected elements from x and y into the lower and upper
|
||||||
// halves of the output. The selection is chosen by the constant parameter h1h0l1l0
|
// halves of the output. The selection is chosen by the constant parameter h1h0l1l0
|
||||||
// where each {h,l}{1,0} is two bits specifying which element from y or x to select.
|
// where each {h,l}{1,0} is two bits specifying which element from y or x to select.
|
||||||
// For example, {0,1,2,3}.concatSelectedConstant(0b_11_01_00_10, {4,5,6,7}) returns
|
// For example, {0,1,2,3}.NAME(0b_11_01_00_10, {4,5,6,7}) returns
|
||||||
// {2, 0, 5, 7} (don't forget that the binary constant is written big-endian).
|
// {2, 0, 5, 7} (don't forget that the binary constant is written big-endian).
|
||||||
|
|
||||||
- go: concatSelectedConstant
|
- go: concatSelectedConstant
|
||||||
|
|
@ -196,9 +196,12 @@
|
||||||
// The selection is chosen by the constant parameter h1h0l1l0
|
// The selection is chosen by the constant parameter h1h0l1l0
|
||||||
// where each {h,l}{1,0} is two bits specifying which element from y or x to select.
|
// where each {h,l}{1,0} is two bits specifying which element from y or x to select.
|
||||||
// For example,
|
// For example,
|
||||||
// {0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.NAME(
|
//
|
||||||
// 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215})
|
// {0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.NAME(
|
||||||
|
// 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215})
|
||||||
|
//
|
||||||
// returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215}
|
// returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215}
|
||||||
|
//
|
||||||
// (don't forget that the binary constant is written big-endian).
|
// (don't forget that the binary constant is written big-endian).
|
||||||
|
|
||||||
- go: concatSelectedConstantGrouped
|
- go: concatSelectedConstantGrouped
|
||||||
|
|
@ -229,7 +232,6 @@
|
||||||
// NAME treats the 256-bit vectors x and y as a single vector of four
|
// NAME treats the 256-bit vectors x and y as a single vector of four
|
||||||
// 128-bit elements, and returns a 256-bit result formed by
|
// 128-bit elements, and returns a 256-bit result formed by
|
||||||
// concatenating the two elements specified by lo and hi.
|
// concatenating the two elements specified by lo and hi.
|
||||||
// For example, {4,5}.NAME(3,0,{6,7}) returns {7,4}.
|
|
||||||
|
|
||||||
- go: ConcatShiftBytesRight
|
- go: ConcatShiftBytesRight
|
||||||
commutative: false
|
commutative: false
|
||||||
|
|
|
||||||
|
|
@ -837,6 +837,12 @@
|
||||||
- go: Select128FromPair
|
- go: Select128FromPair
|
||||||
asm: VPERM2F128
|
asm: VPERM2F128
|
||||||
operandOrder: II
|
operandOrder: II
|
||||||
|
addDoc: !string |-
|
||||||
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 50, 51}.NAME(3, 0, {60, 61, 70, 71})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 40, 41}.
|
||||||
in:
|
in:
|
||||||
- &v
|
- &v
|
||||||
go: $t
|
go: $t
|
||||||
|
|
@ -854,6 +860,12 @@
|
||||||
- go: Select128FromPair
|
- go: Select128FromPair
|
||||||
asm: VPERM2F128
|
asm: VPERM2F128
|
||||||
operandOrder: II
|
operandOrder: II
|
||||||
|
addDoc: !string |-
|
||||||
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 42, 43, 50, 51, 52, 53}.NAME(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
|
||||||
in:
|
in:
|
||||||
- &v
|
- &v
|
||||||
go: $t
|
go: $t
|
||||||
|
|
@ -872,6 +884,12 @@
|
||||||
- go: Select128FromPair
|
- go: Select128FromPair
|
||||||
asm: VPERM2I128
|
asm: VPERM2I128
|
||||||
operandOrder: II
|
operandOrder: II
|
||||||
|
addDoc: !string |-
|
||||||
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 50, 51}.NAME(3, 0, {60, 61, 70, 71})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 40, 41}.
|
||||||
in:
|
in:
|
||||||
- &v
|
- &v
|
||||||
go: $t
|
go: $t
|
||||||
|
|
@ -890,6 +908,12 @@
|
||||||
- go: Select128FromPair
|
- go: Select128FromPair
|
||||||
asm: VPERM2I128
|
asm: VPERM2I128
|
||||||
operandOrder: II
|
operandOrder: II
|
||||||
|
addDoc: !string |-
|
||||||
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 42, 43, 50, 51, 52, 53}.NAME(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
|
||||||
in:
|
in:
|
||||||
- &v
|
- &v
|
||||||
go: $t
|
go: $t
|
||||||
|
|
@ -905,6 +929,56 @@
|
||||||
out:
|
out:
|
||||||
- *v
|
- *v
|
||||||
|
|
||||||
|
- go: Select128FromPair
|
||||||
|
asm: VPERM2I128
|
||||||
|
operandOrder: II
|
||||||
|
addDoc: !string |-
|
||||||
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.NAME(3, 0,
|
||||||
|
// {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
|
||||||
|
in:
|
||||||
|
- &v
|
||||||
|
go: $t
|
||||||
|
class: vreg
|
||||||
|
base: int|uint
|
||||||
|
bits: 256
|
||||||
|
OverwriteElementBits: 16
|
||||||
|
- *v
|
||||||
|
- class: immediate
|
||||||
|
immOffset: 0
|
||||||
|
name: "lo, hi"
|
||||||
|
inVariant: []
|
||||||
|
out:
|
||||||
|
- *v
|
||||||
|
|
||||||
|
- go: Select128FromPair
|
||||||
|
asm: VPERM2I128
|
||||||
|
operandOrder: II
|
||||||
|
addDoc: !string |-
|
||||||
|
// For example,
|
||||||
|
//
|
||||||
|
// {0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.NAME(3, 0,
|
||||||
|
// {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
|
||||||
|
//
|
||||||
|
// returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
|
||||||
|
in:
|
||||||
|
- &v
|
||||||
|
go: $t
|
||||||
|
class: vreg
|
||||||
|
base: int|uint
|
||||||
|
bits: 256
|
||||||
|
OverwriteElementBits: 8
|
||||||
|
- *v
|
||||||
|
- class: immediate
|
||||||
|
immOffset: 0
|
||||||
|
name: "lo, hi"
|
||||||
|
inVariant: []
|
||||||
|
out:
|
||||||
|
- *v
|
||||||
|
|
||||||
- go: ConcatShiftBytesRight
|
- go: ConcatShiftBytesRight
|
||||||
asm: VPALIGNR
|
asm: VPALIGNR
|
||||||
in:
|
in:
|
||||||
|
|
@ -930,4 +1004,3 @@
|
||||||
immOffset: 0
|
immOffset: 0
|
||||||
out:
|
out:
|
||||||
- *uint256512
|
- *uint256512
|
||||||
|
|
||||||
|
|
@ -5604,10 +5604,14 @@ func (x Float64x8) Scale(y Float64x8) Float64x8
|
||||||
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
||||||
// 128-bit elements, and returns a 256-bit result formed by
|
// 128-bit elements, and returns a 256-bit result formed by
|
||||||
// concatenating the two elements specified by lo and hi.
|
// concatenating the two elements specified by lo and hi.
|
||||||
// For example, {4,5}.Select128FromPair(3,0,{6,7}) returns {7,4}.
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
|
||||||
//
|
//
|
||||||
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||||
// lo, hi should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
|
||||||
//
|
//
|
||||||
// Asm: VPERM2F128, CPU Feature: AVX
|
// Asm: VPERM2F128, CPU Feature: AVX
|
||||||
func (x Float32x8) Select128FromPair(lo, hi uint8, y Float32x8) Float32x8
|
func (x Float32x8) Select128FromPair(lo, hi uint8, y Float32x8) Float32x8
|
||||||
|
|
@ -5615,10 +5619,14 @@ func (x Float32x8) Select128FromPair(lo, hi uint8, y Float32x8) Float32x8
|
||||||
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
||||||
// 128-bit elements, and returns a 256-bit result formed by
|
// 128-bit elements, and returns a 256-bit result formed by
|
||||||
// concatenating the two elements specified by lo and hi.
|
// concatenating the two elements specified by lo and hi.
|
||||||
// For example, {4,5}.Select128FromPair(3,0,{6,7}) returns {7,4}.
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 40, 41}.
|
||||||
//
|
//
|
||||||
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||||
// lo, hi should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
|
||||||
//
|
//
|
||||||
// Asm: VPERM2F128, CPU Feature: AVX
|
// Asm: VPERM2F128, CPU Feature: AVX
|
||||||
func (x Float64x4) Select128FromPair(lo, hi uint8, y Float64x4) Float64x4
|
func (x Float64x4) Select128FromPair(lo, hi uint8, y Float64x4) Float64x4
|
||||||
|
|
@ -5626,10 +5634,46 @@ func (x Float64x4) Select128FromPair(lo, hi uint8, y Float64x4) Float64x4
|
||||||
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
||||||
// 128-bit elements, and returns a 256-bit result formed by
|
// 128-bit elements, and returns a 256-bit result formed by
|
||||||
// concatenating the two elements specified by lo and hi.
|
// concatenating the two elements specified by lo and hi.
|
||||||
// For example, {4,5}.Select128FromPair(3,0,{6,7}) returns {7,4}.
|
// For example,
|
||||||
|
//
|
||||||
|
// {0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.Select128FromPair(3, 0,
|
||||||
|
// {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
|
||||||
|
//
|
||||||
|
// returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
|
||||||
//
|
//
|
||||||
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||||
// lo, hi should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
|
||||||
|
//
|
||||||
|
// Asm: VPERM2I128, CPU Feature: AVX2
|
||||||
|
func (x Int8x32) Select128FromPair(lo, hi uint8, y Int8x32) Int8x32
|
||||||
|
|
||||||
|
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
||||||
|
// 128-bit elements, and returns a 256-bit result formed by
|
||||||
|
// concatenating the two elements specified by lo and hi.
|
||||||
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.Select128FromPair(3, 0,
|
||||||
|
// {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
|
||||||
|
//
|
||||||
|
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||||
|
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
|
||||||
|
//
|
||||||
|
// Asm: VPERM2I128, CPU Feature: AVX2
|
||||||
|
func (x Int16x16) Select128FromPair(lo, hi uint8, y Int16x16) Int16x16
|
||||||
|
|
||||||
|
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
||||||
|
// 128-bit elements, and returns a 256-bit result formed by
|
||||||
|
// concatenating the two elements specified by lo and hi.
|
||||||
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
|
||||||
|
//
|
||||||
|
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||||
|
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
|
||||||
//
|
//
|
||||||
// Asm: VPERM2I128, CPU Feature: AVX2
|
// Asm: VPERM2I128, CPU Feature: AVX2
|
||||||
func (x Int32x8) Select128FromPair(lo, hi uint8, y Int32x8) Int32x8
|
func (x Int32x8) Select128FromPair(lo, hi uint8, y Int32x8) Int32x8
|
||||||
|
|
@ -5637,10 +5681,14 @@ func (x Int32x8) Select128FromPair(lo, hi uint8, y Int32x8) Int32x8
|
||||||
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
||||||
// 128-bit elements, and returns a 256-bit result formed by
|
// 128-bit elements, and returns a 256-bit result formed by
|
||||||
// concatenating the two elements specified by lo and hi.
|
// concatenating the two elements specified by lo and hi.
|
||||||
// For example, {4,5}.Select128FromPair(3,0,{6,7}) returns {7,4}.
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 40, 41}.
|
||||||
//
|
//
|
||||||
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||||
// lo, hi should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
|
||||||
//
|
//
|
||||||
// Asm: VPERM2I128, CPU Feature: AVX2
|
// Asm: VPERM2I128, CPU Feature: AVX2
|
||||||
func (x Int64x4) Select128FromPair(lo, hi uint8, y Int64x4) Int64x4
|
func (x Int64x4) Select128FromPair(lo, hi uint8, y Int64x4) Int64x4
|
||||||
|
|
@ -5648,10 +5696,46 @@ func (x Int64x4) Select128FromPair(lo, hi uint8, y Int64x4) Int64x4
|
||||||
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
||||||
// 128-bit elements, and returns a 256-bit result formed by
|
// 128-bit elements, and returns a 256-bit result formed by
|
||||||
// concatenating the two elements specified by lo and hi.
|
// concatenating the two elements specified by lo and hi.
|
||||||
// For example, {4,5}.Select128FromPair(3,0,{6,7}) returns {7,4}.
|
// For example,
|
||||||
|
//
|
||||||
|
// {0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.Select128FromPair(3, 0,
|
||||||
|
// {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f})
|
||||||
|
//
|
||||||
|
// returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}.
|
||||||
//
|
//
|
||||||
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||||
// lo, hi should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
|
||||||
|
//
|
||||||
|
// Asm: VPERM2I128, CPU Feature: AVX2
|
||||||
|
func (x Uint8x32) Select128FromPair(lo, hi uint8, y Uint8x32) Uint8x32
|
||||||
|
|
||||||
|
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
||||||
|
// 128-bit elements, and returns a 256-bit result formed by
|
||||||
|
// concatenating the two elements specified by lo and hi.
|
||||||
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.Select128FromPair(3, 0,
|
||||||
|
// {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
|
||||||
|
//
|
||||||
|
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||||
|
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
|
||||||
|
//
|
||||||
|
// Asm: VPERM2I128, CPU Feature: AVX2
|
||||||
|
func (x Uint16x16) Select128FromPair(lo, hi uint8, y Uint16x16) Uint16x16
|
||||||
|
|
||||||
|
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
||||||
|
// 128-bit elements, and returns a 256-bit result formed by
|
||||||
|
// concatenating the two elements specified by lo and hi.
|
||||||
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 72, 73, 40, 41, 42, 43}.
|
||||||
|
//
|
||||||
|
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||||
|
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
|
||||||
//
|
//
|
||||||
// Asm: VPERM2I128, CPU Feature: AVX2
|
// Asm: VPERM2I128, CPU Feature: AVX2
|
||||||
func (x Uint32x8) Select128FromPair(lo, hi uint8, y Uint32x8) Uint32x8
|
func (x Uint32x8) Select128FromPair(lo, hi uint8, y Uint32x8) Uint32x8
|
||||||
|
|
@ -5659,10 +5743,14 @@ func (x Uint32x8) Select128FromPair(lo, hi uint8, y Uint32x8) Uint32x8
|
||||||
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
// Select128FromPair treats the 256-bit vectors x and y as a single vector of four
|
||||||
// 128-bit elements, and returns a 256-bit result formed by
|
// 128-bit elements, and returns a 256-bit result formed by
|
||||||
// concatenating the two elements specified by lo and hi.
|
// concatenating the two elements specified by lo and hi.
|
||||||
// For example, {4,5}.Select128FromPair(3,0,{6,7}) returns {7,4}.
|
// For example,
|
||||||
|
//
|
||||||
|
// {40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71})
|
||||||
|
//
|
||||||
|
// returns {70, 71, 40, 41}.
|
||||||
//
|
//
|
||||||
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table.
|
||||||
// lo, hi should be between 0 and 3, inclusive; other values will result in a runtime panic.
|
// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
|
||||||
//
|
//
|
||||||
// Asm: VPERM2I128, CPU Feature: AVX2
|
// Asm: VPERM2I128, CPU Feature: AVX2
|
||||||
func (x Uint64x4) Select128FromPair(lo, hi uint8, y Uint64x4) Uint64x4
|
func (x Uint64x4) Select128FromPair(lo, hi uint8, y Uint64x4) Uint64x4
|
||||||
|
|
|
||||||
|
|
@ -144,11 +144,12 @@ func (x Float32x8) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Float32x8) Fl
|
||||||
// The selection is chosen by the constant parameter h1h0l1l0
|
// The selection is chosen by the constant parameter h1h0l1l0
|
||||||
// where each {h,l}{1,0} is two bits specifying which element from y or x to select.
|
// where each {h,l}{1,0} is two bits specifying which element from y or x to select.
|
||||||
// For example,
|
// For example,
|
||||||
// {0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.concatSelectedConstantGrouped(
|
|
||||||
//
|
//
|
||||||
// 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215})
|
// {0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.concatSelectedConstantGrouped(
|
||||||
|
// 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215})
|
||||||
//
|
//
|
||||||
// returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215}
|
// returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215}
|
||||||
|
//
|
||||||
// (don't forget that the binary constant is written big-endian).
|
// (don't forget that the binary constant is written big-endian).
|
||||||
//
|
//
|
||||||
// h1h0l1l0 results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
// h1h0l1l0 results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
|
@ -215,11 +216,12 @@ func (x Int32x8) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Int32x8) Int32x
|
||||||
// The selection is chosen by the constant parameter h1h0l1l0
|
// The selection is chosen by the constant parameter h1h0l1l0
|
||||||
// where each {h,l}{1,0} is two bits specifying which element from y or x to select.
|
// where each {h,l}{1,0} is two bits specifying which element from y or x to select.
|
||||||
// For example,
|
// For example,
|
||||||
// {0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.concatSelectedConstantGrouped(
|
|
||||||
//
|
//
|
||||||
// 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215})
|
// {0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.concatSelectedConstantGrouped(
|
||||||
|
// 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215})
|
||||||
//
|
//
|
||||||
// returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215}
|
// returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215}
|
||||||
|
//
|
||||||
// (don't forget that the binary constant is written big-endian).
|
// (don't forget that the binary constant is written big-endian).
|
||||||
//
|
//
|
||||||
// h1h0l1l0 results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
// h1h0l1l0 results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
|
@ -286,11 +288,12 @@ func (x Uint32x8) concatSelectedConstantGrouped(h1h0l1l0 uint8, y Uint32x8) Uint
|
||||||
// The selection is chosen by the constant parameter h1h0l1l0
|
// The selection is chosen by the constant parameter h1h0l1l0
|
||||||
// where each {h,l}{1,0} is two bits specifying which element from y or x to select.
|
// where each {h,l}{1,0} is two bits specifying which element from y or x to select.
|
||||||
// For example,
|
// For example,
|
||||||
// {0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.concatSelectedConstantGrouped(
|
|
||||||
//
|
//
|
||||||
// 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215})
|
// {0,1,2,3,8,9,10,11, 20,21,22,23,28,29,210,211}.concatSelectedConstantGrouped(
|
||||||
|
// 0b_11_01_00_10, {4,5,6,7,12,13,14,15, 24,25,26,27,212,213,214,215})
|
||||||
//
|
//
|
||||||
// returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215}
|
// returns {2,0,5,7,10,8,13,15, 22,20,25,27,210,28,213,215}
|
||||||
|
//
|
||||||
// (don't forget that the binary constant is written big-endian).
|
// (don't forget that the binary constant is written big-endian).
|
||||||
//
|
//
|
||||||
// h1h0l1l0 results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
// h1h0l1l0 results in better performance when it's a constant; a non-constant value will be translated into a jump table.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue