mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile, simd: added methods for "float" GetElem
This also required an "always use the operation with the fewest OverwriteBase operands" filter when choosing the machine instructions. The order of generated HW operations is slightly modified because the Float version of GetElem appears earlier in the sorted operations list, though it is not chosen to generate the HW Op. Change-Id: I95fa67afca9c8b6f4f18941fdcaf69afdad8055b Reviewed-on: https://go-review.googlesource.com/c/go/+/696375 Reviewed-by: Junyang Shao <shaojunyang@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
7380213a4e
commit
9a934d5080
11 changed files with 122 additions and 43 deletions
|
|
@ -1128,10 +1128,10 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VPINSRW128:
|
ssa.OpAMD64VPINSRW128:
|
||||||
p = simdVgpvImm8(s, v)
|
p = simdVgpvImm8(s, v)
|
||||||
|
|
||||||
case ssa.OpAMD64VPEXTRB128,
|
case ssa.OpAMD64VPEXTRD128,
|
||||||
ssa.OpAMD64VPEXTRW128,
|
ssa.OpAMD64VPEXTRQ128,
|
||||||
ssa.OpAMD64VPEXTRD128,
|
ssa.OpAMD64VPEXTRB128,
|
||||||
ssa.OpAMD64VPEXTRQ128:
|
ssa.OpAMD64VPEXTRW128:
|
||||||
p = simdVgpImm8(s, v)
|
p = simdVgpImm8(s, v)
|
||||||
|
|
||||||
case ssa.OpAMD64VGF2P8AFFINEINVQBMasked128,
|
case ssa.OpAMD64VGF2P8AFFINEINVQBMasked128,
|
||||||
|
|
|
||||||
|
|
@ -524,6 +524,8 @@
|
||||||
(GaloisFieldMulMaskedUint8x16 x y mask) => (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
(GaloisFieldMulMaskedUint8x16 x y mask) => (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||||
(GaloisFieldMulMaskedUint8x32 x y mask) => (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
(GaloisFieldMulMaskedUint8x32 x y mask) => (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||||
(GaloisFieldMulMaskedUint8x64 x y mask) => (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
(GaloisFieldMulMaskedUint8x64 x y mask) => (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
|
||||||
|
(GetElemFloat32x4 ...) => (VPEXTRD128 ...)
|
||||||
|
(GetElemFloat64x2 ...) => (VPEXTRQ128 ...)
|
||||||
(GetElemInt8x16 ...) => (VPEXTRB128 ...)
|
(GetElemInt8x16 ...) => (VPEXTRB128 ...)
|
||||||
(GetElemInt16x8 ...) => (VPEXTRW128 ...)
|
(GetElemInt16x8 ...) => (VPEXTRW128 ...)
|
||||||
(GetElemInt32x4 ...) => (VPEXTRD128 ...)
|
(GetElemInt32x4 ...) => (VPEXTRD128 ...)
|
||||||
|
|
|
||||||
|
|
@ -978,10 +978,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VGF2P8AFFINEQBMasked128", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VGF2P8AFFINEQBMasked128", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VGF2P8AFFINEQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VGF2P8AFFINEQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VGF2P8AFFINEQBMasked512", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
{name: "VGF2P8AFFINEQBMasked512", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
|
||||||
{name: "VPEXTRB128", argLength: 1, reg: wgp, asm: "VPEXTRB", aux: "UInt8", commutative: false, typ: "int8", resultInArg0: false},
|
|
||||||
{name: "VPEXTRW128", argLength: 1, reg: wgp, asm: "VPEXTRW", aux: "UInt8", commutative: false, typ: "int16", resultInArg0: false},
|
|
||||||
{name: "VPEXTRD128", argLength: 1, reg: vgp, asm: "VPEXTRD", aux: "UInt8", commutative: false, typ: "int32", resultInArg0: false},
|
{name: "VPEXTRD128", argLength: 1, reg: vgp, asm: "VPEXTRD", aux: "UInt8", commutative: false, typ: "int32", resultInArg0: false},
|
||||||
{name: "VPEXTRQ128", argLength: 1, reg: vgp, asm: "VPEXTRQ", aux: "UInt8", commutative: false, typ: "int64", resultInArg0: false},
|
{name: "VPEXTRQ128", argLength: 1, reg: vgp, asm: "VPEXTRQ", aux: "UInt8", commutative: false, typ: "int64", resultInArg0: false},
|
||||||
|
{name: "VPEXTRB128", argLength: 1, reg: wgp, asm: "VPEXTRB", aux: "UInt8", commutative: false, typ: "int8", resultInArg0: false},
|
||||||
|
{name: "VPEXTRW128", argLength: 1, reg: wgp, asm: "VPEXTRW", aux: "UInt8", commutative: false, typ: "int16", resultInArg0: false},
|
||||||
{name: "VEXTRACTF128128", argLength: 1, reg: v11, asm: "VEXTRACTF128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VEXTRACTF128128", argLength: 1, reg: v11, asm: "VEXTRACTF128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VEXTRACTF64X4256", argLength: 1, reg: w11, asm: "VEXTRACTF64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
{name: "VEXTRACTF64X4256", argLength: 1, reg: w11, asm: "VEXTRACTF64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
|
||||||
{name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
{name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
|
||||||
|
|
|
||||||
|
|
@ -1720,6 +1720,8 @@ func simdGenericOps() []opData {
|
||||||
{name: "GaloisFieldAffineTransformUint8x16", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "GaloisFieldAffineTransformUint8x16", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
{name: "GaloisFieldAffineTransformUint8x64", argLength: 2, commutative: false, aux: "UInt8"},
|
{name: "GaloisFieldAffineTransformUint8x64", argLength: 2, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "GetElemFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
{name: "GetElemFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
{name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "UInt8"},
|
{name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
{name: "GetElemInt16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
{name: "GetElemInt16x8", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
{name: "GetElemInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
{name: "GetElemInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
|
||||||
|
|
|
||||||
|
|
@ -2201,10 +2201,10 @@ const (
|
||||||
OpAMD64VGF2P8AFFINEQBMasked128
|
OpAMD64VGF2P8AFFINEQBMasked128
|
||||||
OpAMD64VGF2P8AFFINEQBMasked256
|
OpAMD64VGF2P8AFFINEQBMasked256
|
||||||
OpAMD64VGF2P8AFFINEQBMasked512
|
OpAMD64VGF2P8AFFINEQBMasked512
|
||||||
OpAMD64VPEXTRB128
|
|
||||||
OpAMD64VPEXTRW128
|
|
||||||
OpAMD64VPEXTRD128
|
OpAMD64VPEXTRD128
|
||||||
OpAMD64VPEXTRQ128
|
OpAMD64VPEXTRQ128
|
||||||
|
OpAMD64VPEXTRB128
|
||||||
|
OpAMD64VPEXTRW128
|
||||||
OpAMD64VEXTRACTF128128
|
OpAMD64VEXTRACTF128128
|
||||||
OpAMD64VEXTRACTF64X4256
|
OpAMD64VEXTRACTF64X4256
|
||||||
OpAMD64VEXTRACTI128128
|
OpAMD64VEXTRACTI128128
|
||||||
|
|
@ -6352,6 +6352,8 @@ const (
|
||||||
OpGaloisFieldAffineTransformUint8x16
|
OpGaloisFieldAffineTransformUint8x16
|
||||||
OpGaloisFieldAffineTransformUint8x32
|
OpGaloisFieldAffineTransformUint8x32
|
||||||
OpGaloisFieldAffineTransformUint8x64
|
OpGaloisFieldAffineTransformUint8x64
|
||||||
|
OpGetElemFloat32x4
|
||||||
|
OpGetElemFloat64x2
|
||||||
OpGetElemInt8x16
|
OpGetElemInt8x16
|
||||||
OpGetElemInt16x8
|
OpGetElemInt16x8
|
||||||
OpGetElemInt32x4
|
OpGetElemInt32x4
|
||||||
|
|
@ -34153,34 +34155,6 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "VPEXTRB128",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
asm: x86.AVPEXTRB,
|
|
||||||
reg: regInfo{
|
|
||||||
inputs: []inputInfo{
|
|
||||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
|
||||||
},
|
|
||||||
outputs: []outputInfo{
|
|
||||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "VPEXTRW128",
|
|
||||||
auxType: auxUInt8,
|
|
||||||
argLen: 1,
|
|
||||||
asm: x86.AVPEXTRW,
|
|
||||||
reg: regInfo{
|
|
||||||
inputs: []inputInfo{
|
|
||||||
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
|
||||||
},
|
|
||||||
outputs: []outputInfo{
|
|
||||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "VPEXTRD128",
|
name: "VPEXTRD128",
|
||||||
auxType: auxUInt8,
|
auxType: auxUInt8,
|
||||||
|
|
@ -34209,6 +34183,34 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "VPEXTRB128",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVPEXTRB,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "VPEXTRW128",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
asm: x86.AVPEXTRW,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "VEXTRACTF128128",
|
name: "VEXTRACTF128128",
|
||||||
auxType: auxUInt8,
|
auxType: auxUInt8,
|
||||||
|
|
@ -72920,6 +72922,18 @@ var opcodeTable = [...]opInfo{
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
generic: true,
|
generic: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "GetElemFloat32x4",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "GetElemFloat64x2",
|
||||||
|
auxType: auxUInt8,
|
||||||
|
argLen: 1,
|
||||||
|
generic: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "GetElemInt8x16",
|
name: "GetElemInt8x16",
|
||||||
auxType: auxUInt8,
|
auxType: auxUInt8,
|
||||||
|
|
|
||||||
|
|
@ -2186,6 +2186,12 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
case OpGetClosurePtr:
|
case OpGetClosurePtr:
|
||||||
v.Op = OpAMD64LoweredGetClosurePtr
|
v.Op = OpAMD64LoweredGetClosurePtr
|
||||||
return true
|
return true
|
||||||
|
case OpGetElemFloat32x4:
|
||||||
|
v.Op = OpAMD64VPEXTRD128
|
||||||
|
return true
|
||||||
|
case OpGetElemFloat64x2:
|
||||||
|
v.Op = OpAMD64VPEXTRQ128
|
||||||
|
return true
|
||||||
case OpGetElemInt16x8:
|
case OpGetElemInt16x8:
|
||||||
v.Op = OpAMD64VPEXTRW128
|
v.Op = OpAMD64VPEXTRW128
|
||||||
return true
|
return true
|
||||||
|
|
|
||||||
|
|
@ -536,6 +536,8 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
|
||||||
addF(simdPackage, "Uint8x16.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x16, types.TypeVec128), sys.AMD64)
|
addF(simdPackage, "Uint8x16.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x16, types.TypeVec128), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x32.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x32, types.TypeVec256), sys.AMD64)
|
addF(simdPackage, "Uint8x32.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x32, types.TypeVec256), sys.AMD64)
|
||||||
addF(simdPackage, "Uint8x64.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x64, types.TypeVec512), sys.AMD64)
|
addF(simdPackage, "Uint8x64.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x64, types.TypeVec512), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float32x4.GetElem", opLen1Imm8(ssa.OpGetElemFloat32x4, types.Types[types.TFLOAT32], 0), sys.AMD64)
|
||||||
|
addF(simdPackage, "Float64x2.GetElem", opLen1Imm8(ssa.OpGetElemFloat64x2, types.Types[types.TFLOAT64], 0), sys.AMD64)
|
||||||
addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64)
|
addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64)
|
||||||
addF(simdPackage, "Int16x8.GetElem", opLen1Imm8(ssa.OpGetElemInt16x8, types.Types[types.TINT16], 0), sys.AMD64)
|
addF(simdPackage, "Int16x8.GetElem", opLen1Imm8(ssa.OpGetElemInt16x8, types.Types[types.TINT16], 0), sys.AMD64)
|
||||||
addF(simdPackage, "Int32x4.GetElem", opLen1Imm8(ssa.OpGetElemInt32x4, types.Types[types.TINT32], 0), sys.AMD64)
|
addF(simdPackage, "Int32x4.GetElem", opLen1Imm8(ssa.OpGetElemInt32x4, types.Types[types.TINT32], 0), sys.AMD64)
|
||||||
|
|
|
||||||
|
|
@ -46,22 +46,47 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
|
||||||
OpsData []opData
|
OpsData []opData
|
||||||
OpsDataImm []opData
|
OpsDataImm []opData
|
||||||
}
|
}
|
||||||
seen := map[string]struct{}{}
|
|
||||||
regInfoSet := map[string]bool{
|
regInfoSet := map[string]bool{
|
||||||
"v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true,
|
"v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true,
|
||||||
"w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true}
|
"w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true}
|
||||||
opsData := make([]opData, 0)
|
opsData := make([]opData, 0)
|
||||||
opsDataImm := make([]opData, 0)
|
opsDataImm := make([]opData, 0)
|
||||||
|
|
||||||
|
// Determine the "best" version of an instruction to use
|
||||||
|
best := make(map[string]Operation)
|
||||||
|
var mOpOrder []string
|
||||||
|
countOverrides := func(s []Operand) int {
|
||||||
|
a := 0
|
||||||
|
for _, o := range s {
|
||||||
|
if o.OverwriteBase != nil {
|
||||||
|
a++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return a
|
||||||
|
}
|
||||||
for _, op := range ops {
|
for _, op := range ops {
|
||||||
shapeIn, shapeOut, maskType, _, gOp := op.shape()
|
_, _, maskType, _, gOp := op.shape()
|
||||||
asm := machineOpName(maskType, gOp)
|
asm := machineOpName(maskType, gOp)
|
||||||
|
other, ok := best[asm]
|
||||||
|
if !ok {
|
||||||
|
best[asm] = op
|
||||||
|
mOpOrder = append(mOpOrder, asm)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// see if "op" is better than "other"
|
||||||
|
if countOverrides(op.In)+countOverrides(op.Out) < countOverrides(other.In)+countOverrides(other.Out) {
|
||||||
|
best[asm] = op
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, asm := range mOpOrder {
|
||||||
|
op := best[asm]
|
||||||
|
shapeIn, shapeOut, _, _, gOp := op.shape()
|
||||||
|
|
||||||
// TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy
|
// TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy
|
||||||
// one here with a name suffix "Merging". The rewrite rules will need them.
|
// one here with a name suffix "Merging". The rewrite rules will need them.
|
||||||
if _, ok := seen[asm]; ok {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
seen[asm] = struct{}{}
|
|
||||||
regInfo, err := op.regShape()
|
regInfo, err := op.regShape()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
|
|
|
||||||
|
|
@ -67,7 +67,7 @@ type rawOperation struct {
|
||||||
NoTypes *string
|
NoTypes *string
|
||||||
// If non-nil, all generation in gen_simdGenericOps and gen_simdrules will be skipped.
|
// If non-nil, all generation in gen_simdGenericOps and gen_simdrules will be skipped.
|
||||||
NoGenericOps *string
|
NoGenericOps *string
|
||||||
// If non-nil, this string will be attached to the machine ssa op name.
|
// If non-nil, this string will be attached to the machine ssa op name. E.g. "const"
|
||||||
SSAVariant *string
|
SSAVariant *string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,20 @@
|
||||||
base: $b
|
base: $b
|
||||||
bits: $e
|
bits: $e
|
||||||
|
|
||||||
|
- go: GetElem
|
||||||
|
asm: "VPEXTR[DQ]"
|
||||||
|
in:
|
||||||
|
- class: vreg
|
||||||
|
base: int
|
||||||
|
elemBits: $e
|
||||||
|
OverwriteBase: float
|
||||||
|
- *imm
|
||||||
|
out:
|
||||||
|
- class: greg
|
||||||
|
base: int
|
||||||
|
bits: $e
|
||||||
|
OverwriteBase: float
|
||||||
|
|
||||||
- go: "SetHi|SetLo"
|
- go: "SetHi|SetLo"
|
||||||
asm: "VINSERTI128|VINSERTI64X4"
|
asm: "VINSERTI128|VINSERTI64X4"
|
||||||
inVariant: []
|
inVariant: []
|
||||||
|
|
|
||||||
|
|
@ -3470,6 +3470,20 @@ func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, mask Mask8x64) Uint8x64
|
||||||
|
|
||||||
/* GetElem */
|
/* GetElem */
|
||||||
|
|
||||||
|
// GetElem retrieves a single constant-indexed element's value.
|
||||||
|
//
|
||||||
|
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPEXTRD, CPU Feature: AVX
|
||||||
|
func (x Float32x4) GetElem(index uint8) float32
|
||||||
|
|
||||||
|
// GetElem retrieves a single constant-indexed element's value.
|
||||||
|
//
|
||||||
|
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
//
|
||||||
|
// Asm: VPEXTRQ, CPU Feature: AVX
|
||||||
|
func (x Float64x2) GetElem(index uint8) float64
|
||||||
|
|
||||||
// GetElem retrieves a single constant-indexed element's value.
|
// GetElem retrieves a single constant-indexed element's value.
|
||||||
//
|
//
|
||||||
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
// index results in better performance when it's a constant, a non-constant value will be translated into a jump table.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue