[dev.simd] cmd/compile: add simd VPEXTR*

This CL was generated by simdgen CL 683836
and should be submitted after that
generator CL.

Change-Id: I1aa893b185826ad1f9fb60b85c75eda31f70623b
Reviewed-on: https://go-review.googlesource.com/c/go/+/683797
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
David Chase 2025-06-24 18:29:38 -04:00
parent 0d8cb89f5c
commit 7fadfa9638
9 changed files with 322 additions and 0 deletions

View file

@ -724,6 +724,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPINSRQ128: ssa.OpAMD64VPINSRQ128:
p = simdFpgpfpImm8(s, v) p = simdFpgpfpImm8(s, v)
case ssa.OpAMD64VPEXTRB128,
ssa.OpAMD64VPEXTRW128,
ssa.OpAMD64VPEXTRD128,
ssa.OpAMD64VPEXTRQ128:
p = simdFpgpImm8(s, v)
default: default:
// Unknown reg shape // Unknown reg shape
return false return false

View file

@ -251,6 +251,14 @@
(FusedMultiplySubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...) (FusedMultiplySubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...)
(FusedMultiplySubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...) (FusedMultiplySubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...)
(FusedMultiplySubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...) (FusedMultiplySubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...)
(GetElemInt16x8 [a] x) => (VPEXTRW128 [a] x)
(GetElemInt32x4 [a] x) => (VPEXTRD128 [a] x)
(GetElemInt64x2 [a] x) => (VPEXTRQ128 [a] x)
(GetElemInt8x16 [a] x) => (VPEXTRB128 [a] x)
(GetElemUint16x8 [a] x) => (VPEXTRW128 [a] x)
(GetElemUint32x4 [a] x) => (VPEXTRD128 [a] x)
(GetElemUint64x2 [a] x) => (VPEXTRQ128 [a] x)
(GetElemUint8x16 [a] x) => (VPEXTRB128 [a] x)
(GreaterFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [6] x y)) (GreaterFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [6] x y))
(GreaterFloat32x4 x y) => (VCMPPS128 [6] x y) (GreaterFloat32x4 x y) => (VCMPPS128 [6] x y)
(GreaterFloat32x8 x y) => (VCMPPS256 [6] x y) (GreaterFloat32x8 x y) => (VCMPPS256 [6] x y)

View file

@ -643,16 +643,19 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp,
{name: "VPCMPWMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPW512", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPW512", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPWMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPEXTRW128", argLength: 1, reg: fpgp, asm: "VPEXTRW", aux: "Int8", commutative: false, typ: "int16", resultInArg0: false},
{name: "VPCMPW128", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPW128", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPWMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPINSRW128", argLength: 2, reg: fpgpfp, asm: "VPINSRW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRW128", argLength: 2, reg: fpgpfp, asm: "VPINSRW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPD512", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPD512", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPDMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPEXTRD128", argLength: 1, reg: fpgp, asm: "VPEXTRD", aux: "Int8", commutative: false, typ: "int32", resultInArg0: false},
{name: "VPCMPD128", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPD128", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPDMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPINSRD128", argLength: 2, reg: fpgpfp, asm: "VPINSRD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRD128", argLength: 2, reg: fpgpfp, asm: "VPINSRD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPD256", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPD256", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPDMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPEXTRQ128", argLength: 1, reg: fpgp, asm: "VPEXTRQ", aux: "Int8", commutative: false, typ: "int64", resultInArg0: false},
{name: "VPCMPQ128", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPQ128", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPQMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPINSRQ128", argLength: 2, reg: fpgpfp, asm: "VPINSRQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRQ128", argLength: 2, reg: fpgpfp, asm: "VPINSRQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
@ -660,6 +663,7 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp,
{name: "VPCMPQMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPQ512", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQ512", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPQMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPEXTRB128", argLength: 1, reg: fpgp, asm: "VPEXTRB", aux: "Int8", commutative: false, typ: "int8", resultInArg0: false},
{name: "VPCMPB128", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPB128", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPINSRB128", argLength: 2, reg: fpgpfp, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRB128", argLength: 2, reg: fpgpfp, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},

View file

@ -1372,13 +1372,21 @@ func simdGenericOps() []opData {
{name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "TruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "TruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "GetElemInt16x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "GetElemInt32x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "GetElemInt64x2", argLength: 1, commutative: false, aux: "Int8"},
{name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"},
{name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "GetElemUint16x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "SetElemUint16x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemUint16x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "GetElemUint32x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "GetElemUint64x2", argLength: 1, commutative: false, aux: "Int8"},
{name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "GetElemUint8x16", argLength: 1, commutative: false, aux: "Int8"},
{name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"},
} }
} }

View file

@ -1836,16 +1836,19 @@ const (
OpAMD64VPCMPWMasked256 OpAMD64VPCMPWMasked256
OpAMD64VPCMPW512 OpAMD64VPCMPW512
OpAMD64VPCMPWMasked512 OpAMD64VPCMPWMasked512
OpAMD64VPEXTRW128
OpAMD64VPCMPW128 OpAMD64VPCMPW128
OpAMD64VPCMPWMasked128 OpAMD64VPCMPWMasked128
OpAMD64VPINSRW128 OpAMD64VPINSRW128
OpAMD64VPCMPD512 OpAMD64VPCMPD512
OpAMD64VPCMPDMasked512 OpAMD64VPCMPDMasked512
OpAMD64VPEXTRD128
OpAMD64VPCMPD128 OpAMD64VPCMPD128
OpAMD64VPCMPDMasked128 OpAMD64VPCMPDMasked128
OpAMD64VPINSRD128 OpAMD64VPINSRD128
OpAMD64VPCMPD256 OpAMD64VPCMPD256
OpAMD64VPCMPDMasked256 OpAMD64VPCMPDMasked256
OpAMD64VPEXTRQ128
OpAMD64VPCMPQ128 OpAMD64VPCMPQ128
OpAMD64VPCMPQMasked128 OpAMD64VPCMPQMasked128
OpAMD64VPINSRQ128 OpAMD64VPINSRQ128
@ -1853,6 +1856,7 @@ const (
OpAMD64VPCMPQMasked256 OpAMD64VPCMPQMasked256
OpAMD64VPCMPQ512 OpAMD64VPCMPQ512
OpAMD64VPCMPQMasked512 OpAMD64VPCMPQMasked512
OpAMD64VPEXTRB128
OpAMD64VPCMPB128 OpAMD64VPCMPB128
OpAMD64VPCMPBMasked128 OpAMD64VPCMPBMasked128
OpAMD64VPINSRB128 OpAMD64VPINSRB128
@ -5479,13 +5483,21 @@ const (
OpRoundWithPrecisionFloat64x8 OpRoundWithPrecisionFloat64x8
OpTruncSuppressExceptionWithPrecisionFloat64x8 OpTruncSuppressExceptionWithPrecisionFloat64x8
OpTruncWithPrecisionFloat64x8 OpTruncWithPrecisionFloat64x8
OpGetElemInt16x8
OpSetElemInt16x8 OpSetElemInt16x8
OpGetElemInt32x4
OpSetElemInt32x4 OpSetElemInt32x4
OpGetElemInt64x2
OpSetElemInt64x2 OpSetElemInt64x2
OpGetElemInt8x16
OpSetElemInt8x16 OpSetElemInt8x16
OpGetElemUint16x8
OpSetElemUint16x8 OpSetElemUint16x8
OpGetElemUint32x4
OpSetElemUint32x4 OpSetElemUint32x4
OpGetElemUint64x2
OpSetElemUint64x2 OpSetElemUint64x2
OpGetElemUint8x16
OpSetElemUint8x16 OpSetElemUint8x16
) )
@ -27718,6 +27730,20 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPEXTRW128",
auxType: auxInt8,
argLen: 1,
asm: x86.AVPEXTRW,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{ {
name: "VPCMPW128", name: "VPCMPW128",
auxType: auxInt8, auxType: auxInt8,
@ -27798,6 +27824,20 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPEXTRD128",
auxType: auxInt8,
argLen: 1,
asm: x86.AVPEXTRD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{ {
name: "VPCMPD128", name: "VPCMPD128",
auxType: auxInt8, auxType: auxInt8,
@ -27877,6 +27917,20 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPEXTRQ128",
auxType: auxInt8,
argLen: 1,
asm: x86.AVPEXTRQ,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{ {
name: "VPCMPQ128", name: "VPCMPQ128",
auxType: auxInt8, auxType: auxInt8,
@ -27989,6 +28043,20 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPEXTRB128",
auxType: auxInt8,
argLen: 1,
asm: x86.AVPEXTRB,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{ {
name: "VPCMPB128", name: "VPCMPB128",
auxType: auxInt8, auxType: auxInt8,
@ -63225,48 +63293,96 @@ var opcodeTable = [...]opInfo{
argLen: 1, argLen: 1,
generic: true, generic: true,
}, },
{
name: "GetElemInt16x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{ {
name: "SetElemInt16x8", name: "SetElemInt16x8",
auxType: auxInt8, auxType: auxInt8,
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "GetElemInt32x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{ {
name: "SetElemInt32x4", name: "SetElemInt32x4",
auxType: auxInt8, auxType: auxInt8,
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "GetElemInt64x2",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{ {
name: "SetElemInt64x2", name: "SetElemInt64x2",
auxType: auxInt8, auxType: auxInt8,
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "GetElemInt8x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{ {
name: "SetElemInt8x16", name: "SetElemInt8x16",
auxType: auxInt8, auxType: auxInt8,
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "GetElemUint16x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{ {
name: "SetElemUint16x8", name: "SetElemUint16x8",
auxType: auxInt8, auxType: auxInt8,
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "GetElemUint32x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{ {
name: "SetElemUint32x4", name: "SetElemUint32x4",
auxType: auxInt8, auxType: auxInt8,
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "GetElemUint64x2",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{ {
name: "SetElemUint64x2", name: "SetElemUint64x2",
auxType: auxInt8, auxType: auxInt8,
argLen: 2, argLen: 2,
generic: true, generic: true,
}, },
{
name: "GetElemUint8x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{ {
name: "SetElemUint8x16", name: "SetElemUint8x16",
auxType: auxInt8, auxType: auxInt8,

View file

@ -1448,6 +1448,22 @@ func rewriteValueAMD64(v *Value) bool {
case OpGetClosurePtr: case OpGetClosurePtr:
v.Op = OpAMD64LoweredGetClosurePtr v.Op = OpAMD64LoweredGetClosurePtr
return true return true
case OpGetElemInt16x8:
return rewriteValueAMD64_OpGetElemInt16x8(v)
case OpGetElemInt32x4:
return rewriteValueAMD64_OpGetElemInt32x4(v)
case OpGetElemInt64x2:
return rewriteValueAMD64_OpGetElemInt64x2(v)
case OpGetElemInt8x16:
return rewriteValueAMD64_OpGetElemInt8x16(v)
case OpGetElemUint16x8:
return rewriteValueAMD64_OpGetElemUint16x8(v)
case OpGetElemUint32x4:
return rewriteValueAMD64_OpGetElemUint32x4(v)
case OpGetElemUint64x2:
return rewriteValueAMD64_OpGetElemUint64x2(v)
case OpGetElemUint8x16:
return rewriteValueAMD64_OpGetElemUint8x16(v)
case OpGetG: case OpGetG:
return rewriteValueAMD64_OpGetG(v) return rewriteValueAMD64_OpGetG(v)
case OpGreaterEqualFloat32x16: case OpGreaterEqualFloat32x16:
@ -30549,6 +30565,110 @@ func rewriteValueAMD64_OpFloorWithPrecisionFloat64x8(v *Value) bool {
return true return true
} }
} }
// rewriteValueAMD64_OpGetElemInt16x8 rewrites v in place from the generic
// GetElemInt16x8 op to the AMD64 VPEXTRW128 op, keeping the same aux
// immediate and the same single argument. It always reports true.
//
// match: (GetElemInt16x8 [a] x)
// result: (VPEXTRW128 [a] x)
func rewriteValueAMD64_OpGetElemInt16x8(v *Value) bool {
	// Read the operand and the immediate first; the original does both
	// before calling v.reset, so that order is kept.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)

	v.reset(OpAMD64VPEXTRW128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGetElemInt32x4 rewrites v in place from the generic
// GetElemInt32x4 op to the AMD64 VPEXTRD128 op, keeping the same aux
// immediate and the same single argument. It always reports true.
//
// match: (GetElemInt32x4 [a] x)
// result: (VPEXTRD128 [a] x)
func rewriteValueAMD64_OpGetElemInt32x4(v *Value) bool {
	// Read the operand and the immediate first; the original does both
	// before calling v.reset, so that order is kept.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)

	v.reset(OpAMD64VPEXTRD128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGetElemInt64x2 rewrites v in place from the generic
// GetElemInt64x2 op to the AMD64 VPEXTRQ128 op, keeping the same aux
// immediate and the same single argument. It always reports true.
//
// match: (GetElemInt64x2 [a] x)
// result: (VPEXTRQ128 [a] x)
func rewriteValueAMD64_OpGetElemInt64x2(v *Value) bool {
	// Read the operand and the immediate first; the original does both
	// before calling v.reset, so that order is kept.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)

	v.reset(OpAMD64VPEXTRQ128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGetElemInt8x16 rewrites v in place from the generic
// GetElemInt8x16 op to the AMD64 VPEXTRB128 op, keeping the same aux
// immediate and the same single argument. It always reports true.
//
// match: (GetElemInt8x16 [a] x)
// result: (VPEXTRB128 [a] x)
func rewriteValueAMD64_OpGetElemInt8x16(v *Value) bool {
	// Read the operand and the immediate first; the original does both
	// before calling v.reset, so that order is kept.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)

	v.reset(OpAMD64VPEXTRB128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGetElemUint16x8 rewrites v in place from the generic
// GetElemUint16x8 op to the AMD64 VPEXTRW128 op, keeping the same aux
// immediate and the same single argument. It always reports true.
//
// match: (GetElemUint16x8 [a] x)
// result: (VPEXTRW128 [a] x)
func rewriteValueAMD64_OpGetElemUint16x8(v *Value) bool {
	// Read the operand and the immediate first; the original does both
	// before calling v.reset, so that order is kept.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)

	v.reset(OpAMD64VPEXTRW128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGetElemUint32x4 rewrites v in place from the generic
// GetElemUint32x4 op to the AMD64 VPEXTRD128 op, keeping the same aux
// immediate and the same single argument. It always reports true.
//
// match: (GetElemUint32x4 [a] x)
// result: (VPEXTRD128 [a] x)
func rewriteValueAMD64_OpGetElemUint32x4(v *Value) bool {
	// Read the operand and the immediate first; the original does both
	// before calling v.reset, so that order is kept.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)

	v.reset(OpAMD64VPEXTRD128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGetElemUint64x2 rewrites v in place from the generic
// GetElemUint64x2 op to the AMD64 VPEXTRQ128 op, keeping the same aux
// immediate and the same single argument. It always reports true.
//
// match: (GetElemUint64x2 [a] x)
// result: (VPEXTRQ128 [a] x)
func rewriteValueAMD64_OpGetElemUint64x2(v *Value) bool {
	// Read the operand and the immediate first; the original does both
	// before calling v.reset, so that order is kept.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)

	v.reset(OpAMD64VPEXTRQ128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGetElemUint8x16 rewrites v in place from the generic
// GetElemUint8x16 op to the AMD64 VPEXTRB128 op, keeping the same aux
// immediate and the same single argument. It always reports true.
//
// match: (GetElemUint8x16 [a] x)
// result: (VPEXTRB128 [a] x)
func rewriteValueAMD64_OpGetElemUint8x16(v *Value) bool {
	// Read the operand and the immediate first; the original does both
	// before calling v.reset, so that order is kept.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)

	v.reset(OpAMD64VPEXTRB128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
func rewriteValueAMD64_OpGetG(v *Value) bool { func rewriteValueAMD64_OpGetG(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (GetG mem) // match: (GetG mem)

View file

@ -262,6 +262,14 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64)
addF(simdPackage, "Int16x8.GetElem", opLen1Imm8(ssa.OpGetElemInt16x8, types.Types[types.TINT16], 0), sys.AMD64)
addF(simdPackage, "Int32x4.GetElem", opLen1Imm8(ssa.OpGetElemInt32x4, types.Types[types.TINT32], 0), sys.AMD64)
addF(simdPackage, "Int64x2.GetElem", opLen1Imm8(ssa.OpGetElemInt64x2, types.Types[types.TINT64], 0), sys.AMD64)
addF(simdPackage, "Uint8x16.GetElem", opLen1Imm8(ssa.OpGetElemUint8x16, types.Types[types.TUINT8], 0), sys.AMD64)
addF(simdPackage, "Uint16x8.GetElem", opLen1Imm8(ssa.OpGetElemUint16x8, types.Types[types.TUINT16], 0), sys.AMD64)
addF(simdPackage, "Uint32x4.GetElem", opLen1Imm8(ssa.OpGetElemUint32x4, types.Types[types.TUINT32], 0), sys.AMD64)
addF(simdPackage, "Uint64x2.GetElem", opLen1Imm8(ssa.OpGetElemUint64x2, types.Types[types.TUINT64], 0), sys.AMD64)
addF(simdPackage, "Int8x16.Greater", opLen2(ssa.OpGreaterInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.Greater", opLen2(ssa.OpGreaterInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Greater", opLen2(ssa.OpGreaterInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.Greater", opLen2(ssa.OpGreaterInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.Greater", opLen2(ssa.OpGreaterInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.Greater", opLen2(ssa.OpGreaterInt16x8, types.TypeVec128), sys.AMD64)

View file

@ -183,6 +183,16 @@ func TestSlicesInt8SetElem(t *testing.T) {
checkInt8Slices(t, a, b) checkInt8Slices(t, a, b)
} }
// TestSlicesInt8GetElem loads a slice into an Int8x16 vector and checks that
// GetElem(2) yields the same value as element 2 of the source slice.
func TestSlicesInt8GetElem(t *testing.T) {
	// The slice holds 32 values.
	// NOTE(review): presumably LoadInt8x16Slice only consumes the first 16 — confirm.
	a := []int8{
		1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
	}
	vec := simd.LoadInt8x16Slice(a)
	if got, want := vec.GetElem(2), a[2]; got != want {
		t.Errorf("GetElem(2) = %d != a[2] = %d", got, want)
	}
}
func TestSlicesInt8TooShortLoad(t *testing.T) { func TestSlicesInt8TooShortLoad(t *testing.T) {
defer func() { defer func() {
if r := recover(); r != nil { if r := recover(); r != nil {

View file

@ -1426,6 +1426,48 @@ func (x Float64x4) FusedMultiplySubAdd(y Float64x4, z Float64x4) Float64x4
// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX // Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8 func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8
/* GetElem */
// GetElem retrieves a single constant-indexed element's value.
//
// imm8 selects which element of x is returned; the result is that element
// as an int8.
// NOTE(review): presumably imm8 must be a constant in [0, 15] — confirm.
// NOTE(review): the 32- and 64-bit GetElem variants list CPU Feature AVX;
// confirm AVX512EVEX is intended here.
//
// Asm: VPEXTRB, CPU Feature: AVX512EVEX
func (x Int8x16) GetElem(imm8 uint8) int8
// GetElem retrieves a single constant-indexed element's value.
//
// imm8 selects which element of x is returned; the result is that element
// as an int16.
// NOTE(review): presumably imm8 must be a constant in [0, 7] — confirm.
// NOTE(review): the 32- and 64-bit GetElem variants list CPU Feature AVX;
// confirm AVX512EVEX is intended here.
//
// Asm: VPEXTRW, CPU Feature: AVX512EVEX
func (x Int16x8) GetElem(imm8 uint8) int16
// GetElem retrieves a single constant-indexed element's value.
//
// imm8 selects which element of x is returned; the result is that element
// as an int32.
// NOTE(review): presumably imm8 must be a constant in [0, 3] — confirm.
//
// Asm: VPEXTRD, CPU Feature: AVX
func (x Int32x4) GetElem(imm8 uint8) int32
// GetElem retrieves a single constant-indexed element's value.
//
// imm8 selects which element of x is returned; the result is that element
// as an int64.
// NOTE(review): presumably imm8 must be a constant in [0, 1] — confirm.
//
// Asm: VPEXTRQ, CPU Feature: AVX
func (x Int64x2) GetElem(imm8 uint8) int64
// GetElem retrieves a single constant-indexed element's value.
//
// imm8 selects which element of x is returned; the result is that element
// as a uint8.
// NOTE(review): presumably imm8 must be a constant in [0, 15] — confirm.
// NOTE(review): the 32- and 64-bit GetElem variants list CPU Feature AVX;
// confirm AVX512EVEX is intended here.
//
// Asm: VPEXTRB, CPU Feature: AVX512EVEX
func (x Uint8x16) GetElem(imm8 uint8) uint8
// GetElem retrieves a single constant-indexed element's value.
//
// imm8 selects which element of x is returned; the result is that element
// as a uint16.
// NOTE(review): presumably imm8 must be a constant in [0, 7] — confirm.
// NOTE(review): the 32- and 64-bit GetElem variants list CPU Feature AVX;
// confirm AVX512EVEX is intended here.
//
// Asm: VPEXTRW, CPU Feature: AVX512EVEX
func (x Uint16x8) GetElem(imm8 uint8) uint16
// GetElem retrieves a single constant-indexed element's value.
//
// imm8 selects which element of x is returned; the result is that element
// as a uint32.
// NOTE(review): presumably imm8 must be a constant in [0, 3] — confirm.
//
// Asm: VPEXTRD, CPU Feature: AVX
func (x Uint32x4) GetElem(imm8 uint8) uint32
// GetElem retrieves a single constant-indexed element's value.
//
// imm8 selects which element of x is returned; the result is that element
// as a uint64.
// NOTE(review): presumably imm8 must be a constant in [0, 1] — confirm.
//
// Asm: VPEXTRQ, CPU Feature: AVX
func (x Uint64x2) GetElem(imm8 uint8) uint64
/* Greater */ /* Greater */
// Greater compares for greater than. // Greater compares for greater than.