[dev.simd] cmd/compile: add EXTRACT[IF]128 instructions

This is generated by simdgen CL 684080
and should be submitted after it.

Also includes tests.

Change-Id: I1d680911134d8fb92f4deccae4ec373f3ed9f752
Reviewed-on: https://go-review.googlesource.com/c/go/+/684115
Reviewed-by: Junyang Shao <shaojunyang@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
David Chase 2025-06-25 18:20:50 -04:00
parent 292db9b676
commit 43a61aef56
10 changed files with 425 additions and 0 deletions

View file

@ -655,6 +655,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VREDUCEPD128,
ssa.OpAMD64VREDUCEPD256,
ssa.OpAMD64VREDUCEPD512,
ssa.OpAMD64VEXTRACTF128128,
ssa.OpAMD64VEXTRACTI128128,
ssa.OpAMD64VPROLD128,
ssa.OpAMD64VPROLD256,
ssa.OpAMD64VPROLD512,

View file

@ -224,6 +224,16 @@
(GaloisFieldMulUint8x16 ...) => (VGF2P8MULB128 ...)
(GaloisFieldMulUint8x32 ...) => (VGF2P8MULB256 ...)
(GaloisFieldMulUint8x64 ...) => (VGF2P8MULB512 ...)
// Get128 lowering: every 256-bit Get128 op keeps its immediate [a] and its
// single argument x. The float element types map to VEXTRACTF128128 and the
// signed/unsigned integer element types map to VEXTRACTI128128.
(Get128Float32x8 [a] x) => (VEXTRACTF128128 [a] x)
(Get128Float64x4 [a] x) => (VEXTRACTF128128 [a] x)
(Get128Int8x32 [a] x) => (VEXTRACTI128128 [a] x)
(Get128Int16x16 [a] x) => (VEXTRACTI128128 [a] x)
(Get128Int32x8 [a] x) => (VEXTRACTI128128 [a] x)
(Get128Int64x4 [a] x) => (VEXTRACTI128128 [a] x)
(Get128Uint8x32 [a] x) => (VEXTRACTI128128 [a] x)
(Get128Uint16x16 [a] x) => (VEXTRACTI128128 [a] x)
(Get128Uint32x8 [a] x) => (VEXTRACTI128128 [a] x)
(Get128Uint64x4 [a] x) => (VEXTRACTI128128 [a] x)
(GetElemInt8x16 [a] x) => (VPEXTRB128 [a] x)
(GetElemInt16x8 [a] x) => (VPEXTRW128 [a] x)
(GetElemInt32x4 [a] x) => (VPEXTRD128 [a] x)

View file

@ -765,6 +765,7 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp,
{name: "VRNDSCALEPS256", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VREDUCEPS256", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VCMPPS256", argLength: 2, reg: fp21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VEXTRACTF128128", argLength: 1, reg: fp11, asm: "VEXTRACTF128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VRNDSCALEPSMasked256", argLength: 2, reg: fpkfp, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VREDUCEPSMasked256", argLength: 2, reg: fpkfp, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VCMPPSMasked256", argLength: 3, reg: fp2kk, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
@ -878,6 +879,7 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp,
{name: "VPCMPB128", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPINSRB128", argLength: 2, reg: fpgpfp, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VEXTRACTI128128", argLength: 1, reg: fp11, asm: "VEXTRACTI128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPB256", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPBMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VINSERTI128256", argLength: 2, reg: fp21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},

View file

@ -1502,6 +1502,7 @@ func simdGenericOps() []opData {
{name: "DiffWithRoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "DiffWithTruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "FloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "Get128Float32x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedDiffWithCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedDiffWithFloorWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
@ -1535,6 +1536,7 @@ func simdGenericOps() []opData {
{name: "DiffWithRoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "DiffWithTruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "FloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "Get128Float64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedDiffWithCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedDiffWithFloorWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
@ -1562,6 +1564,7 @@ func simdGenericOps() []opData {
{name: "MaskedTruncWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "Get128Int16x16", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllRightAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"},
{name: "Set128Int16x16", argLength: 2, commutative: false, aux: "Int8"},
@ -1595,6 +1598,7 @@ func simdGenericOps() []opData {
{name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllLeftAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "Get128Int32x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedRotateAllLeftInt32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedRotateAllRightInt32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false, aux: "Int8"},
@ -1614,6 +1618,7 @@ func simdGenericOps() []opData {
{name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllLeftAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "Get128Int64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedRotateAllLeftInt64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedRotateAllRightInt64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false, aux: "Int8"},
@ -1633,7 +1638,9 @@ func simdGenericOps() []opData {
{name: "ShiftAllRightAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"},
{name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "Get128Int8x32", argLength: 1, commutative: false, aux: "Int8"},
{name: "Set128Int8x32", argLength: 2, commutative: false, aux: "Int8"},
{name: "Get128Uint16x16", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllRightAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"},
{name: "Set128Uint16x16", argLength: 2, commutative: false, aux: "Int8"},
@ -1667,6 +1674,7 @@ func simdGenericOps() []opData {
{name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllLeftAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "Get128Uint32x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedRotateAllLeftUint32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedRotateAllRightUint32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false, aux: "Int8"},
@ -1686,6 +1694,7 @@ func simdGenericOps() []opData {
{name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllLeftAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "Get128Uint64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedRotateAllLeftUint64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedRotateAllRightUint64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllLeftAndFillUpperFromUint64x4", argLength: 3, commutative: false, aux: "Int8"},
@ -1711,6 +1720,7 @@ func simdGenericOps() []opData {
{name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "Int8"},
{name: "GaloisFieldAffineTransformInversedUint8x32", argLength: 2, commutative: false, aux: "Int8"},
{name: "Get128Uint8x32", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedGaloisFieldAffineTransformUint8x32", argLength: 3, commutative: false, aux: "Int8"},
{name: "MaskedGaloisFieldAffineTransformInversedUint8x32", argLength: 3, commutative: false, aux: "Int8"},
{name: "Set128Uint8x32", argLength: 2, commutative: false, aux: "Int8"},

View file

@ -1958,6 +1958,7 @@ const (
OpAMD64VRNDSCALEPS256
OpAMD64VREDUCEPS256
OpAMD64VCMPPS256
OpAMD64VEXTRACTF128128
OpAMD64VRNDSCALEPSMasked256
OpAMD64VREDUCEPSMasked256
OpAMD64VCMPPSMasked256
@ -2071,6 +2072,7 @@ const (
OpAMD64VPCMPB128
OpAMD64VPCMPBMasked128
OpAMD64VPINSRB128
OpAMD64VEXTRACTI128128
OpAMD64VPCMPB256
OpAMD64VPCMPBMasked256
OpAMD64VINSERTI128256
@ -5837,6 +5839,7 @@ const (
OpDiffWithRoundWithPrecisionFloat32x8
OpDiffWithTruncWithPrecisionFloat32x8
OpFloorWithPrecisionFloat32x8
OpGet128Float32x8
OpMaskedCeilWithPrecisionFloat32x8
OpMaskedDiffWithCeilWithPrecisionFloat32x8
OpMaskedDiffWithFloorWithPrecisionFloat32x8
@ -5870,6 +5873,7 @@ const (
OpDiffWithRoundWithPrecisionFloat64x4
OpDiffWithTruncWithPrecisionFloat64x4
OpFloorWithPrecisionFloat64x4
OpGet128Float64x4
OpMaskedCeilWithPrecisionFloat64x4
OpMaskedDiffWithCeilWithPrecisionFloat64x4
OpMaskedDiffWithFloorWithPrecisionFloat64x4
@ -5897,6 +5901,7 @@ const (
OpMaskedTruncWithPrecisionFloat64x8
OpRoundWithPrecisionFloat64x8
OpTruncWithPrecisionFloat64x8
OpGet128Int16x16
OpMaskedShiftAllLeftAndFillUpperFromInt16x16
OpMaskedShiftAllRightAndFillUpperFromInt16x16
OpSet128Int16x16
@ -5930,6 +5935,7 @@ const (
OpSetElemInt32x4
OpShiftAllLeftAndFillUpperFromInt32x4
OpShiftAllRightAndFillUpperFromInt32x4
OpGet128Int32x8
OpMaskedRotateAllLeftInt32x8
OpMaskedRotateAllRightInt32x8
OpMaskedShiftAllLeftAndFillUpperFromInt32x8
@ -5949,6 +5955,7 @@ const (
OpSetElemInt64x2
OpShiftAllLeftAndFillUpperFromInt64x2
OpShiftAllRightAndFillUpperFromInt64x2
OpGet128Int64x4
OpMaskedRotateAllLeftInt64x4
OpMaskedRotateAllRightInt64x4
OpMaskedShiftAllLeftAndFillUpperFromInt64x4
@ -5968,7 +5975,9 @@ const (
OpShiftAllRightAndFillUpperFromInt64x8
OpGetElemInt8x16
OpSetElemInt8x16
OpGet128Int8x32
OpSet128Int8x32
OpGet128Uint16x16
OpMaskedShiftAllLeftAndFillUpperFromUint16x16
OpMaskedShiftAllRightAndFillUpperFromUint16x16
OpSet128Uint16x16
@ -6002,6 +6011,7 @@ const (
OpSetElemUint32x4
OpShiftAllLeftAndFillUpperFromUint32x4
OpShiftAllRightAndFillUpperFromUint32x4
OpGet128Uint32x8
OpMaskedRotateAllLeftUint32x8
OpMaskedRotateAllRightUint32x8
OpMaskedShiftAllLeftAndFillUpperFromUint32x8
@ -6021,6 +6031,7 @@ const (
OpSetElemUint64x2
OpShiftAllLeftAndFillUpperFromUint64x2
OpShiftAllRightAndFillUpperFromUint64x2
OpGet128Uint64x4
OpMaskedRotateAllLeftUint64x4
OpMaskedRotateAllRightUint64x4
OpMaskedShiftAllLeftAndFillUpperFromUint64x4
@ -6046,6 +6057,7 @@ const (
OpSetElemUint8x16
OpGaloisFieldAffineTransformUint8x32
OpGaloisFieldAffineTransformInversedUint8x32
OpGet128Uint8x32
OpMaskedGaloisFieldAffineTransformUint8x32
OpMaskedGaloisFieldAffineTransformInversedUint8x32
OpSet128Uint8x32
@ -30096,6 +30108,20 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VEXTRACTF128128",
auxType: auxInt8,
argLen: 1,
asm: x86.AVEXTRACTF128,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VRNDSCALEPSMasked256",
auxType: auxInt8,
@ -31820,6 +31846,20 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VEXTRACTI128128",
auxType: auxInt8,
argLen: 1,
asm: x86.AVEXTRACTI128,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPCMPB256",
auxType: auxInt8,
@ -67706,6 +67746,12 @@ var opcodeTable = [...]opInfo{
argLen: 1,
generic: true,
},
{
name: "Get128Float32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
name: "MaskedCeilWithPrecisionFloat32x8",
auxType: auxInt8,
@ -67904,6 +67950,12 @@ var opcodeTable = [...]opInfo{
argLen: 1,
generic: true,
},
{
name: "Get128Float64x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
name: "MaskedCeilWithPrecisionFloat64x4",
auxType: auxInt8,
@ -68066,6 +68118,12 @@ var opcodeTable = [...]opInfo{
argLen: 1,
generic: true,
},
{
name: "Get128Int16x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
name: "MaskedShiftAllLeftAndFillUpperFromInt16x16",
auxType: auxInt8,
@ -68264,6 +68322,12 @@ var opcodeTable = [...]opInfo{
argLen: 2,
generic: true,
},
{
name: "Get128Int32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
name: "MaskedRotateAllLeftInt32x8",
auxType: auxInt8,
@ -68378,6 +68442,12 @@ var opcodeTable = [...]opInfo{
argLen: 2,
generic: true,
},
{
name: "Get128Int64x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
name: "MaskedRotateAllLeftInt64x4",
auxType: auxInt8,
@ -68492,12 +68562,24 @@ var opcodeTable = [...]opInfo{
argLen: 2,
generic: true,
},
{
name: "Get128Int8x32",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
name: "Set128Int8x32",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
name: "Get128Uint16x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
name: "MaskedShiftAllLeftAndFillUpperFromUint16x16",
auxType: auxInt8,
@ -68696,6 +68778,12 @@ var opcodeTable = [...]opInfo{
argLen: 2,
generic: true,
},
{
name: "Get128Uint32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
name: "MaskedRotateAllLeftUint32x8",
auxType: auxInt8,
@ -68810,6 +68898,12 @@ var opcodeTable = [...]opInfo{
argLen: 2,
generic: true,
},
{
name: "Get128Uint64x4",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
name: "MaskedRotateAllLeftUint64x4",
auxType: auxInt8,
@ -68960,6 +69054,12 @@ var opcodeTable = [...]opInfo{
argLen: 2,
generic: true,
},
{
name: "Get128Uint8x32",
auxType: auxInt8,
argLen: 1,
generic: true,
},
{
name: "MaskedGaloisFieldAffineTransformUint8x32",
auxType: auxInt8,

View file

@ -1388,6 +1388,26 @@ func rewriteValueAMD64(v *Value) bool {
case OpGaloisFieldMulUint8x64:
v.Op = OpAMD64VGF2P8MULB512
return true
case OpGet128Float32x8:
return rewriteValueAMD64_OpGet128Float32x8(v)
case OpGet128Float64x4:
return rewriteValueAMD64_OpGet128Float64x4(v)
case OpGet128Int16x16:
return rewriteValueAMD64_OpGet128Int16x16(v)
case OpGet128Int32x8:
return rewriteValueAMD64_OpGet128Int32x8(v)
case OpGet128Int64x4:
return rewriteValueAMD64_OpGet128Int64x4(v)
case OpGet128Int8x32:
return rewriteValueAMD64_OpGet128Int8x32(v)
case OpGet128Uint16x16:
return rewriteValueAMD64_OpGet128Uint16x16(v)
case OpGet128Uint32x8:
return rewriteValueAMD64_OpGet128Uint32x8(v)
case OpGet128Uint64x4:
return rewriteValueAMD64_OpGet128Uint64x4(v)
case OpGet128Uint8x32:
return rewriteValueAMD64_OpGet128Uint8x32(v)
case OpGetCallerPC:
v.Op = OpAMD64LoweredGetCallerPC
return true
@ -30999,6 +31019,136 @@ func rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x64(v *Value) bool {
return true
}
}
// rewriteValueAMD64_OpGet128Float32x8 lowers the generic Get128Float32x8 op
// to the AMD64 VEXTRACTF128128 op, carrying over the int8 immediate and the
// single argument. The rule is unconditional, so it always reports true.
func rewriteValueAMD64_OpGet128Float32x8(v *Value) bool {
	// match: (Get128Float32x8 [a] x)
	// result: (VEXTRACTF128128 [a] x)
	// Capture the operand and immediate before reset discards them.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)
	v.reset(OpAMD64VEXTRACTF128128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGet128Float64x4 lowers the generic Get128Float64x4 op
// to the AMD64 VEXTRACTF128128 op, carrying over the int8 immediate and the
// single argument. The rule is unconditional, so it always reports true.
func rewriteValueAMD64_OpGet128Float64x4(v *Value) bool {
	// match: (Get128Float64x4 [a] x)
	// result: (VEXTRACTF128128 [a] x)
	// Capture the operand and immediate before reset discards them.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)
	v.reset(OpAMD64VEXTRACTF128128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGet128Int16x16 lowers the generic Get128Int16x16 op
// to the AMD64 VEXTRACTI128128 op, carrying over the int8 immediate and the
// single argument. The rule is unconditional, so it always reports true.
func rewriteValueAMD64_OpGet128Int16x16(v *Value) bool {
	// match: (Get128Int16x16 [a] x)
	// result: (VEXTRACTI128128 [a] x)
	// Capture the operand and immediate before reset discards them.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)
	v.reset(OpAMD64VEXTRACTI128128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGet128Int32x8 lowers the generic Get128Int32x8 op
// to the AMD64 VEXTRACTI128128 op, carrying over the int8 immediate and the
// single argument. The rule is unconditional, so it always reports true.
func rewriteValueAMD64_OpGet128Int32x8(v *Value) bool {
	// match: (Get128Int32x8 [a] x)
	// result: (VEXTRACTI128128 [a] x)
	// Capture the operand and immediate before reset discards them.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)
	v.reset(OpAMD64VEXTRACTI128128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGet128Int64x4 lowers the generic Get128Int64x4 op
// to the AMD64 VEXTRACTI128128 op, carrying over the int8 immediate and the
// single argument. The rule is unconditional, so it always reports true.
func rewriteValueAMD64_OpGet128Int64x4(v *Value) bool {
	// match: (Get128Int64x4 [a] x)
	// result: (VEXTRACTI128128 [a] x)
	// Capture the operand and immediate before reset discards them.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)
	v.reset(OpAMD64VEXTRACTI128128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGet128Int8x32 lowers the generic Get128Int8x32 op
// to the AMD64 VEXTRACTI128128 op, carrying over the int8 immediate and the
// single argument. The rule is unconditional, so it always reports true.
func rewriteValueAMD64_OpGet128Int8x32(v *Value) bool {
	// match: (Get128Int8x32 [a] x)
	// result: (VEXTRACTI128128 [a] x)
	// Capture the operand and immediate before reset discards them.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)
	v.reset(OpAMD64VEXTRACTI128128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGet128Uint16x16 lowers the generic Get128Uint16x16 op
// to the AMD64 VEXTRACTI128128 op, carrying over the int8 immediate and the
// single argument. The rule is unconditional, so it always reports true.
func rewriteValueAMD64_OpGet128Uint16x16(v *Value) bool {
	// match: (Get128Uint16x16 [a] x)
	// result: (VEXTRACTI128128 [a] x)
	// Capture the operand and immediate before reset discards them.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)
	v.reset(OpAMD64VEXTRACTI128128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGet128Uint32x8 lowers the generic Get128Uint32x8 op
// to the AMD64 VEXTRACTI128128 op, carrying over the int8 immediate and the
// single argument. The rule is unconditional, so it always reports true.
func rewriteValueAMD64_OpGet128Uint32x8(v *Value) bool {
	// match: (Get128Uint32x8 [a] x)
	// result: (VEXTRACTI128128 [a] x)
	// Capture the operand and immediate before reset discards them.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)
	v.reset(OpAMD64VEXTRACTI128128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGet128Uint64x4 lowers the generic Get128Uint64x4 op
// to the AMD64 VEXTRACTI128128 op, carrying over the int8 immediate and the
// single argument. The rule is unconditional, so it always reports true.
func rewriteValueAMD64_OpGet128Uint64x4(v *Value) bool {
	// match: (Get128Uint64x4 [a] x)
	// result: (VEXTRACTI128128 [a] x)
	// Capture the operand and immediate before reset discards them.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)
	v.reset(OpAMD64VEXTRACTI128128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
// rewriteValueAMD64_OpGet128Uint8x32 lowers the generic Get128Uint8x32 op
// to the AMD64 VEXTRACTI128128 op, carrying over the int8 immediate and the
// single argument. The rule is unconditional, so it always reports true.
func rewriteValueAMD64_OpGet128Uint8x32(v *Value) bool {
	// match: (Get128Uint8x32 [a] x)
	// result: (VEXTRACTI128128 [a] x)
	// Capture the operand and immediate before reset discards them.
	src := v.Args[0]
	imm := auxIntToInt8(v.AuxInt)
	v.reset(OpAMD64VEXTRACTI128128)
	v.AuxInt = int8ToAuxInt(imm)
	v.AddArg(src)
	return true
}
func rewriteValueAMD64_OpGetElemInt16x8(v *Value) bool {
v_0 := v.Args[0]
// match: (GetElemInt16x8 [a] x)

View file

@ -235,6 +235,16 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint8x16.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x8.Get128", opLen1Imm8(ssa.OpGet128Float32x8, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Float64x4.Get128", opLen1Imm8(ssa.OpGet128Float64x4, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int8x32.Get128", opLen1Imm8(ssa.OpGet128Int8x32, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int16x16.Get128", opLen1Imm8(ssa.OpGet128Int16x16, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int32x8.Get128", opLen1Imm8(ssa.OpGet128Int32x8, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int64x4.Get128", opLen1Imm8(ssa.OpGet128Int64x4, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Uint8x32.Get128", opLen1Imm8(ssa.OpGet128Uint8x32, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Uint16x16.Get128", opLen1Imm8(ssa.OpGet128Uint16x16, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Uint32x8.Get128", opLen1Imm8(ssa.OpGet128Uint32x8, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Uint64x4.Get128", opLen1Imm8(ssa.OpGet128Uint64x4, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64)
addF(simdPackage, "Int16x8.GetElem", opLen1Imm8(ssa.OpGetElemInt16x8, types.Types[types.TINT16], 0), sys.AMD64)
addF(simdPackage, "Int32x4.GetElem", opLen1Imm8(ssa.OpGetElemInt32x4, types.Types[types.TINT32], 0), sys.AMD64)

View file

@ -161,6 +161,22 @@ func checkInt8Slices(t *testing.T, a, b []int8) {
}
}
// checkFloat32Slices reports a test error for every index i of b at which
// a[i] differs from b[i]. Only the first len(b) elements of a are examined,
// so a may be longer than b. If a is shorter than b, a single error is
// reported and no elements are compared.
func checkFloat32Slices(t *testing.T, a, b []float32) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()
	if len(a) < len(b) {
		// Indexing a[i] below would otherwise panic and abort the test binary.
		t.Errorf("a is shorter than b: len(a)=%d, len(b)=%d", len(a), len(b))
		return
	}
	for i := range b {
		if a[i] != b[i] {
			t.Errorf("a and b differ at index %d, a=%3.0f, b=%3.0f", i, a[i], b[i])
		}
	}
}
// checkFloat64Slices reports a test error for every index i of b at which
// a[i] differs from b[i]. Only the first len(b) elements of a are examined,
// so a may be longer than b. If a is shorter than b, a single error is
// reported and no elements are compared.
func checkFloat64Slices(t *testing.T, a, b []float64) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()
	if len(a) < len(b) {
		// Indexing a[i] below would otherwise panic and abort the test binary.
		t.Errorf("a is shorter than b: len(a)=%d, len(b)=%d", len(a), len(b))
		return
	}
	for i := range b {
		if a[i] != b[i] {
			t.Errorf("a and b differ at index %d, a=%3.0f, b=%3.0f", i, a[i], b[i])
		}
	}
}
func TestSlicesInt8(t *testing.T) {
a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
@ -209,6 +225,78 @@ func TestSlicesInt8Set128(t *testing.T) {
checkInt8Slices(t, a, b[16:])
}
// TestSlicesInt8Get128 loads 32 int8 values into an Int8x32, takes both
// halves with Get128, stores them back to back, and checks that the result
// matches the values that were loaded.
func TestSlicesInt8Get128(t *testing.T) {
	want := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	// full holds elements 1-32.
	full := simd.LoadInt8x32Slice(want)
	// lo is the lower half (1-16), hi the upper half (17-32).
	lo := full.Get128(0)
	hi := full.Get128(1)
	got := make([]int8, 32)
	lo.StoreSlice(got[:16])
	hi.StoreSlice(got[16:])
	checkInt8Slices(t, want, got)
}
// TestSlicesFloat32Set128 overwrites the upper half of a Float32x8 with a
// Float32x4 loaded from the same data, then checks that both halves of the
// stored result equal the first four source values.
func TestSlicesFloat32Set128(t *testing.T) {
	src := []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	// narrow holds elements 1-4.
	narrow := simd.LoadFloat32x4Slice(src)
	// wide holds elements 1-8.
	wide := simd.LoadFloat32x8Slice(src)
	// merged is 1-4 in the lower half followed by 1-4 in the upper half.
	merged := wide.Set128(1, narrow)
	out := make([]float32, 8)
	merged.StoreSlice(out)
	checkFloat32Slices(t, src, out[:4])
	checkFloat32Slices(t, src, out[4:])
}
// TestSlicesFloat32Get128 loads eight float32 values into a Float32x8, takes
// both halves with Get128, stores them back to back, and checks that the
// result matches the leading source values.
func TestSlicesFloat32Get128(t *testing.T) {
	src := []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	// full holds elements 1-8.
	full := simd.LoadFloat32x8Slice(src)
	// lo is the lower half (1-4), hi the upper half (5-8).
	lo := full.Get128(0)
	hi := full.Get128(1)
	out := make([]float32, 8)
	lo.StoreSlice(out[:4])
	hi.StoreSlice(out[4:])
	checkFloat32Slices(t, src, out)
}
// TestSlicesFloat64Set128 overwrites the upper half of a Float64x4 with a
// Float64x2 loaded from the same data, then checks that both halves of the
// stored result equal the first two source values.
func TestSlicesFloat64Set128(t *testing.T) {
	src := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	// narrow holds elements 1-2.
	narrow := simd.LoadFloat64x2Slice(src)
	// wide holds elements 1-4.
	wide := simd.LoadFloat64x4Slice(src)
	// merged is 1-2 in the lower half followed by 1-2 in the upper half.
	merged := wide.Set128(1, narrow)
	out := make([]float64, 4)
	merged.StoreSlice(out)
	checkFloat64Slices(t, src, out[:2])
	checkFloat64Slices(t, src, out[2:])
}
// TestSlicesFloat64Get128 loads four float64 values into a Float64x4, takes
// both halves with Get128, stores them back to back, and checks that the
// result matches the leading source values.
func TestSlicesFloat64Get128(t *testing.T) {
	src := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
	// full holds elements 1-4.
	full := simd.LoadFloat64x4Slice(src)
	// lo is the lower half (1-2), hi the upper half (3-4).
	lo := full.Get128(0)
	hi := full.Get128(1)
	out := make([]float64, 4)
	lo.StoreSlice(out[:2])
	hi.StoreSlice(out[2:])
	checkFloat64Slices(t, src, out)
}
func TestSlicesInt8TooShortLoad(t *testing.T) {
defer func() {
if r := recover(); r != nil {

View file

@ -7954,6 +7954,7 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6
// FloorWithPrecision
// GaloisFieldAffineTransform
// GaloisFieldAffineTransformInversed
// Get128
// GetElem
// MaskedCeilWithPrecision
// MaskedDiffWithCeilWithPrecision

View file

@ -1198,6 +1198,58 @@ func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32
// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX
func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64
/* Get128 */
// Get128 returns one 128-bit half of the 256-bit vector x as a Float32x4:
// the lower half when imm is 0, the upper half when imm is 1.
// imm is the instruction's constant operand.
//
// Asm: VEXTRACTF128, CPU Feature: AVX
func (x Float32x8) Get128(imm uint8) Float32x4
// Get128 returns one 128-bit half of the 256-bit vector x as a Float64x2:
// the lower half when imm is 0, the upper half when imm is 1.
// imm is the instruction's constant operand.
//
// Asm: VEXTRACTF128, CPU Feature: AVX
func (x Float64x4) Get128(imm uint8) Float64x2
// Get128 returns one 128-bit half of the 256-bit vector x as an Int8x16:
// the lower half when imm is 0, the upper half when imm is 1.
// imm is the instruction's constant operand.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Int8x32) Get128(imm uint8) Int8x16
// Get128 returns one 128-bit half of the 256-bit vector x as an Int16x8:
// the lower half when imm is 0, the upper half when imm is 1.
// imm is the instruction's constant operand.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Int16x16) Get128(imm uint8) Int16x8
// Get128 returns one 128-bit half of the 256-bit vector x as an Int32x4:
// the lower half when imm is 0, the upper half when imm is 1.
// imm is the instruction's constant operand.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Int32x8) Get128(imm uint8) Int32x4
// Get128 returns one 128-bit half of the 256-bit vector x as an Int64x2:
// the lower half when imm is 0, the upper half when imm is 1.
// imm is the instruction's constant operand.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Int64x4) Get128(imm uint8) Int64x2
// Get128 returns one 128-bit half of the 256-bit vector x as a Uint8x16:
// the lower half when imm is 0, the upper half when imm is 1.
// imm is the instruction's constant operand.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Uint8x32) Get128(imm uint8) Uint8x16
// Get128 returns one 128-bit half of the 256-bit vector x as a Uint16x8:
// the lower half when imm is 0, the upper half when imm is 1.
// imm is the instruction's constant operand.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Uint16x16) Get128(imm uint8) Uint16x8
// Get128 returns one 128-bit half of the 256-bit vector x as a Uint32x4:
// the lower half when imm is 0, the upper half when imm is 1.
// imm is the instruction's constant operand.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Uint32x8) Get128(imm uint8) Uint32x4
// Get128 returns one 128-bit half of the 256-bit vector x as a Uint64x2:
// the lower half when imm is 0, the upper half when imm is 1.
// imm is the instruction's constant operand.
//
// Asm: VEXTRACTI128, CPU Feature: AVX2
func (x Uint64x4) Get128(imm uint8) Uint64x2
/* GetElem */
// GetElem retrieves a single constant-indexed element's value.