diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index ac2848d1baf..fbb63ccaa14 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -655,6 +655,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VREDUCEPD128, ssa.OpAMD64VREDUCEPD256, ssa.OpAMD64VREDUCEPD512, + ssa.OpAMD64VEXTRACTF128128, + ssa.OpAMD64VEXTRACTI128128, ssa.OpAMD64VPROLD128, ssa.OpAMD64VPROLD256, ssa.OpAMD64VPROLD512, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 6b1078e7412..6ba52a9e9c9 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -224,6 +224,16 @@ (GaloisFieldMulUint8x16 ...) => (VGF2P8MULB128 ...) (GaloisFieldMulUint8x32 ...) => (VGF2P8MULB256 ...) (GaloisFieldMulUint8x64 ...) => (VGF2P8MULB512 ...) +(Get128Float32x8 [a] x) => (VEXTRACTF128128 [a] x) +(Get128Float64x4 [a] x) => (VEXTRACTF128128 [a] x) +(Get128Int8x32 [a] x) => (VEXTRACTI128128 [a] x) +(Get128Int16x16 [a] x) => (VEXTRACTI128128 [a] x) +(Get128Int32x8 [a] x) => (VEXTRACTI128128 [a] x) +(Get128Int64x4 [a] x) => (VEXTRACTI128128 [a] x) +(Get128Uint8x32 [a] x) => (VEXTRACTI128128 [a] x) +(Get128Uint16x16 [a] x) => (VEXTRACTI128128 [a] x) +(Get128Uint32x8 [a] x) => (VEXTRACTI128128 [a] x) +(Get128Uint64x4 [a] x) => (VEXTRACTI128128 [a] x) (GetElemInt8x16 [a] x) => (VPEXTRB128 [a] x) (GetElemInt16x8 [a] x) => (VPEXTRW128 [a] x) (GetElemInt32x4 [a] x) => (VPEXTRD128 [a] x) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 787d3c5fcbf..8c895d9f455 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -765,6 +765,7 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VRNDSCALEPS256", 
argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VREDUCEPS256", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VCMPPS256", argLength: 2, reg: fp21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VEXTRACTF128128", argLength: 1, reg: fp11, asm: "VEXTRACTF128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VRNDSCALEPSMasked256", argLength: 2, reg: fpkfp, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VREDUCEPSMasked256", argLength: 2, reg: fpkfp, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VCMPPSMasked256", argLength: 3, reg: fp2kk, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, @@ -878,6 +879,7 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPCMPB128", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPINSRB128", argLength: 2, reg: fpgpfp, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VEXTRACTI128128", argLength: 1, reg: fp11, asm: "VEXTRACTI128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPB256", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPBMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VINSERTI128256", argLength: 2, reg: fp21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, diff 
--git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index 076a16ebda6..c74893b97a2 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -1502,6 +1502,7 @@ func simdGenericOps() []opData { {name: "DiffWithRoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "DiffWithTruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "FloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "Get128Float32x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "MaskedCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "MaskedDiffWithCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "MaskedDiffWithFloorWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, @@ -1535,6 +1536,7 @@ func simdGenericOps() []opData { {name: "DiffWithRoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "DiffWithTruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "FloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "Get128Float64x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "MaskedCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "MaskedDiffWithCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "MaskedDiffWithFloorWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, @@ -1562,6 +1564,7 @@ func simdGenericOps() []opData { {name: "MaskedTruncWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: 
"Get128Int16x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "MaskedShiftAllLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"}, {name: "MaskedShiftAllRightAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"}, {name: "Set128Int16x16", argLength: 2, commutative: false, aux: "Int8"}, @@ -1595,6 +1598,7 @@ func simdGenericOps() []opData { {name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "Get128Int32x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "MaskedRotateAllLeftInt32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "MaskedRotateAllRightInt32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "MaskedShiftAllLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false, aux: "Int8"}, @@ -1614,6 +1618,7 @@ func simdGenericOps() []opData { {name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "Get128Int64x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "MaskedRotateAllLeftInt64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "MaskedRotateAllRightInt64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "MaskedShiftAllLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false, aux: "Int8"}, @@ -1633,7 +1638,9 @@ func simdGenericOps() []opData { {name: "ShiftAllRightAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "Get128Int8x32", argLength: 
1, commutative: false, aux: "Int8"}, {name: "Set128Int8x32", argLength: 2, commutative: false, aux: "Int8"}, + {name: "Get128Uint16x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "MaskedShiftAllLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"}, {name: "MaskedShiftAllRightAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"}, {name: "Set128Uint16x16", argLength: 2, commutative: false, aux: "Int8"}, @@ -1667,6 +1674,7 @@ func simdGenericOps() []opData { {name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "Get128Uint32x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "MaskedRotateAllLeftUint32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "MaskedRotateAllRightUint32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "MaskedShiftAllLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false, aux: "Int8"}, @@ -1686,6 +1694,7 @@ func simdGenericOps() []opData { {name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "Get128Uint64x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "MaskedRotateAllLeftUint64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "MaskedRotateAllRightUint64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "MaskedShiftAllLeftAndFillUpperFromUint64x4", argLength: 3, commutative: false, aux: "Int8"}, @@ -1711,6 +1720,7 @@ func simdGenericOps() []opData { {name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "GaloisFieldAffineTransformUint8x32", argLength: 2, 
commutative: false, aux: "Int8"}, {name: "GaloisFieldAffineTransformInversedUint8x32", argLength: 2, commutative: false, aux: "Int8"}, + {name: "Get128Uint8x32", argLength: 1, commutative: false, aux: "Int8"}, {name: "MaskedGaloisFieldAffineTransformUint8x32", argLength: 3, commutative: false, aux: "Int8"}, {name: "MaskedGaloisFieldAffineTransformInversedUint8x32", argLength: 3, commutative: false, aux: "Int8"}, {name: "Set128Uint8x32", argLength: 2, commutative: false, aux: "Int8"}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index ece791ca6ce..91380e5e089 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1958,6 +1958,7 @@ const ( OpAMD64VRNDSCALEPS256 OpAMD64VREDUCEPS256 OpAMD64VCMPPS256 + OpAMD64VEXTRACTF128128 OpAMD64VRNDSCALEPSMasked256 OpAMD64VREDUCEPSMasked256 OpAMD64VCMPPSMasked256 @@ -2071,6 +2072,7 @@ const ( OpAMD64VPCMPB128 OpAMD64VPCMPBMasked128 OpAMD64VPINSRB128 + OpAMD64VEXTRACTI128128 OpAMD64VPCMPB256 OpAMD64VPCMPBMasked256 OpAMD64VINSERTI128256 @@ -5837,6 +5839,7 @@ const ( OpDiffWithRoundWithPrecisionFloat32x8 OpDiffWithTruncWithPrecisionFloat32x8 OpFloorWithPrecisionFloat32x8 + OpGet128Float32x8 OpMaskedCeilWithPrecisionFloat32x8 OpMaskedDiffWithCeilWithPrecisionFloat32x8 OpMaskedDiffWithFloorWithPrecisionFloat32x8 @@ -5870,6 +5873,7 @@ const ( OpDiffWithRoundWithPrecisionFloat64x4 OpDiffWithTruncWithPrecisionFloat64x4 OpFloorWithPrecisionFloat64x4 + OpGet128Float64x4 OpMaskedCeilWithPrecisionFloat64x4 OpMaskedDiffWithCeilWithPrecisionFloat64x4 OpMaskedDiffWithFloorWithPrecisionFloat64x4 @@ -5897,6 +5901,7 @@ const ( OpMaskedTruncWithPrecisionFloat64x8 OpRoundWithPrecisionFloat64x8 OpTruncWithPrecisionFloat64x8 + OpGet128Int16x16 OpMaskedShiftAllLeftAndFillUpperFromInt16x16 OpMaskedShiftAllRightAndFillUpperFromInt16x16 OpSet128Int16x16 @@ -5930,6 +5935,7 @@ const ( OpSetElemInt32x4 OpShiftAllLeftAndFillUpperFromInt32x4 
OpShiftAllRightAndFillUpperFromInt32x4 + OpGet128Int32x8 OpMaskedRotateAllLeftInt32x8 OpMaskedRotateAllRightInt32x8 OpMaskedShiftAllLeftAndFillUpperFromInt32x8 @@ -5949,6 +5955,7 @@ const ( OpSetElemInt64x2 OpShiftAllLeftAndFillUpperFromInt64x2 OpShiftAllRightAndFillUpperFromInt64x2 + OpGet128Int64x4 OpMaskedRotateAllLeftInt64x4 OpMaskedRotateAllRightInt64x4 OpMaskedShiftAllLeftAndFillUpperFromInt64x4 @@ -5968,7 +5975,9 @@ const ( OpShiftAllRightAndFillUpperFromInt64x8 OpGetElemInt8x16 OpSetElemInt8x16 + OpGet128Int8x32 OpSet128Int8x32 + OpGet128Uint16x16 OpMaskedShiftAllLeftAndFillUpperFromUint16x16 OpMaskedShiftAllRightAndFillUpperFromUint16x16 OpSet128Uint16x16 @@ -6002,6 +6011,7 @@ const ( OpSetElemUint32x4 OpShiftAllLeftAndFillUpperFromUint32x4 OpShiftAllRightAndFillUpperFromUint32x4 + OpGet128Uint32x8 OpMaskedRotateAllLeftUint32x8 OpMaskedRotateAllRightUint32x8 OpMaskedShiftAllLeftAndFillUpperFromUint32x8 @@ -6021,6 +6031,7 @@ const ( OpSetElemUint64x2 OpShiftAllLeftAndFillUpperFromUint64x2 OpShiftAllRightAndFillUpperFromUint64x2 + OpGet128Uint64x4 OpMaskedRotateAllLeftUint64x4 OpMaskedRotateAllRightUint64x4 OpMaskedShiftAllLeftAndFillUpperFromUint64x4 @@ -6046,6 +6057,7 @@ const ( OpSetElemUint8x16 OpGaloisFieldAffineTransformUint8x32 OpGaloisFieldAffineTransformInversedUint8x32 + OpGet128Uint8x32 OpMaskedGaloisFieldAffineTransformUint8x32 OpMaskedGaloisFieldAffineTransformInversedUint8x32 OpSet128Uint8x32 @@ -30096,6 +30108,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VEXTRACTF128128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVEXTRACTF128, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VRNDSCALEPSMasked256", auxType: auxInt8, @@ -31820,6 +31846,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VEXTRACTI128128", + auxType: auxInt8, + argLen: 1, 
+ asm: x86.AVEXTRACTI128, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPCMPB256", auxType: auxInt8, @@ -67706,6 +67746,12 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "Get128Float32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "MaskedCeilWithPrecisionFloat32x8", auxType: auxInt8, @@ -67904,6 +67950,12 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "Get128Float64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "MaskedCeilWithPrecisionFloat64x4", auxType: auxInt8, @@ -68066,6 +68118,12 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "Get128Int16x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "MaskedShiftAllLeftAndFillUpperFromInt16x16", auxType: auxInt8, @@ -68264,6 +68322,12 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "Get128Int32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "MaskedRotateAllLeftInt32x8", auxType: auxInt8, @@ -68378,6 +68442,12 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "Get128Int64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "MaskedRotateAllLeftInt64x4", auxType: auxInt8, @@ -68492,12 +68562,24 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "Get128Int8x32", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "Set128Int8x32", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "Get128Uint16x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "MaskedShiftAllLeftAndFillUpperFromUint16x16", auxType: auxInt8, @@ -68696,6 +68778,12 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "Get128Uint32x8", + auxType: auxInt8, + argLen: 1, 
+ generic: true, + }, { name: "MaskedRotateAllLeftUint32x8", auxType: auxInt8, @@ -68810,6 +68898,12 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "Get128Uint64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "MaskedRotateAllLeftUint64x4", auxType: auxInt8, @@ -68960,6 +69054,12 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "Get128Uint8x32", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "MaskedGaloisFieldAffineTransformUint8x32", auxType: auxInt8, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 5c1872dcdfd..1cf23c4ec5b 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1388,6 +1388,26 @@ func rewriteValueAMD64(v *Value) bool { case OpGaloisFieldMulUint8x64: v.Op = OpAMD64VGF2P8MULB512 return true + case OpGet128Float32x8: + return rewriteValueAMD64_OpGet128Float32x8(v) + case OpGet128Float64x4: + return rewriteValueAMD64_OpGet128Float64x4(v) + case OpGet128Int16x16: + return rewriteValueAMD64_OpGet128Int16x16(v) + case OpGet128Int32x8: + return rewriteValueAMD64_OpGet128Int32x8(v) + case OpGet128Int64x4: + return rewriteValueAMD64_OpGet128Int64x4(v) + case OpGet128Int8x32: + return rewriteValueAMD64_OpGet128Int8x32(v) + case OpGet128Uint16x16: + return rewriteValueAMD64_OpGet128Uint16x16(v) + case OpGet128Uint32x8: + return rewriteValueAMD64_OpGet128Uint32x8(v) + case OpGet128Uint64x4: + return rewriteValueAMD64_OpGet128Uint64x4(v) + case OpGet128Uint8x32: + return rewriteValueAMD64_OpGet128Uint8x32(v) case OpGetCallerPC: v.Op = OpAMD64LoweredGetCallerPC return true @@ -30999,6 +31019,136 @@ func rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x64(v *Value) bool { return true } } +func rewriteValueAMD64_OpGet128Float32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Float32x8 [a] x) + // result: (VEXTRACTF128128 [a] x) + for { + a := 
auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTF128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Float64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Float64x4 [a] x) + // result: (VEXTRACTF128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTF128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Int16x16(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Int16x16 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Int32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Int32x8 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Int64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Int64x4 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Int8x32(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Int8x32 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Uint16x16(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Uint16x16 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Uint32x8(v *Value) bool { + v_0 := v.Args[0] + // match: 
(Get128Uint32x8 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Uint64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Uint64x4 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Uint8x32(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Uint8x32 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} func rewriteValueAMD64_OpGetElemInt16x8(v *Value) bool { v_0 := v.Args[0] // match: (GetElemInt16x8 [a] x) diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 3d0e6fbd4aa..27aad1cc0c4 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -235,6 +235,16 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint8x16.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x8.Get128", opLen1Imm8(ssa.OpGet128Float32x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Float64x4.Get128", opLen1Imm8(ssa.OpGet128Float64x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int8x32.Get128", opLen1Imm8(ssa.OpGet128Int8x32, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.Get128", opLen1Imm8(ssa.OpGet128Int16x16, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.Get128", opLen1Imm8(ssa.OpGet128Int32x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.Get128", opLen1Imm8(ssa.OpGet128Int64x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint8x32.Get128", opLen1Imm8(ssa.OpGet128Uint8x32, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.Get128", opLen1Imm8(ssa.OpGet128Uint16x16, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.Get128", opLen1Imm8(ssa.OpGet128Uint32x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.Get128", opLen1Imm8(ssa.OpGet128Uint64x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64) addF(simdPackage, "Int16x8.GetElem", opLen1Imm8(ssa.OpGetElemInt16x8, types.Types[types.TINT16], 0), sys.AMD64) addF(simdPackage, "Int32x4.GetElem", opLen1Imm8(ssa.OpGetElemInt32x4, types.Types[types.TINT32], 0), sys.AMD64) diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go index f99938bb9d2..1b47d2770cc 100644 --- a/src/simd/simd_test.go +++ b/src/simd/simd_test.go @@ -161,6 +161,22 @@ func checkInt8Slices(t *testing.T, a, b []int8) { } } +func checkFloat32Slices(t *testing.T, a, b 
[]float32) { + for i := range b { + if a[i] != b[i] { + t.Errorf("a and b differ at index %d, a=%3.0f, b=%3.0f", i, a[i], b[i]) + } + } +} + +func checkFloat64Slices(t *testing.T, a, b []float64) { + for i := range b { + if a[i] != b[i] { + t.Errorf("a and b differ at index %d, a=%3.0f, b=%3.0f", i, a[i], b[i]) + } + } +} + func TestSlicesInt8(t *testing.T) { a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} @@ -209,6 +225,78 @@ func TestSlicesInt8Set128(t *testing.T) { checkInt8Slices(t, a, b[16:]) } +func TestSlicesInt8Get128(t *testing.T) { + a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} + u := simd.LoadInt8x32Slice(a) // 1-32 + v := u.Get128(0) // 1-16 + w := u.Get128(1) // 17-32 + + b := make([]int8, 32, 32) + v.StoreSlice(b[:16]) + w.StoreSlice(b[16:]) + + checkInt8Slices(t, a, b) +} + +func TestSlicesFloat32Set128(t *testing.T) { + a := []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} + v := simd.LoadFloat32x4Slice(a) // 1-4 + u := simd.LoadFloat32x8Slice(a) // 1-8 + + w := u.Set128(1, v) // 1-4:1-4 + + b := make([]float32, 8, 8) + w.StoreSlice(b) + + checkFloat32Slices(t, a, b[:4]) + checkFloat32Slices(t, a, b[4:]) +} + +func TestSlicesFloat32Get128(t *testing.T) { + a := []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} + u := simd.LoadFloat32x8Slice(a) // 1-8 + v := u.Get128(0) // 1-4 + w := u.Get128(1) // 5-8 + + b := make([]float32, 8, 8) + v.StoreSlice(b[:4]) + w.StoreSlice(b[4:]) + + checkFloat32Slices(t, a, b) +} + +func TestSlicesFloat64Set128(t *testing.T) { + a := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} + v := 
simd.LoadFloat64x2Slice(a) // 1-2 + u := simd.LoadFloat64x4Slice(a) // 1-4 + + w := u.Set128(1, v) // 1-2:1-2 + + b := make([]float64, 4, 4) + w.StoreSlice(b) + + checkFloat64Slices(t, a, b[:2]) + checkFloat64Slices(t, a, b[2:]) +} + +func TestSlicesFloat64Get128(t *testing.T) { + a := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} + u := simd.LoadFloat64x4Slice(a) // 1-4 + v := u.Get128(0) // 1-2 + w := u.Get128(1) // 3-4 + + b := make([]float64, 4, 4) + v.StoreSlice(b[:2]) + w.StoreSlice(b[2:]) + + checkFloat64Slices(t, a, b) +} + func TestSlicesInt8TooShortLoad(t *testing.T) { defer func() { if r := recover(); r != nil { diff --git a/src/simd/simd_wrapped_test.go b/src/simd/simd_wrapped_test.go index 4a8c0957e5b..b3f18b38377 100644 --- a/src/simd/simd_wrapped_test.go +++ b/src/simd/simd_wrapped_test.go @@ -7954,6 +7954,7 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6 // FloorWithPrecision // GaloisFieldAffineTransform // GaloisFieldAffineTransformInversed +// Get128 // GetElem // MaskedCeilWithPrecision // MaskedDiffWithCeilWithPrecision diff --git a/src/simd/stubs_amd64.go b/src/simd/stubs_amd64.go index de54a9ada48..3453843d0f7 100644 --- a/src/simd/stubs_amd64.go +++ b/src/simd/stubs_amd64.go @@ -1198,6 +1198,58 @@ func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32 // Asm: VGF2P8MULB, CPU Feature: AVX512EVEX func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64 +/* Get128 */ + +// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// +// Asm: VEXTRACTF128, CPU Feature: AVX +func (x Float32x8) Get128(imm uint8) Float32x4 + +// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. 
+// +// Asm: VEXTRACTF128, CPU Feature: AVX +func (x Float64x4) Get128(imm uint8) Float64x2 + +// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int8x32) Get128(imm uint8) Int8x16 + +// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int16x16) Get128(imm uint8) Int16x8 + +// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int32x8) Get128(imm uint8) Int32x4 + +// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int64x4) Get128(imm uint8) Int64x2 + +// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint8x32) Get128(imm uint8) Uint8x16 + +// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint16x16) Get128(imm uint8) Uint16x8 + +// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint32x8) Get128(imm uint8) Uint32x4 + +// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint64x4) Get128(imm uint8) Uint64x2 + /* GetElem */ // GetElem retrieves a single constant-indexed element's value.