[dev.simd] simd, cmd/compile: generated code for VPINSR[BWDQ], and test

This is paired with simdgen CL 683055

Change-Id: I91d2c08a97ddd7cf06dd24478d552b962846131c
Reviewed-on: https://go-review.googlesource.com/c/go/+/683035
Reviewed-by: Junyang Shao <shaojunyang@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
David Chase 2025-06-20 15:30:55 -04:00
parent dd63b7aa0e
commit 1fa4bcfcda
9 changed files with 345 additions and 0 deletions

View file

@ -718,6 +718,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPDPBUSDMasked512: ssa.OpAMD64VPDPBUSDMasked512:
p = simdFp3k1fp1ResultInArg0(s, v) p = simdFp3k1fp1ResultInArg0(s, v)
case ssa.OpAMD64VPINSRB128,
ssa.OpAMD64VPINSRW128,
ssa.OpAMD64VPINSRD128,
ssa.OpAMD64VPINSRQ128:
p = simdFp1gp1fp1Imm8(s, v)
default: default:
// Unknown reg shape // Unknown reg shape
return false return false

View file

@ -1279,6 +1279,14 @@
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSDS512 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSDS512 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSDS128 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSDS128 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSDS256 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSDS256 ...)
(SetElemInt16x8 [a] x y) => (VPINSRW128 [a] x y)
(SetElemInt32x4 [a] x y) => (VPINSRD128 [a] x y)
(SetElemInt64x2 [a] x y) => (VPINSRQ128 [a] x y)
(SetElemInt8x16 [a] x y) => (VPINSRB128 [a] x y)
(SetElemUint16x8 [a] x y) => (VPINSRW128 [a] x y)
(SetElemUint32x4 [a] x y) => (VPINSRD128 [a] x y)
(SetElemUint64x2 [a] x y) => (VPINSRQ128 [a] x y)
(SetElemUint8x16 [a] x y) => (VPINSRB128 [a] x y)
(SignInt16x16 ...) => (VPSIGNW256 ...) (SignInt16x16 ...) => (VPSIGNW256 ...)
(SignInt16x8 ...) => (VPSIGNW128 ...) (SignInt16x8 ...) => (VPSIGNW128 ...)
(SignInt32x4 ...) => (VPSIGND128 ...) (SignInt32x4 ...) => (VPSIGND128 ...)

View file

@ -645,20 +645,24 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
{name: "VPCMPWMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPW128", argLength: 2, reg: fp2k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPW128", argLength: 2, reg: fp2k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPWMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPINSRW128", argLength: 2, reg: fp1gp1fp1, asm: "VPINSRW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPD512", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPD512", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPDMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPD128", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPD128", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPDMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPINSRD128", argLength: 2, reg: fp1gp1fp1, asm: "VPINSRD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPD256", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPD256", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPDMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPQ128", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPQ128", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPQMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPINSRQ128", argLength: 2, reg: fp1gp1fp1, asm: "VPINSRQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPQ256", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPQ256", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPQMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPQ512", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQ512", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPQMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPB128", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPB128", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPBMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPBMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPINSRB128", argLength: 2, reg: fp1gp1fp1, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPB256", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPB256", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPBMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPBMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPB512", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPB512", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},

View file

@ -1372,5 +1372,13 @@ func simdGenericOps() []opData {
{name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "TruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "TruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemUint16x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"},
{name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"},
} }
} }

View file

@ -1838,20 +1838,24 @@ const (
OpAMD64VPCMPWMasked512 OpAMD64VPCMPWMasked512
OpAMD64VPCMPW128 OpAMD64VPCMPW128
OpAMD64VPCMPWMasked128 OpAMD64VPCMPWMasked128
OpAMD64VPINSRW128
OpAMD64VPCMPD512 OpAMD64VPCMPD512
OpAMD64VPCMPDMasked512 OpAMD64VPCMPDMasked512
OpAMD64VPCMPD128 OpAMD64VPCMPD128
OpAMD64VPCMPDMasked128 OpAMD64VPCMPDMasked128
OpAMD64VPINSRD128
OpAMD64VPCMPD256 OpAMD64VPCMPD256
OpAMD64VPCMPDMasked256 OpAMD64VPCMPDMasked256
OpAMD64VPCMPQ128 OpAMD64VPCMPQ128
OpAMD64VPCMPQMasked128 OpAMD64VPCMPQMasked128
OpAMD64VPINSRQ128
OpAMD64VPCMPQ256 OpAMD64VPCMPQ256
OpAMD64VPCMPQMasked256 OpAMD64VPCMPQMasked256
OpAMD64VPCMPQ512 OpAMD64VPCMPQ512
OpAMD64VPCMPQMasked512 OpAMD64VPCMPQMasked512
OpAMD64VPCMPB128 OpAMD64VPCMPB128
OpAMD64VPCMPBMasked128 OpAMD64VPCMPBMasked128
OpAMD64VPINSRB128
OpAMD64VPCMPB256 OpAMD64VPCMPB256
OpAMD64VPCMPBMasked256 OpAMD64VPCMPBMasked256
OpAMD64VPCMPB512 OpAMD64VPCMPB512
@ -5475,6 +5479,14 @@ const (
OpRoundWithPrecisionFloat64x8 OpRoundWithPrecisionFloat64x8
OpTruncSuppressExceptionWithPrecisionFloat64x8 OpTruncSuppressExceptionWithPrecisionFloat64x8
OpTruncWithPrecisionFloat64x8 OpTruncWithPrecisionFloat64x8
OpSetElemInt16x8
OpSetElemInt32x4
OpSetElemInt64x2
OpSetElemInt8x16
OpSetElemUint16x8
OpSetElemUint32x4
OpSetElemUint64x2
OpSetElemUint8x16
) )
var opcodeTable = [...]opInfo{ var opcodeTable = [...]opInfo{
@ -27738,6 +27750,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPINSRW128",
auxType: auxInt8,
argLen: 2,
asm: x86.AVPINSRW,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPCMPD512", name: "VPCMPD512",
auxType: auxInt8, auxType: auxInt8,
@ -27803,6 +27830,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPINSRD128",
auxType: auxInt8,
argLen: 2,
asm: x86.AVPINSRD,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPCMPD256", name: "VPCMPD256",
auxType: auxInt8, auxType: auxInt8,
@ -27867,6 +27909,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPINSRQ128",
auxType: auxInt8,
argLen: 2,
asm: x86.AVPINSRQ,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPCMPQ256", name: "VPCMPQ256",
auxType: auxInt8, auxType: auxInt8,
@ -27964,6 +28021,21 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "VPINSRB128",
auxType: auxInt8,
argLen: 2,
asm: x86.AVPINSRB,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{ {
name: "VPCMPB256", name: "VPCMPB256",
auxType: auxInt8, auxType: auxInt8,
@ -63153,6 +63225,54 @@ var opcodeTable = [...]opInfo{
argLen: 1, argLen: 1,
generic: true, generic: true,
}, },
{
name: "SetElemInt16x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
name: "SetElemInt32x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
name: "SetElemInt64x2",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
name: "SetElemInt8x16",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
name: "SetElemUint16x8",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
name: "SetElemUint32x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
name: "SetElemUint64x2",
auxType: auxInt8,
argLen: 2,
generic: true,
},
{
name: "SetElemUint8x16",
auxType: auxInt8,
argLen: 2,
generic: true,
},
} }
func (o Op) Asm() obj.As { return opcodeTable[o].asm } func (o Op) Asm() obj.As { return opcodeTable[o].asm }

View file

@ -4038,6 +4038,22 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpSelect1(v) return rewriteValueAMD64_OpSelect1(v)
case OpSelectN: case OpSelectN:
return rewriteValueAMD64_OpSelectN(v) return rewriteValueAMD64_OpSelectN(v)
case OpSetElemInt16x8:
return rewriteValueAMD64_OpSetElemInt16x8(v)
case OpSetElemInt32x4:
return rewriteValueAMD64_OpSetElemInt32x4(v)
case OpSetElemInt64x2:
return rewriteValueAMD64_OpSetElemInt64x2(v)
case OpSetElemInt8x16:
return rewriteValueAMD64_OpSetElemInt8x16(v)
case OpSetElemUint16x8:
return rewriteValueAMD64_OpSetElemUint16x8(v)
case OpSetElemUint32x4:
return rewriteValueAMD64_OpSetElemUint32x4(v)
case OpSetElemUint64x2:
return rewriteValueAMD64_OpSetElemUint64x2(v)
case OpSetElemUint8x16:
return rewriteValueAMD64_OpSetElemUint8x16(v)
case OpSignExt16to32: case OpSignExt16to32:
v.Op = OpAMD64MOVWQSX v.Op = OpAMD64MOVWQSX
return true return true
@ -49462,6 +49478,126 @@ func rewriteValueAMD64_OpSelectN(v *Value) bool {
} }
return false return false
} }
func rewriteValueAMD64_OpSetElemInt16x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SetElemInt16x8 [a] x y)
// result: (VPINSRW128 [a] x y)
for {
a := auxIntToInt8(v.AuxInt)
x := v_0
y := v_1
v.reset(OpAMD64VPINSRW128)
v.AuxInt = int8ToAuxInt(a)
v.AddArg2(x, y)
return true
}
}
func rewriteValueAMD64_OpSetElemInt32x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SetElemInt32x4 [a] x y)
// result: (VPINSRD128 [a] x y)
for {
a := auxIntToInt8(v.AuxInt)
x := v_0
y := v_1
v.reset(OpAMD64VPINSRD128)
v.AuxInt = int8ToAuxInt(a)
v.AddArg2(x, y)
return true
}
}
func rewriteValueAMD64_OpSetElemInt64x2(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SetElemInt64x2 [a] x y)
// result: (VPINSRQ128 [a] x y)
for {
a := auxIntToInt8(v.AuxInt)
x := v_0
y := v_1
v.reset(OpAMD64VPINSRQ128)
v.AuxInt = int8ToAuxInt(a)
v.AddArg2(x, y)
return true
}
}
func rewriteValueAMD64_OpSetElemInt8x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SetElemInt8x16 [a] x y)
// result: (VPINSRB128 [a] x y)
for {
a := auxIntToInt8(v.AuxInt)
x := v_0
y := v_1
v.reset(OpAMD64VPINSRB128)
v.AuxInt = int8ToAuxInt(a)
v.AddArg2(x, y)
return true
}
}
func rewriteValueAMD64_OpSetElemUint16x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SetElemUint16x8 [a] x y)
// result: (VPINSRW128 [a] x y)
for {
a := auxIntToInt8(v.AuxInt)
x := v_0
y := v_1
v.reset(OpAMD64VPINSRW128)
v.AuxInt = int8ToAuxInt(a)
v.AddArg2(x, y)
return true
}
}
func rewriteValueAMD64_OpSetElemUint32x4(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SetElemUint32x4 [a] x y)
// result: (VPINSRD128 [a] x y)
for {
a := auxIntToInt8(v.AuxInt)
x := v_0
y := v_1
v.reset(OpAMD64VPINSRD128)
v.AuxInt = int8ToAuxInt(a)
v.AddArg2(x, y)
return true
}
}
func rewriteValueAMD64_OpSetElemUint64x2(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SetElemUint64x2 [a] x y)
// result: (VPINSRQ128 [a] x y)
for {
a := auxIntToInt8(v.AuxInt)
x := v_0
y := v_1
v.reset(OpAMD64VPINSRQ128)
v.AuxInt = int8ToAuxInt(a)
v.AddArg2(x, y)
return true
}
}
func rewriteValueAMD64_OpSetElemUint8x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SetElemUint8x16 [a] x y)
// result: (VPINSRB128 [a] x y)
for {
a := auxIntToInt8(v.AuxInt)
x := v_0
y := v_1
v.reset(OpAMD64VPINSRB128)
v.AuxInt = int8ToAuxInt(a)
v.AddArg2(x, y)
return true
}
}
func rewriteValueAMD64_OpSlicemask(v *Value) bool { func rewriteValueAMD64_OpSlicemask(v *Value) bool {
v_0 := v.Args[0] v_0 := v.Args[0]
b := v.Block b := v.Block

View file

@ -1290,6 +1290,14 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int16x8.SetElem", opLen2Imm8(ssa.OpSetElemInt16x8, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int32x4.SetElem", opLen2Imm8(ssa.OpSetElemInt32x4, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int64x2.SetElem", opLen2Imm8(ssa.OpSetElemInt64x2, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Uint8x16.SetElem", opLen2Imm8(ssa.OpSetElemUint8x16, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Uint16x8.SetElem", opLen2Imm8(ssa.OpSetElemUint16x8, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Uint32x4.SetElem", opLen2Imm8(ssa.OpSetElemUint32x4, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Uint64x2.SetElem", opLen2Imm8(ssa.OpSetElemUint64x2, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64)

View file

@ -230,6 +230,19 @@ func TestSlicesInt8(t *testing.T) {
checkInt8Slices(t, a, b) checkInt8Slices(t, a, b)
} }
func TestSlicesInt8SetElem(t *testing.T) {
a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
v := simd.LoadInt8x16Slice(a)
v = v.SetElem(3, 13)
a[3] = 13
b := make([]int8, 16, 16)
v.StoreSlice(b)
checkInt8Slices(t, a, b)
}
func TestSlicesInt8TooShortLoad(t *testing.T) { func TestSlicesInt8TooShortLoad(t *testing.T) {
defer func() { defer func() {
if r := recover(); r != nil { if r := recover(); r != nil {

View file

@ -7242,6 +7242,48 @@ func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16 func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
/* SetElem */
// SetElem sets a single constant-indexed element's value
//
// Asm: VPINSRB, CPU Feature: AVX
func (x Int8x16) SetElem(imm uint8, y int8) Int8x16
// SetElem sets a single constant-indexed element's value
//
// Asm: VPINSRW, CPU Feature: AVX
func (x Int16x8) SetElem(imm uint8, y int16) Int16x8
// SetElem sets a single constant-indexed element's value
//
// Asm: VPINSRD, CPU Feature: AVX
func (x Int32x4) SetElem(imm uint8, y int8) Int32x4
// SetElem sets a single constant-indexed element's value
//
// Asm: VPINSRQ, CPU Feature: AVX
func (x Int64x2) SetElem(imm uint8, y int64) Int64x2
// SetElem sets a single constant-indexed element's value
//
// Asm: VPINSRB, CPU Feature: AVX
func (x Uint8x16) SetElem(imm uint8, y uint8) Uint8x16
// SetElem sets a single constant-indexed element's value
//
// Asm: VPINSRW, CPU Feature: AVX
func (x Uint16x8) SetElem(imm uint8, y uint16) Uint16x8
// SetElem sets a single constant-indexed element's value
//
// Asm: VPINSRD, CPU Feature: AVX
func (x Uint32x4) SetElem(imm uint8, y uint8) Uint32x4
// SetElem sets a single constant-indexed element's value
//
// Asm: VPINSRQ, CPU Feature: AVX
func (x Uint64x2) SetElem(imm uint8, y uint64) Uint64x2
/* Sign */ /* Sign */
// Sign returns the product of the first operand with -1, 0, or 1, // Sign returns the product of the first operand with -1, 0, or 1,