[dev.simd] cmd/compile, simd: Int64x2 Greater and Uint* Equal

This CL is generated by CL 686817.

Change-Id: I19b8e468594514b2b1c99f8ad766f78b5e194c80
Reviewed-on: https://go-review.googlesource.com/c/go/+/686876
TryBot-Bypass: David Chase <drchase@google.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Junyang Shao 2025-07-09 16:24:34 +00:00
parent 8db7f41674
commit aab8b173a9
8 changed files with 197 additions and 341 deletions

View file

@ -115,6 +115,7 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPCMPGTW256,
ssa.OpAMD64VPCMPGTD128,
ssa.OpAMD64VPCMPGTD256,
ssa.OpAMD64VPCMPGTQ128,
ssa.OpAMD64VPCMPGTQ256,
ssa.OpAMD64VMAXPS128,
ssa.OpAMD64VMAXPS256,
@ -688,25 +689,25 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPCMPW512,
ssa.OpAMD64VPCMPD512,
ssa.OpAMD64VPCMPQ512,
ssa.OpAMD64VPCMPUB512,
ssa.OpAMD64VPCMPUW512,
ssa.OpAMD64VPCMPUD512,
ssa.OpAMD64VPCMPUQ512,
ssa.OpAMD64VPCMPUB128,
ssa.OpAMD64VPCMPUB256,
ssa.OpAMD64VPCMPUB512,
ssa.OpAMD64VPCMPUW128,
ssa.OpAMD64VPCMPUW256,
ssa.OpAMD64VPCMPUW512,
ssa.OpAMD64VPCMPUD128,
ssa.OpAMD64VPCMPUD256,
ssa.OpAMD64VPCMPUD512,
ssa.OpAMD64VPCMPUQ128,
ssa.OpAMD64VPCMPUQ256,
ssa.OpAMD64VPCMPUQ512,
ssa.OpAMD64VPCMPQ128,
ssa.OpAMD64VPCMPB128,
ssa.OpAMD64VPCMPB256,
ssa.OpAMD64VPCMPW128,
ssa.OpAMD64VPCMPW256,
ssa.OpAMD64VPCMPD128,
ssa.OpAMD64VPCMPD256,
ssa.OpAMD64VPCMPQ128,
ssa.OpAMD64VPCMPQ256:
p = simdV2kImm8(s, v)

View file

@ -283,17 +283,17 @@
(EqualInt64x2 ...) => (VPCMPEQQ128 ...)
(EqualInt64x4 ...) => (VPCMPEQQ256 ...)
(EqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [0] x y))
(EqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [0] x y))
(EqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [0] x y))
(EqualUint8x16 ...) => (VPCMPEQB128 ...)
(EqualUint8x32 ...) => (VPCMPEQB256 ...)
(EqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [0] x y))
(EqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [0] x y))
(EqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [0] x y))
(EqualUint16x8 ...) => (VPCMPEQW128 ...)
(EqualUint16x16 ...) => (VPCMPEQW256 ...)
(EqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [0] x y))
(EqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [0] x y))
(EqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [0] x y))
(EqualUint32x4 ...) => (VPCMPEQD128 ...)
(EqualUint32x8 ...) => (VPCMPEQD256 ...)
(EqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [0] x y))
(EqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [0] x y))
(EqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [0] x y))
(EqualUint64x2 ...) => (VPCMPEQQ128 ...)
(EqualUint64x4 ...) => (VPCMPEQQ256 ...)
(EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [0] x y))
(EqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
(EqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
@ -428,7 +428,7 @@
(GreaterInt32x4 ...) => (VPCMPGTD128 ...)
(GreaterInt32x8 ...) => (VPCMPGTD256 ...)
(GreaterInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [14] x y))
(GreaterInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [14] x y))
(GreaterInt64x2 ...) => (VPCMPGTQ128 ...)
(GreaterInt64x4 ...) => (VPCMPGTQ256 ...)
(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [14] x y))
(GreaterUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [14] x y))

View file

@ -436,6 +436,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPANDQMasked128", argLength: 3, reg: w2kw, asm: "VPANDQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPANDNQMasked128", argLength: 3, reg: w2kw, asm: "VPANDNQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPEQQ128", argLength: 2, reg: v21, asm: "VPCMPEQQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPCMPGTQ128", argLength: 2, reg: v21, asm: "VPCMPGTQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMAXSQ128", argLength: 2, reg: w21, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMAXSQMasked128", argLength: 3, reg: w2kw, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINSQ128", argLength: 2, reg: w21, asm: "VPMINSQ", commutative: true, typ: "Vec128", resultInArg0: false},
@ -837,36 +838,36 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VINSERTI128256", argLength: 2, reg: v21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUW256", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUW256", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUW128", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUW128", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUD128", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUD256", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUD128", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQ128", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUD256", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQ256", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQ128", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQ256", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUB128", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VGF2P8AFFINEQB128", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VGF2P8AFFINEINVQB128", argLength: 2, reg: w21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VGF2P8AFFINEINVQBMasked128", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VGF2P8AFFINEQBMasked128", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUB128", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUBMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VGF2P8AFFINEQB256", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VGF2P8AFFINEINVQB256", argLength: 2, reg: w21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VGF2P8AFFINEINVQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VGF2P8AFFINEQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VGF2P8AFFINEQB512", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},

View file

@ -1629,6 +1629,7 @@ const (
OpAMD64VPANDQMasked128
OpAMD64VPANDNQMasked128
OpAMD64VPCMPEQQ128
OpAMD64VPCMPGTQ128
OpAMD64VPMAXSQ128
OpAMD64VPMAXSQMasked128
OpAMD64VPMINSQ128
@ -2030,36 +2031,36 @@ const (
OpAMD64VINSERTI128256
OpAMD64VPCMPB512
OpAMD64VPCMPBMasked512
OpAMD64VPCMPUW256
OpAMD64VPCMPUWMasked256
OpAMD64VPCMPUW256
OpAMD64VPCMPUW512
OpAMD64VPCMPUWMasked512
OpAMD64VPCMPUW128
OpAMD64VPCMPUWMasked128
OpAMD64VPCMPUW128
OpAMD64VPCMPUD512
OpAMD64VPCMPUDMasked512
OpAMD64VPCMPUD128
OpAMD64VPCMPUDMasked128
OpAMD64VPCMPUD256
OpAMD64VPCMPUD128
OpAMD64VPCMPUDMasked256
OpAMD64VPCMPUQ128
OpAMD64VPCMPUD256
OpAMD64VPCMPUQMasked128
OpAMD64VPCMPUQ256
OpAMD64VPCMPUQ128
OpAMD64VPCMPUQMasked256
OpAMD64VPCMPUQ256
OpAMD64VPCMPUQ512
OpAMD64VPCMPUQMasked512
OpAMD64VPCMPUB128
OpAMD64VPCMPUBMasked128
OpAMD64VGF2P8AFFINEQB128
OpAMD64VGF2P8AFFINEINVQB128
OpAMD64VGF2P8AFFINEINVQBMasked128
OpAMD64VGF2P8AFFINEQBMasked128
OpAMD64VPCMPUB256
OpAMD64VPCMPUB128
OpAMD64VPCMPUBMasked256
OpAMD64VGF2P8AFFINEQB256
OpAMD64VGF2P8AFFINEINVQB256
OpAMD64VGF2P8AFFINEINVQBMasked256
OpAMD64VGF2P8AFFINEQBMasked256
OpAMD64VPCMPUB256
OpAMD64VPCMPUB512
OpAMD64VPCMPUBMasked512
OpAMD64VGF2P8AFFINEQB512
@ -25058,6 +25059,20 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPCMPGTQ128",
argLen: 2,
asm: x86.AVPCMPGTQ,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "VPMAXSQ128",
argLen: 2,
@ -31112,22 +31127,6 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPCMPUW256",
auxType: auxInt8,
argLen: 2,
commutative: true,
asm: x86.AVPCMPUW,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
},
},
},
{
name: "VPCMPUWMasked256",
auxType: auxInt8,
@ -31145,6 +31144,21 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPCMPUW256",
auxType: auxInt8,
argLen: 2,
asm: x86.AVPCMPUW,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
},
},
},
{
name: "VPCMPUW512",
auxType: auxInt8,
@ -31178,22 +31192,6 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPCMPUW128",
auxType: auxInt8,
argLen: 2,
commutative: true,
asm: x86.AVPCMPUW,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
},
},
},
{
name: "VPCMPUWMasked128",
auxType: auxInt8,
@ -31211,6 +31209,21 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPCMPUW128",
auxType: auxInt8,
argLen: 2,
asm: x86.AVPCMPUW,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
},
},
},
{
name: "VPCMPUD512",
auxType: auxInt8,
@ -31244,22 +31257,6 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPCMPUD128",
auxType: auxInt8,
argLen: 2,
commutative: true,
asm: x86.AVPCMPUD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
},
},
},
{
name: "VPCMPUDMasked128",
auxType: auxInt8,
@ -31278,11 +31275,10 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "VPCMPUD256",
auxType: auxInt8,
argLen: 2,
commutative: true,
asm: x86.AVPCMPUD,
name: "VPCMPUD128",
auxType: auxInt8,
argLen: 2,
asm: x86.AVPCMPUD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@ -31311,11 +31307,10 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "VPCMPUQ128",
auxType: auxInt8,
argLen: 2,
commutative: true,
asm: x86.AVPCMPUQ,
name: "VPCMPUD256",
auxType: auxInt8,
argLen: 2,
asm: x86.AVPCMPUD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@ -31344,11 +31339,10 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "VPCMPUQ256",
auxType: auxInt8,
argLen: 2,
commutative: true,
asm: x86.AVPCMPUQ,
name: "VPCMPUQ128",
auxType: auxInt8,
argLen: 2,
asm: x86.AVPCMPUQ,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@ -31376,6 +31370,21 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPCMPUQ256",
auxType: auxInt8,
argLen: 2,
asm: x86.AVPCMPUQ,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
},
},
},
{
name: "VPCMPUQ512",
auxType: auxInt8,
@ -31409,22 +31418,6 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPCMPUB128",
auxType: auxInt8,
argLen: 2,
commutative: true,
asm: x86.AVPCMPUB,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
},
},
},
{
name: "VPCMPUBMasked128",
auxType: auxInt8,
@ -31505,11 +31498,10 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "VPCMPUB256",
auxType: auxInt8,
argLen: 2,
commutative: true,
asm: x86.AVPCMPUB,
name: "VPCMPUB128",
auxType: auxInt8,
argLen: 2,
asm: x86.AVPCMPUB,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@ -31599,6 +31591,21 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "VPCMPUB256",
auxType: auxInt8,
argLen: 2,
asm: x86.AVPCMPUB,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
{1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
},
},
},
{
name: "VPCMPUB512",
auxType: auxInt8,

View file

@ -1530,27 +1530,35 @@ func rewriteValueAMD64(v *Value) bool {
case OpEqualMaskedUint8x64:
return rewriteValueAMD64_OpEqualMaskedUint8x64(v)
case OpEqualUint16x16:
return rewriteValueAMD64_OpEqualUint16x16(v)
v.Op = OpAMD64VPCMPEQW256
return true
case OpEqualUint16x32:
return rewriteValueAMD64_OpEqualUint16x32(v)
case OpEqualUint16x8:
return rewriteValueAMD64_OpEqualUint16x8(v)
v.Op = OpAMD64VPCMPEQW128
return true
case OpEqualUint32x16:
return rewriteValueAMD64_OpEqualUint32x16(v)
case OpEqualUint32x4:
return rewriteValueAMD64_OpEqualUint32x4(v)
v.Op = OpAMD64VPCMPEQD128
return true
case OpEqualUint32x8:
return rewriteValueAMD64_OpEqualUint32x8(v)
v.Op = OpAMD64VPCMPEQD256
return true
case OpEqualUint64x2:
return rewriteValueAMD64_OpEqualUint64x2(v)
v.Op = OpAMD64VPCMPEQQ128
return true
case OpEqualUint64x4:
return rewriteValueAMD64_OpEqualUint64x4(v)
v.Op = OpAMD64VPCMPEQQ256
return true
case OpEqualUint64x8:
return rewriteValueAMD64_OpEqualUint64x8(v)
case OpEqualUint8x16:
return rewriteValueAMD64_OpEqualUint8x16(v)
v.Op = OpAMD64VPCMPEQB128
return true
case OpEqualUint8x32:
return rewriteValueAMD64_OpEqualUint8x32(v)
v.Op = OpAMD64VPCMPEQB256
return true
case OpEqualUint8x64:
return rewriteValueAMD64_OpEqualUint8x64(v)
case OpFMA:
@ -1914,7 +1922,8 @@ func rewriteValueAMD64(v *Value) bool {
v.Op = OpAMD64VPCMPGTD256
return true
case OpGreaterInt64x2:
return rewriteValueAMD64_OpGreaterInt64x2(v)
v.Op = OpAMD64VPCMPGTQ128
return true
case OpGreaterInt64x4:
v.Op = OpAMD64VPCMPGTQ256
return true
@ -33212,24 +33221,6 @@ func rewriteValueAMD64_OpEqualMaskedUint8x64(v *Value) bool {
return true
}
}
// rewriteValueAMD64_OpEqualUint16x16 rewrites v in place and always reports
// true, since the rule has no condition.
//
// match: (EqualUint16x16 x y)
// result: (VPMOVMToVec16x16 (VPCMPUW256 [0] x y))
func rewriteValueAMD64_OpEqualUint16x16(v *Value) bool {
	// Grab both operands before v.reset is called on v.
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	// v takes the outer op of the result pattern.
	v.reset(OpAMD64VPMOVMToVec16x16)

	// Inner mask-typed compare carrying the rule's immediate [0].
	cmp := blk.NewValue0(v.Pos, OpAMD64VPCMPUW256, cfgTypes.Mask)
	cmp.AuxInt = int8ToAuxInt(0)
	cmp.AddArg2(lhs, rhs)
	v.AddArg(cmp)
	return true
}
func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@ -33248,24 +33239,6 @@ func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool {
return true
}
}
// rewriteValueAMD64_OpEqualUint16x8 rewrites v in place and always reports
// true, since the rule has no condition.
//
// match: (EqualUint16x8 x y)
// result: (VPMOVMToVec16x8 (VPCMPUW128 [0] x y))
func rewriteValueAMD64_OpEqualUint16x8(v *Value) bool {
	// Grab both operands before v.reset is called on v.
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	// v takes the outer op of the result pattern.
	v.reset(OpAMD64VPMOVMToVec16x8)

	// Inner mask-typed compare carrying the rule's immediate [0].
	cmp := blk.NewValue0(v.Pos, OpAMD64VPCMPUW128, cfgTypes.Mask)
	cmp.AuxInt = int8ToAuxInt(0)
	cmp.AddArg2(lhs, rhs)
	v.AddArg(cmp)
	return true
}
func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@ -33284,78 +33257,6 @@ func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool {
return true
}
}
// rewriteValueAMD64_OpEqualUint32x4 rewrites v in place and always reports
// true, since the rule has no condition.
//
// match: (EqualUint32x4 x y)
// result: (VPMOVMToVec32x4 (VPCMPUD128 [0] x y))
func rewriteValueAMD64_OpEqualUint32x4(v *Value) bool {
	// Grab both operands before v.reset is called on v.
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	// v takes the outer op of the result pattern.
	v.reset(OpAMD64VPMOVMToVec32x4)

	// Inner mask-typed compare carrying the rule's immediate [0].
	cmp := blk.NewValue0(v.Pos, OpAMD64VPCMPUD128, cfgTypes.Mask)
	cmp.AuxInt = int8ToAuxInt(0)
	cmp.AddArg2(lhs, rhs)
	v.AddArg(cmp)
	return true
}
// rewriteValueAMD64_OpEqualUint32x8 rewrites v in place and always reports
// true, since the rule has no condition.
//
// match: (EqualUint32x8 x y)
// result: (VPMOVMToVec32x8 (VPCMPUD256 [0] x y))
func rewriteValueAMD64_OpEqualUint32x8(v *Value) bool {
	// Grab both operands before v.reset is called on v.
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	// v takes the outer op of the result pattern.
	v.reset(OpAMD64VPMOVMToVec32x8)

	// Inner mask-typed compare carrying the rule's immediate [0].
	cmp := blk.NewValue0(v.Pos, OpAMD64VPCMPUD256, cfgTypes.Mask)
	cmp.AuxInt = int8ToAuxInt(0)
	cmp.AddArg2(lhs, rhs)
	v.AddArg(cmp)
	return true
}
// rewriteValueAMD64_OpEqualUint64x2 rewrites v in place and always reports
// true, since the rule has no condition.
//
// match: (EqualUint64x2 x y)
// result: (VPMOVMToVec64x2 (VPCMPUQ128 [0] x y))
func rewriteValueAMD64_OpEqualUint64x2(v *Value) bool {
	// Grab both operands before v.reset is called on v.
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	// v takes the outer op of the result pattern.
	v.reset(OpAMD64VPMOVMToVec64x2)

	// Inner mask-typed compare carrying the rule's immediate [0].
	cmp := blk.NewValue0(v.Pos, OpAMD64VPCMPUQ128, cfgTypes.Mask)
	cmp.AuxInt = int8ToAuxInt(0)
	cmp.AddArg2(lhs, rhs)
	v.AddArg(cmp)
	return true
}
// rewriteValueAMD64_OpEqualUint64x4 rewrites v in place and always reports
// true, since the rule has no condition.
//
// match: (EqualUint64x4 x y)
// result: (VPMOVMToVec64x4 (VPCMPUQ256 [0] x y))
func rewriteValueAMD64_OpEqualUint64x4(v *Value) bool {
	// Grab both operands before v.reset is called on v.
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	// v takes the outer op of the result pattern.
	v.reset(OpAMD64VPMOVMToVec64x4)

	// Inner mask-typed compare carrying the rule's immediate [0].
	cmp := blk.NewValue0(v.Pos, OpAMD64VPCMPUQ256, cfgTypes.Mask)
	cmp.AuxInt = int8ToAuxInt(0)
	cmp.AddArg2(lhs, rhs)
	v.AddArg(cmp)
	return true
}
func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@ -33374,42 +33275,6 @@ func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool {
return true
}
}
// rewriteValueAMD64_OpEqualUint8x16 rewrites v in place and always reports
// true, since the rule has no condition.
//
// match: (EqualUint8x16 x y)
// result: (VPMOVMToVec8x16 (VPCMPUB128 [0] x y))
func rewriteValueAMD64_OpEqualUint8x16(v *Value) bool {
	// Grab both operands before v.reset is called on v.
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	// v takes the outer op of the result pattern.
	v.reset(OpAMD64VPMOVMToVec8x16)

	// Inner mask-typed compare carrying the rule's immediate [0].
	cmp := blk.NewValue0(v.Pos, OpAMD64VPCMPUB128, cfgTypes.Mask)
	cmp.AuxInt = int8ToAuxInt(0)
	cmp.AddArg2(lhs, rhs)
	v.AddArg(cmp)
	return true
}
// rewriteValueAMD64_OpEqualUint8x32 rewrites v in place and always reports
// true, since the rule has no condition.
//
// match: (EqualUint8x32 x y)
// result: (VPMOVMToVec8x32 (VPCMPUB256 [0] x y))
func rewriteValueAMD64_OpEqualUint8x32(v *Value) bool {
	// Grab both operands before v.reset is called on v.
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	// v takes the outer op of the result pattern.
	v.reset(OpAMD64VPMOVMToVec8x32)

	// Inner mask-typed compare carrying the rule's immediate [0].
	cmp := blk.NewValue0(v.Pos, OpAMD64VPCMPUB256, cfgTypes.Mask)
	cmp.AuxInt = int8ToAuxInt(0)
	cmp.AddArg2(lhs, rhs)
	v.AddArg(cmp)
	return true
}
func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@ -35875,24 +35740,6 @@ func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool {
return true
}
}
// rewriteValueAMD64_OpGreaterInt64x2 rewrites v in place and always reports
// true, since the rule has no condition.
//
// match: (GreaterInt64x2 x y)
// result: (VPMOVMToVec64x2 (VPCMPQ128 [14] x y))
func rewriteValueAMD64_OpGreaterInt64x2(v *Value) bool {
	// Grab both operands before v.reset is called on v.
	rhs := v.Args[1]
	lhs := v.Args[0]
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types

	// v takes the outer op of the result pattern.
	v.reset(OpAMD64VPMOVMToVec64x2)

	// Inner mask-typed compare carrying the rule's immediate [14].
	cmp := blk.NewValue0(v.Pos, OpAMD64VPCMPQ128, cfgTypes.Mask)
	cmp.AuxInt = int8ToAuxInt(14)
	cmp.AddArg2(lhs, rhs)
	v.AddArg(cmp)
	return true
}
func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]

View file

@ -284,6 +284,14 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int32x8.Equal", opLen2(ssa.OpEqualInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x2.Equal", opLen2(ssa.OpEqualInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.Equal", opLen2(ssa.OpEqualInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x16.Equal", opLen2(ssa.OpEqualUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Equal", opLen2(ssa.OpEqualUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x8.Equal", opLen2(ssa.OpEqualUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.Equal", opLen2(ssa.OpEqualUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x4.Equal", opLen2(ssa.OpEqualUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.Equal", opLen2(ssa.OpEqualUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x2.Equal", opLen2(ssa.OpEqualUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Equal", opLen2(ssa.OpEqualUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x4.Equal", opLen2(ssa.OpEqualFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Equal", opLen2(ssa.OpEqualFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Equal", opLen2(ssa.OpEqualFloat32x16, types.TypeVec512), sys.AMD64)
@ -294,17 +302,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int16x32.Equal", opLen2(ssa.OpEqualInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x16.Equal", opLen2(ssa.OpEqualInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x8.Equal", opLen2(ssa.OpEqualInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Equal", opLen2(ssa.OpEqualUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Equal", opLen2(ssa.OpEqualUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.Equal", opLen2(ssa.OpEqualUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Equal", opLen2(ssa.OpEqualUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.Equal", opLen2(ssa.OpEqualUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x32.Equal", opLen2(ssa.OpEqualUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.Equal", opLen2(ssa.OpEqualUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.Equal", opLen2(ssa.OpEqualUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.Equal", opLen2(ssa.OpEqualUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.Equal", opLen2(ssa.OpEqualUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Equal", opLen2(ssa.OpEqualUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Equal", opLen2(ssa.OpEqualUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
@ -430,6 +430,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int16x16.Greater", opLen2(ssa.OpGreaterInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x4.Greater", opLen2(ssa.OpGreaterInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.Greater", opLen2(ssa.OpGreaterInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int64x2.Greater", opLen2(ssa.OpGreaterInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.Greater", opLen2(ssa.OpGreaterInt64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x4.Greater", opLen2(ssa.OpGreaterFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Greater", opLen2(ssa.OpGreaterFloat32x8, types.TypeVec256), sys.AMD64)
@ -440,7 +441,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int8x64.Greater", opLen2(ssa.OpGreaterInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x32.Greater", opLen2(ssa.OpGreaterInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x16.Greater", opLen2(ssa.OpGreaterInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.Greater", opLen2(ssa.OpGreaterInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x8.Greater", opLen2(ssa.OpGreaterInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Greater", opLen2(ssa.OpGreaterUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Greater", opLen2(ssa.OpGreaterUint8x32, types.TypeVec256), sys.AMD64)

View file

@ -1429,6 +1429,46 @@ func (x Int64x2) Equal(y Int64x2) Mask64x2
// Asm: VPCMPEQQ, CPU Feature: AVX2
func (x Int64x4) Equal(y Int64x4) Mask64x4
// Equal compares for equality.
//
// Asm: VPCMPEQB, CPU Feature: AVX
func (x Uint8x16) Equal(y Uint8x16) Mask8x16
// Equal compares for equality.
//
// Asm: VPCMPEQB, CPU Feature: AVX2
func (x Uint8x32) Equal(y Uint8x32) Mask8x32
// Equal compares for equality.
//
// Asm: VPCMPEQW, CPU Feature: AVX
func (x Uint16x8) Equal(y Uint16x8) Mask16x8
// Equal compares for equality.
//
// Asm: VPCMPEQW, CPU Feature: AVX2
func (x Uint16x16) Equal(y Uint16x16) Mask16x16
// Equal compares for equality.
//
// Asm: VPCMPEQD, CPU Feature: AVX
func (x Uint32x4) Equal(y Uint32x4) Mask32x4
// Equal compares for equality.
//
// Asm: VPCMPEQD, CPU Feature: AVX2
func (x Uint32x8) Equal(y Uint32x8) Mask32x8
// Equal compares for equality.
//
// Asm: VPCMPEQQ, CPU Feature: AVX
func (x Uint64x2) Equal(y Uint64x2) Mask64x2
// Equal compares for equality.
//
// Asm: VPCMPEQQ, CPU Feature: AVX2
func (x Uint64x4) Equal(y Uint64x4) Mask64x4
// Equal compares for equality.
//
// Asm: VCMPPS, CPU Feature: AVX
@ -1479,61 +1519,21 @@ func (x Int32x16) Equal(y Int32x16) Mask32x16
// Asm: VPCMPQ, CPU Feature: AVX512EVEX
func (x Int64x8) Equal(y Int64x8) Mask64x8
// Equal compares for equality.
//
// Asm: VPCMPUB, CPU Feature: AVX512EVEX
func (x Uint8x16) Equal(y Uint8x16) Mask8x16
// Equal compares for equality.
//
// Asm: VPCMPUB, CPU Feature: AVX512EVEX
func (x Uint8x32) Equal(y Uint8x32) Mask8x32
// Equal compares for equality.
//
// Asm: VPCMPUB, CPU Feature: AVX512EVEX
func (x Uint8x64) Equal(y Uint8x64) Mask8x64
// Equal compares for equality.
//
// Asm: VPCMPUW, CPU Feature: AVX512EVEX
func (x Uint16x8) Equal(y Uint16x8) Mask16x8
// Equal compares for equality.
//
// Asm: VPCMPUW, CPU Feature: AVX512EVEX
func (x Uint16x16) Equal(y Uint16x16) Mask16x16
// Equal compares for equality.
//
// Asm: VPCMPUW, CPU Feature: AVX512EVEX
func (x Uint16x32) Equal(y Uint16x32) Mask16x32
// Equal compares for equality.
//
// Asm: VPCMPUD, CPU Feature: AVX512EVEX
func (x Uint32x4) Equal(y Uint32x4) Mask32x4
// Equal compares for equality.
//
// Asm: VPCMPUD, CPU Feature: AVX512EVEX
func (x Uint32x8) Equal(y Uint32x8) Mask32x8
// Equal compares for equality.
//
// Asm: VPCMPUD, CPU Feature: AVX512EVEX
func (x Uint32x16) Equal(y Uint32x16) Mask32x16
// Equal compares for equality.
//
// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
func (x Uint64x2) Equal(y Uint64x2) Mask64x2
// Equal compares for equality.
//
// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
func (x Uint64x4) Equal(y Uint64x4) Mask64x4
// Equal compares for equality, masked.
//
// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
@ -2245,6 +2245,11 @@ func (x Int32x4) Greater(y Int32x4) Mask32x4
// Asm: VPCMPGTD, CPU Feature: AVX2
func (x Int32x8) Greater(y Int32x8) Mask32x8
// Greater compares for greater than.
//
// NOTE(review): this declaration returns Int64x2, whereas the neighbouring
// Greater declarations return a mask type (for example Int32x8.Greater
// returns Mask32x8) — confirm the vector return type is intended.
//
// Asm: VPCMPGTQ, CPU Feature: AVX
func (x Int64x2) Greater(y Int64x2) Int64x2
// Greater compares for greater than.
//
// Asm: VPCMPGTQ, CPU Feature: AVX2
@ -2295,11 +2300,6 @@ func (x Int16x32) Greater(y Int16x32) Mask16x32
// Asm: VPCMPD, CPU Feature: AVX512EVEX
func (x Int32x16) Greater(y Int32x16) Mask32x16
// Greater compares for greater than.
//
// Asm: VPCMPQ, CPU Feature: AVX512EVEX
func (x Int64x2) Greater(y Int64x2) Mask64x2
// Greater compares for greater than.
//
// Asm: VPCMPQ, CPU Feature: AVX512EVEX

View file

@ -4018,6 +4018,8 @@ func testInt64x2Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which
gotv = vec0.And(vec1)
case "AndNot":
gotv = vec0.AndNot(vec1)
case "Greater":
gotv = vec0.Greater(vec1)
case "Max":
gotv = vec0.Max(vec1)
case "Min":
@ -4113,8 +4115,6 @@ func testInt64x2Compare(t *testing.T, v0 []int64, v1 []int64, want []int64, whic
switch which {
case "Equal":
gotv = vec0.Equal(vec1).AsInt64x2()
case "Greater":
gotv = vec0.Greater(vec1).AsInt64x2()
case "GreaterEqual":
gotv = vec0.GreaterEqual(vec1).AsInt64x2()
case "Less":