[dev.simd] simd/_gen/simdgen, cmd/compile: add memory op machine ops

This CL adds the machine ops for memory-op instructions, along with
their prog-writing logic.

This CL also fixes a bug in the XED parser. Previously, the merge of
machine ops did not check the CPU feature, so some AVX instructions
might have had their "memFeatures" field set incorrectly. However,
since that field was not used until this CL, putting the fix here
should be OK.

Change-Id: I91031cbbf63453257473dd1d2ff47f7496d1a01d
Reviewed-on: https://go-review.googlesource.com/c/go/+/701198
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Junyang Shao 2025-09-04 18:20:59 +00:00
parent c39b2fdd1e
commit 5a0446d449
10 changed files with 10116 additions and 73 deletions

View file

@ -1353,6 +1353,514 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSHRDQMasked512: ssa.OpAMD64VPSHRDQMasked512:
p = simdV2kvImm8(s, v) p = simdV2kvImm8(s, v)
case ssa.OpAMD64VPABSDMasked128load,
ssa.OpAMD64VPABSDMasked256load,
ssa.OpAMD64VPABSDMasked512load,
ssa.OpAMD64VPABSQMasked128load,
ssa.OpAMD64VPABSQMasked256load,
ssa.OpAMD64VPABSQMasked512load,
ssa.OpAMD64VCVTTPS2DQMasked128load,
ssa.OpAMD64VCVTTPS2DQMasked256load,
ssa.OpAMD64VCVTTPS2DQMasked512load,
ssa.OpAMD64VCVTPS2UDQMasked128load,
ssa.OpAMD64VCVTPS2UDQMasked256load,
ssa.OpAMD64VCVTPS2UDQMasked512load,
ssa.OpAMD64VPOPCNTDMasked128load,
ssa.OpAMD64VPOPCNTDMasked256load,
ssa.OpAMD64VPOPCNTDMasked512load,
ssa.OpAMD64VPOPCNTQMasked128load,
ssa.OpAMD64VPOPCNTQMasked256load,
ssa.OpAMD64VPOPCNTQMasked512load,
ssa.OpAMD64VRCP14PSMasked128load,
ssa.OpAMD64VRCP14PSMasked256load,
ssa.OpAMD64VRCP14PSMasked512load,
ssa.OpAMD64VRCP14PDMasked128load,
ssa.OpAMD64VRCP14PDMasked256load,
ssa.OpAMD64VRCP14PDMasked512load,
ssa.OpAMD64VRSQRT14PSMasked128load,
ssa.OpAMD64VRSQRT14PSMasked256load,
ssa.OpAMD64VRSQRT14PSMasked512load,
ssa.OpAMD64VRSQRT14PDMasked128load,
ssa.OpAMD64VRSQRT14PDMasked256load,
ssa.OpAMD64VRSQRT14PDMasked512load,
ssa.OpAMD64VSQRTPSMasked128load,
ssa.OpAMD64VSQRTPSMasked256load,
ssa.OpAMD64VSQRTPSMasked512load,
ssa.OpAMD64VSQRTPDMasked128load,
ssa.OpAMD64VSQRTPDMasked256load,
ssa.OpAMD64VSQRTPDMasked512load:
p = simdVkvload(s, v)
case ssa.OpAMD64VADDPS128load,
ssa.OpAMD64VADDPS256load,
ssa.OpAMD64VADDPS512load,
ssa.OpAMD64VADDPD128load,
ssa.OpAMD64VADDPD256load,
ssa.OpAMD64VADDPD512load,
ssa.OpAMD64VPADDD128load,
ssa.OpAMD64VPADDD256load,
ssa.OpAMD64VPADDD512load,
ssa.OpAMD64VPADDQ128load,
ssa.OpAMD64VPADDQ256load,
ssa.OpAMD64VPADDQ512load,
ssa.OpAMD64VPANDD512load,
ssa.OpAMD64VPANDQ512load,
ssa.OpAMD64VPANDND512load,
ssa.OpAMD64VPANDNQ512load,
ssa.OpAMD64VPACKSSDW128load,
ssa.OpAMD64VPACKSSDW256load,
ssa.OpAMD64VPACKSSDW512load,
ssa.OpAMD64VPACKUSDW128load,
ssa.OpAMD64VPACKUSDW256load,
ssa.OpAMD64VPACKUSDW512load,
ssa.OpAMD64VDIVPS128load,
ssa.OpAMD64VDIVPS256load,
ssa.OpAMD64VDIVPS512load,
ssa.OpAMD64VDIVPD128load,
ssa.OpAMD64VDIVPD256load,
ssa.OpAMD64VDIVPD512load,
ssa.OpAMD64VPCMPEQD128load,
ssa.OpAMD64VPCMPEQD256load,
ssa.OpAMD64VPCMPEQQ128load,
ssa.OpAMD64VPCMPEQQ256load,
ssa.OpAMD64VPCMPGTD128load,
ssa.OpAMD64VPCMPGTD256load,
ssa.OpAMD64VPCMPGTQ128load,
ssa.OpAMD64VPCMPGTQ256load,
ssa.OpAMD64VPUNPCKHDQ128load,
ssa.OpAMD64VPUNPCKHQDQ128load,
ssa.OpAMD64VPUNPCKHDQ256load,
ssa.OpAMD64VPUNPCKHDQ512load,
ssa.OpAMD64VPUNPCKHQDQ256load,
ssa.OpAMD64VPUNPCKHQDQ512load,
ssa.OpAMD64VPUNPCKLDQ128load,
ssa.OpAMD64VPUNPCKLQDQ128load,
ssa.OpAMD64VPUNPCKLDQ256load,
ssa.OpAMD64VPUNPCKLDQ512load,
ssa.OpAMD64VPUNPCKLQDQ256load,
ssa.OpAMD64VPUNPCKLQDQ512load,
ssa.OpAMD64VMAXPS128load,
ssa.OpAMD64VMAXPS256load,
ssa.OpAMD64VMAXPS512load,
ssa.OpAMD64VMAXPD128load,
ssa.OpAMD64VMAXPD256load,
ssa.OpAMD64VMAXPD512load,
ssa.OpAMD64VPMAXSD128load,
ssa.OpAMD64VPMAXSD256load,
ssa.OpAMD64VPMAXSD512load,
ssa.OpAMD64VPMAXSQ128load,
ssa.OpAMD64VPMAXSQ256load,
ssa.OpAMD64VPMAXSQ512load,
ssa.OpAMD64VPMAXUD128load,
ssa.OpAMD64VPMAXUD256load,
ssa.OpAMD64VPMAXUD512load,
ssa.OpAMD64VPMAXUQ128load,
ssa.OpAMD64VPMAXUQ256load,
ssa.OpAMD64VPMAXUQ512load,
ssa.OpAMD64VMINPS128load,
ssa.OpAMD64VMINPS256load,
ssa.OpAMD64VMINPS512load,
ssa.OpAMD64VMINPD128load,
ssa.OpAMD64VMINPD256load,
ssa.OpAMD64VMINPD512load,
ssa.OpAMD64VPMINSD128load,
ssa.OpAMD64VPMINSD256load,
ssa.OpAMD64VPMINSD512load,
ssa.OpAMD64VPMINSQ128load,
ssa.OpAMD64VPMINSQ256load,
ssa.OpAMD64VPMINSQ512load,
ssa.OpAMD64VPMINUD128load,
ssa.OpAMD64VPMINUD256load,
ssa.OpAMD64VPMINUD512load,
ssa.OpAMD64VPMINUQ128load,
ssa.OpAMD64VPMINUQ256load,
ssa.OpAMD64VPMINUQ512load,
ssa.OpAMD64VMULPS128load,
ssa.OpAMD64VMULPS256load,
ssa.OpAMD64VMULPS512load,
ssa.OpAMD64VMULPD128load,
ssa.OpAMD64VMULPD256load,
ssa.OpAMD64VMULPD512load,
ssa.OpAMD64VPMULLD128load,
ssa.OpAMD64VPMULLD256load,
ssa.OpAMD64VPMULLD512load,
ssa.OpAMD64VPMULLQ128load,
ssa.OpAMD64VPMULLQ256load,
ssa.OpAMD64VPMULLQ512load,
ssa.OpAMD64VPMULDQ128load,
ssa.OpAMD64VPMULDQ256load,
ssa.OpAMD64VPMULUDQ128load,
ssa.OpAMD64VPMULUDQ256load,
ssa.OpAMD64VPORD512load,
ssa.OpAMD64VPORQ512load,
ssa.OpAMD64VPERMPS256load,
ssa.OpAMD64VPERMD256load,
ssa.OpAMD64VPERMPS512load,
ssa.OpAMD64VPERMD512load,
ssa.OpAMD64VPERMPD256load,
ssa.OpAMD64VPERMQ256load,
ssa.OpAMD64VPERMPD512load,
ssa.OpAMD64VPERMQ512load,
ssa.OpAMD64VPROLVD128load,
ssa.OpAMD64VPROLVD256load,
ssa.OpAMD64VPROLVD512load,
ssa.OpAMD64VPROLVQ128load,
ssa.OpAMD64VPROLVQ256load,
ssa.OpAMD64VPROLVQ512load,
ssa.OpAMD64VPRORVD128load,
ssa.OpAMD64VPRORVD256load,
ssa.OpAMD64VPRORVD512load,
ssa.OpAMD64VPRORVQ128load,
ssa.OpAMD64VPRORVQ256load,
ssa.OpAMD64VPRORVQ512load,
ssa.OpAMD64VSCALEFPS128load,
ssa.OpAMD64VSCALEFPS256load,
ssa.OpAMD64VSCALEFPS512load,
ssa.OpAMD64VSCALEFPD128load,
ssa.OpAMD64VSCALEFPD256load,
ssa.OpAMD64VSCALEFPD512load,
ssa.OpAMD64VPSLLVD128load,
ssa.OpAMD64VPSLLVD256load,
ssa.OpAMD64VPSLLVD512load,
ssa.OpAMD64VPSLLVQ128load,
ssa.OpAMD64VPSLLVQ256load,
ssa.OpAMD64VPSLLVQ512load,
ssa.OpAMD64VPSRAVD128load,
ssa.OpAMD64VPSRAVD256load,
ssa.OpAMD64VPSRAVD512load,
ssa.OpAMD64VPSRAVQ128load,
ssa.OpAMD64VPSRAVQ256load,
ssa.OpAMD64VPSRAVQ512load,
ssa.OpAMD64VPSRLVD128load,
ssa.OpAMD64VPSRLVD256load,
ssa.OpAMD64VPSRLVD512load,
ssa.OpAMD64VPSRLVQ128load,
ssa.OpAMD64VPSRLVQ256load,
ssa.OpAMD64VPSRLVQ512load,
ssa.OpAMD64VSUBPS128load,
ssa.OpAMD64VSUBPS256load,
ssa.OpAMD64VSUBPS512load,
ssa.OpAMD64VSUBPD128load,
ssa.OpAMD64VSUBPD256load,
ssa.OpAMD64VSUBPD512load,
ssa.OpAMD64VPSUBD128load,
ssa.OpAMD64VPSUBD256load,
ssa.OpAMD64VPSUBD512load,
ssa.OpAMD64VPSUBQ128load,
ssa.OpAMD64VPSUBQ256load,
ssa.OpAMD64VPSUBQ512load,
ssa.OpAMD64VPXORD512load,
ssa.OpAMD64VPXORQ512load:
p = simdV21load(s, v)
case ssa.OpAMD64VPDPWSSD128load,
ssa.OpAMD64VPDPWSSD256load,
ssa.OpAMD64VPDPWSSD512load,
ssa.OpAMD64VPDPWSSDS128load,
ssa.OpAMD64VPDPWSSDS256load,
ssa.OpAMD64VPDPWSSDS512load,
ssa.OpAMD64VPDPBUSD128load,
ssa.OpAMD64VPDPBUSD256load,
ssa.OpAMD64VPDPBUSD512load,
ssa.OpAMD64VPDPBUSDS128load,
ssa.OpAMD64VPDPBUSDS256load,
ssa.OpAMD64VPDPBUSDS512load,
ssa.OpAMD64VFMADD213PS128load,
ssa.OpAMD64VFMADD213PS256load,
ssa.OpAMD64VFMADD213PS512load,
ssa.OpAMD64VFMADD213PD128load,
ssa.OpAMD64VFMADD213PD256load,
ssa.OpAMD64VFMADD213PD512load,
ssa.OpAMD64VFMADDSUB213PS128load,
ssa.OpAMD64VFMADDSUB213PS256load,
ssa.OpAMD64VFMADDSUB213PS512load,
ssa.OpAMD64VFMADDSUB213PD128load,
ssa.OpAMD64VFMADDSUB213PD256load,
ssa.OpAMD64VFMADDSUB213PD512load,
ssa.OpAMD64VFMSUBADD213PS128load,
ssa.OpAMD64VFMSUBADD213PS256load,
ssa.OpAMD64VFMSUBADD213PS512load,
ssa.OpAMD64VFMSUBADD213PD128load,
ssa.OpAMD64VFMSUBADD213PD256load,
ssa.OpAMD64VFMSUBADD213PD512load,
ssa.OpAMD64VPERMI2PS128load,
ssa.OpAMD64VPERMI2D128load,
ssa.OpAMD64VPERMI2PS256load,
ssa.OpAMD64VPERMI2D256load,
ssa.OpAMD64VPERMI2PS512load,
ssa.OpAMD64VPERMI2D512load,
ssa.OpAMD64VPERMI2PD128load,
ssa.OpAMD64VPERMI2Q128load,
ssa.OpAMD64VPERMI2PD256load,
ssa.OpAMD64VPERMI2Q256load,
ssa.OpAMD64VPERMI2PD512load,
ssa.OpAMD64VPERMI2Q512load,
ssa.OpAMD64VPSHLDVD128load,
ssa.OpAMD64VPSHLDVD256load,
ssa.OpAMD64VPSHLDVD512load,
ssa.OpAMD64VPSHLDVQ128load,
ssa.OpAMD64VPSHLDVQ256load,
ssa.OpAMD64VPSHLDVQ512load,
ssa.OpAMD64VPSHRDVD128load,
ssa.OpAMD64VPSHRDVD256load,
ssa.OpAMD64VPSHRDVD512load,
ssa.OpAMD64VPSHRDVQ128load,
ssa.OpAMD64VPSHRDVQ256load,
ssa.OpAMD64VPSHRDVQ512load:
p = simdV31loadResultInArg0(s, v)
case ssa.OpAMD64VPDPWSSDMasked128load,
ssa.OpAMD64VPDPWSSDMasked256load,
ssa.OpAMD64VPDPWSSDMasked512load,
ssa.OpAMD64VPDPWSSDSMasked128load,
ssa.OpAMD64VPDPWSSDSMasked256load,
ssa.OpAMD64VPDPWSSDSMasked512load,
ssa.OpAMD64VPDPBUSDMasked128load,
ssa.OpAMD64VPDPBUSDMasked256load,
ssa.OpAMD64VPDPBUSDMasked512load,
ssa.OpAMD64VPDPBUSDSMasked128load,
ssa.OpAMD64VPDPBUSDSMasked256load,
ssa.OpAMD64VPDPBUSDSMasked512load,
ssa.OpAMD64VFMADD213PSMasked128load,
ssa.OpAMD64VFMADD213PSMasked256load,
ssa.OpAMD64VFMADD213PSMasked512load,
ssa.OpAMD64VFMADD213PDMasked128load,
ssa.OpAMD64VFMADD213PDMasked256load,
ssa.OpAMD64VFMADD213PDMasked512load,
ssa.OpAMD64VFMADDSUB213PSMasked128load,
ssa.OpAMD64VFMADDSUB213PSMasked256load,
ssa.OpAMD64VFMADDSUB213PSMasked512load,
ssa.OpAMD64VFMADDSUB213PDMasked128load,
ssa.OpAMD64VFMADDSUB213PDMasked256load,
ssa.OpAMD64VFMADDSUB213PDMasked512load,
ssa.OpAMD64VFMSUBADD213PSMasked128load,
ssa.OpAMD64VFMSUBADD213PSMasked256load,
ssa.OpAMD64VFMSUBADD213PSMasked512load,
ssa.OpAMD64VFMSUBADD213PDMasked128load,
ssa.OpAMD64VFMSUBADD213PDMasked256load,
ssa.OpAMD64VFMSUBADD213PDMasked512load,
ssa.OpAMD64VPERMI2PSMasked128load,
ssa.OpAMD64VPERMI2DMasked128load,
ssa.OpAMD64VPERMI2PSMasked256load,
ssa.OpAMD64VPERMI2DMasked256load,
ssa.OpAMD64VPERMI2PSMasked512load,
ssa.OpAMD64VPERMI2DMasked512load,
ssa.OpAMD64VPERMI2PDMasked128load,
ssa.OpAMD64VPERMI2QMasked128load,
ssa.OpAMD64VPERMI2PDMasked256load,
ssa.OpAMD64VPERMI2QMasked256load,
ssa.OpAMD64VPERMI2PDMasked512load,
ssa.OpAMD64VPERMI2QMasked512load,
ssa.OpAMD64VPSHLDVDMasked128load,
ssa.OpAMD64VPSHLDVDMasked256load,
ssa.OpAMD64VPSHLDVDMasked512load,
ssa.OpAMD64VPSHLDVQMasked128load,
ssa.OpAMD64VPSHLDVQMasked256load,
ssa.OpAMD64VPSHLDVQMasked512load,
ssa.OpAMD64VPSHRDVDMasked128load,
ssa.OpAMD64VPSHRDVDMasked256load,
ssa.OpAMD64VPSHRDVDMasked512load,
ssa.OpAMD64VPSHRDVQMasked128load,
ssa.OpAMD64VPSHRDVQMasked256load,
ssa.OpAMD64VPSHRDVQMasked512load:
p = simdV3kvloadResultInArg0(s, v)
case ssa.OpAMD64VADDPSMasked128load,
ssa.OpAMD64VADDPSMasked256load,
ssa.OpAMD64VADDPSMasked512load,
ssa.OpAMD64VADDPDMasked128load,
ssa.OpAMD64VADDPDMasked256load,
ssa.OpAMD64VADDPDMasked512load,
ssa.OpAMD64VPADDDMasked128load,
ssa.OpAMD64VPADDDMasked256load,
ssa.OpAMD64VPADDDMasked512load,
ssa.OpAMD64VPADDQMasked128load,
ssa.OpAMD64VPADDQMasked256load,
ssa.OpAMD64VPADDQMasked512load,
ssa.OpAMD64VPANDDMasked128load,
ssa.OpAMD64VPANDDMasked256load,
ssa.OpAMD64VPANDDMasked512load,
ssa.OpAMD64VPANDQMasked128load,
ssa.OpAMD64VPANDQMasked256load,
ssa.OpAMD64VPANDQMasked512load,
ssa.OpAMD64VPANDNDMasked128load,
ssa.OpAMD64VPANDNDMasked256load,
ssa.OpAMD64VPANDNDMasked512load,
ssa.OpAMD64VPANDNQMasked128load,
ssa.OpAMD64VPANDNQMasked256load,
ssa.OpAMD64VPANDNQMasked512load,
ssa.OpAMD64VPACKSSDWMasked128load,
ssa.OpAMD64VPACKSSDWMasked256load,
ssa.OpAMD64VPACKSSDWMasked512load,
ssa.OpAMD64VPACKUSDWMasked128load,
ssa.OpAMD64VPACKUSDWMasked256load,
ssa.OpAMD64VPACKUSDWMasked512load,
ssa.OpAMD64VDIVPSMasked128load,
ssa.OpAMD64VDIVPSMasked256load,
ssa.OpAMD64VDIVPSMasked512load,
ssa.OpAMD64VDIVPDMasked128load,
ssa.OpAMD64VDIVPDMasked256load,
ssa.OpAMD64VDIVPDMasked512load,
ssa.OpAMD64VMAXPSMasked128load,
ssa.OpAMD64VMAXPSMasked256load,
ssa.OpAMD64VMAXPSMasked512load,
ssa.OpAMD64VMAXPDMasked128load,
ssa.OpAMD64VMAXPDMasked256load,
ssa.OpAMD64VMAXPDMasked512load,
ssa.OpAMD64VPMAXSDMasked128load,
ssa.OpAMD64VPMAXSDMasked256load,
ssa.OpAMD64VPMAXSDMasked512load,
ssa.OpAMD64VPMAXSQMasked128load,
ssa.OpAMD64VPMAXSQMasked256load,
ssa.OpAMD64VPMAXSQMasked512load,
ssa.OpAMD64VPMAXUDMasked128load,
ssa.OpAMD64VPMAXUDMasked256load,
ssa.OpAMD64VPMAXUDMasked512load,
ssa.OpAMD64VPMAXUQMasked128load,
ssa.OpAMD64VPMAXUQMasked256load,
ssa.OpAMD64VPMAXUQMasked512load,
ssa.OpAMD64VMINPSMasked128load,
ssa.OpAMD64VMINPSMasked256load,
ssa.OpAMD64VMINPSMasked512load,
ssa.OpAMD64VMINPDMasked128load,
ssa.OpAMD64VMINPDMasked256load,
ssa.OpAMD64VMINPDMasked512load,
ssa.OpAMD64VPMINSDMasked128load,
ssa.OpAMD64VPMINSDMasked256load,
ssa.OpAMD64VPMINSDMasked512load,
ssa.OpAMD64VPMINSQMasked128load,
ssa.OpAMD64VPMINSQMasked256load,
ssa.OpAMD64VPMINSQMasked512load,
ssa.OpAMD64VPMINUDMasked128load,
ssa.OpAMD64VPMINUDMasked256load,
ssa.OpAMD64VPMINUDMasked512load,
ssa.OpAMD64VPMINUQMasked128load,
ssa.OpAMD64VPMINUQMasked256load,
ssa.OpAMD64VPMINUQMasked512load,
ssa.OpAMD64VMULPSMasked128load,
ssa.OpAMD64VMULPSMasked256load,
ssa.OpAMD64VMULPSMasked512load,
ssa.OpAMD64VMULPDMasked128load,
ssa.OpAMD64VMULPDMasked256load,
ssa.OpAMD64VMULPDMasked512load,
ssa.OpAMD64VPMULLDMasked128load,
ssa.OpAMD64VPMULLDMasked256load,
ssa.OpAMD64VPMULLDMasked512load,
ssa.OpAMD64VPMULLQMasked128load,
ssa.OpAMD64VPMULLQMasked256load,
ssa.OpAMD64VPMULLQMasked512load,
ssa.OpAMD64VPORDMasked128load,
ssa.OpAMD64VPORDMasked256load,
ssa.OpAMD64VPORDMasked512load,
ssa.OpAMD64VPORQMasked128load,
ssa.OpAMD64VPORQMasked256load,
ssa.OpAMD64VPORQMasked512load,
ssa.OpAMD64VPERMPSMasked256load,
ssa.OpAMD64VPERMDMasked256load,
ssa.OpAMD64VPERMPSMasked512load,
ssa.OpAMD64VPERMDMasked512load,
ssa.OpAMD64VPERMPDMasked256load,
ssa.OpAMD64VPERMQMasked256load,
ssa.OpAMD64VPERMPDMasked512load,
ssa.OpAMD64VPERMQMasked512load,
ssa.OpAMD64VPROLVDMasked128load,
ssa.OpAMD64VPROLVDMasked256load,
ssa.OpAMD64VPROLVDMasked512load,
ssa.OpAMD64VPROLVQMasked128load,
ssa.OpAMD64VPROLVQMasked256load,
ssa.OpAMD64VPROLVQMasked512load,
ssa.OpAMD64VPRORVDMasked128load,
ssa.OpAMD64VPRORVDMasked256load,
ssa.OpAMD64VPRORVDMasked512load,
ssa.OpAMD64VPRORVQMasked128load,
ssa.OpAMD64VPRORVQMasked256load,
ssa.OpAMD64VPRORVQMasked512load,
ssa.OpAMD64VSCALEFPSMasked128load,
ssa.OpAMD64VSCALEFPSMasked256load,
ssa.OpAMD64VSCALEFPSMasked512load,
ssa.OpAMD64VSCALEFPDMasked128load,
ssa.OpAMD64VSCALEFPDMasked256load,
ssa.OpAMD64VSCALEFPDMasked512load,
ssa.OpAMD64VPSLLVDMasked128load,
ssa.OpAMD64VPSLLVDMasked256load,
ssa.OpAMD64VPSLLVDMasked512load,
ssa.OpAMD64VPSLLVQMasked128load,
ssa.OpAMD64VPSLLVQMasked256load,
ssa.OpAMD64VPSLLVQMasked512load,
ssa.OpAMD64VPSRAVDMasked128load,
ssa.OpAMD64VPSRAVDMasked256load,
ssa.OpAMD64VPSRAVDMasked512load,
ssa.OpAMD64VPSRAVQMasked128load,
ssa.OpAMD64VPSRAVQMasked256load,
ssa.OpAMD64VPSRAVQMasked512load,
ssa.OpAMD64VPSRLVDMasked128load,
ssa.OpAMD64VPSRLVDMasked256load,
ssa.OpAMD64VPSRLVDMasked512load,
ssa.OpAMD64VPSRLVQMasked128load,
ssa.OpAMD64VPSRLVQMasked256load,
ssa.OpAMD64VPSRLVQMasked512load,
ssa.OpAMD64VSUBPSMasked128load,
ssa.OpAMD64VSUBPSMasked256load,
ssa.OpAMD64VSUBPSMasked512load,
ssa.OpAMD64VSUBPDMasked128load,
ssa.OpAMD64VSUBPDMasked256load,
ssa.OpAMD64VSUBPDMasked512load,
ssa.OpAMD64VPSUBDMasked128load,
ssa.OpAMD64VPSUBDMasked256load,
ssa.OpAMD64VPSUBDMasked512load,
ssa.OpAMD64VPSUBQMasked128load,
ssa.OpAMD64VPSUBQMasked256load,
ssa.OpAMD64VPSUBQMasked512load,
ssa.OpAMD64VPXORDMasked128load,
ssa.OpAMD64VPXORDMasked256load,
ssa.OpAMD64VPXORDMasked512load,
ssa.OpAMD64VPXORQMasked128load,
ssa.OpAMD64VPXORQMasked256load,
ssa.OpAMD64VPXORQMasked512load,
ssa.OpAMD64VPBLENDMDMasked512load,
ssa.OpAMD64VPBLENDMQMasked512load:
p = simdV2kvload(s, v)
case ssa.OpAMD64VPCMPEQD512load,
ssa.OpAMD64VPCMPEQQ512load,
ssa.OpAMD64VPCMPGTD512load,
ssa.OpAMD64VPCMPGTQ512load:
p = simdV2kload(s, v)
case ssa.OpAMD64VPABSD128load,
ssa.OpAMD64VPABSD256load,
ssa.OpAMD64VPABSD512load,
ssa.OpAMD64VPABSQ128load,
ssa.OpAMD64VPABSQ256load,
ssa.OpAMD64VPABSQ512load,
ssa.OpAMD64VCVTTPS2DQ128load,
ssa.OpAMD64VCVTTPS2DQ256load,
ssa.OpAMD64VCVTTPS2DQ512load,
ssa.OpAMD64VCVTPS2UDQ128load,
ssa.OpAMD64VCVTPS2UDQ256load,
ssa.OpAMD64VCVTPS2UDQ512load,
ssa.OpAMD64VPOPCNTD128load,
ssa.OpAMD64VPOPCNTD256load,
ssa.OpAMD64VPOPCNTD512load,
ssa.OpAMD64VPOPCNTQ128load,
ssa.OpAMD64VPOPCNTQ256load,
ssa.OpAMD64VPOPCNTQ512load,
ssa.OpAMD64VRCP14PS512load,
ssa.OpAMD64VRCP14PD128load,
ssa.OpAMD64VRCP14PD256load,
ssa.OpAMD64VRCP14PD512load,
ssa.OpAMD64VRSQRT14PS512load,
ssa.OpAMD64VRSQRT14PD128load,
ssa.OpAMD64VRSQRT14PD256load,
ssa.OpAMD64VRSQRT14PD512load,
ssa.OpAMD64VSQRTPS128load,
ssa.OpAMD64VSQRTPS256load,
ssa.OpAMD64VSQRTPS512load,
ssa.OpAMD64VSQRTPD128load,
ssa.OpAMD64VSQRTPD256load,
ssa.OpAMD64VSQRTPD512load:
p = simdV11load(s, v)
default: default:
// Unknown reg shape // Unknown reg shape
return false return false

View file

@ -2126,6 +2126,91 @@ func simdV3kv(s *ssagen.State, v *ssa.Value) *obj.Prog {
return p return p
} }
// simdVkvload builds the Prog for a masked one-input SIMD op whose input is
// read from memory. The memory operand is based on the register of Args[0],
// the register from maskReg(Args[1]) is added as an extra source, and the
// destination is simdReg(v).
// Example instruction: VRCP14PS (DI), K6, X22
func simdVkvload(s *ssagen.State, v *ssa.Value) *obj.Prog {
	prog := s.Prog(v.Op.Asm())

	// Memory source operand; the type is set before AddAux is applied to it.
	src := &prog.From
	src.Type = obj.TYPE_MEM
	src.Reg = v.Args[0].Reg()
	ssagen.AddAux(src, v)

	prog.AddRestSourceReg(maskReg(v.Args[1]))

	dst := &prog.To
	dst.Type = obj.TYPE_REG
	dst.Reg = simdReg(v)
	return prog
}
// simdV21load builds the Prog for a two-input SIMD op whose second input is
// read from memory. The memory operand is based on the register of Args[1],
// simdReg(Args[0]) is added as an extra source, and the destination is
// simdReg(v).
// Example instruction: VPSLLVD (DX), X7, X18
func simdV21load(s *ssagen.State, v *ssa.Value) *obj.Prog {
	prog := s.Prog(v.Op.Asm())

	// Memory source operand; the type is set before AddAux is applied to it.
	src := &prog.From
	src.Type = obj.TYPE_MEM
	src.Reg = v.Args[1].Reg()
	ssagen.AddAux(src, v)

	prog.AddRestSourceReg(simdReg(v.Args[0]))

	dst := &prog.To
	dst.Type = obj.TYPE_REG
	dst.Reg = simdReg(v)
	return prog
}
// simdV31loadResultInArg0 builds the Prog for a three-input SIMD op whose
// last input is read from memory. The memory operand is based on the register
// of Args[2], simdReg(Args[1]) is added as an extra source, and the
// destination is simdReg(v). Args[0] is not emitted as a separate operand.
// NOTE(review): per the function name, the result presumably shares its
// register with Args[0] — confirm the ops using this helper are resultInArg0.
// Example instruction: VPDPWSSD (SI), X24, X18
func simdV31loadResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
	prog := s.Prog(v.Op.Asm())

	// Memory source operand; the type is set before AddAux is applied to it.
	src := &prog.From
	src.Type = obj.TYPE_MEM
	src.Reg = v.Args[2].Reg()
	ssagen.AddAux(src, v)

	prog.AddRestSourceReg(simdReg(v.Args[1]))

	dst := &prog.To
	dst.Type = obj.TYPE_REG
	dst.Reg = simdReg(v)
	return prog
}
// simdV3kvloadResultInArg0 builds the Prog for a masked three-input SIMD op
// whose last vector input is read from memory. The memory operand is based on
// the register of Args[2]; simdReg(Args[1]) and then maskReg(Args[3]) are
// added as extra sources, and the destination is simdReg(v). Args[0] is not
// emitted as a separate operand.
// NOTE(review): per the function name, the result presumably shares its
// register with Args[0] — confirm the ops using this helper are resultInArg0.
// Example instruction: VPDPWSSD (SI), X24, K1, X18
func simdV3kvloadResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
	prog := s.Prog(v.Op.Asm())

	// Memory source operand; the type is set before AddAux is applied to it.
	src := &prog.From
	src.Type = obj.TYPE_MEM
	src.Reg = v.Args[2].Reg()
	ssagen.AddAux(src, v)

	// Extra sources are appended in order: vector register, then mask.
	prog.AddRestSourceReg(simdReg(v.Args[1]))
	prog.AddRestSourceReg(maskReg(v.Args[3]))

	dst := &prog.To
	dst.Type = obj.TYPE_REG
	dst.Reg = simdReg(v)
	return prog
}
// simdV2kvload builds the Prog for a masked two-input SIMD op whose second
// vector input is read from memory. The memory operand is based on the
// register of Args[1]; simdReg(Args[0]) and then maskReg(Args[2]) are added
// as extra sources, and the destination is simdReg(v).
// Example instruction: VPSLLVD (SI), X1, K1, X2
func simdV2kvload(s *ssagen.State, v *ssa.Value) *obj.Prog {
	prog := s.Prog(v.Op.Asm())

	// Memory source operand; the type is set before AddAux is applied to it.
	src := &prog.From
	src.Type = obj.TYPE_MEM
	src.Reg = v.Args[1].Reg()
	ssagen.AddAux(src, v)

	// Extra sources are appended in order: vector register, then mask.
	prog.AddRestSourceReg(simdReg(v.Args[0]))
	prog.AddRestSourceReg(maskReg(v.Args[2]))

	dst := &prog.To
	dst.Type = obj.TYPE_REG
	dst.Reg = simdReg(v)
	return prog
}
// simdV2kload builds the Prog for a two-input SIMD op whose second input is
// read from memory and whose result is written to a mask register. The memory
// operand is based on the register of Args[1], simdReg(Args[0]) is added as
// an extra source, and the destination is maskReg(v).
// Example instruction: VPCMPEQD (SI), X1, K1
func simdV2kload(s *ssagen.State, v *ssa.Value) *obj.Prog {
	prog := s.Prog(v.Op.Asm())

	// Memory source operand; the type is set before AddAux is applied to it.
	src := &prog.From
	src.Type = obj.TYPE_MEM
	src.Reg = v.Args[1].Reg()
	ssagen.AddAux(src, v)

	prog.AddRestSourceReg(simdReg(v.Args[0]))

	// Unlike the vector-result helpers, the destination comes from maskReg.
	dst := &prog.To
	dst.Type = obj.TYPE_REG
	dst.Reg = maskReg(v)
	return prog
}
// simdV11load builds the Prog for a one-input SIMD op whose input is read
// from memory. The memory operand is based on the register of Args[0] and
// the destination is simdReg(v).
// Example instruction: VCVTTPS2DQ (BX), X2
func simdV11load(s *ssagen.State, v *ssa.Value) *obj.Prog {
	prog := s.Prog(v.Op.Asm())

	// Memory source operand; the type is set before AddAux is applied to it.
	src := &prog.From
	src.Type = obj.TYPE_MEM
	src.Reg = v.Args[0].Reg()
	ssagen.AddAux(src, v)

	dst := &prog.To
	dst.Type = obj.TYPE_REG
	dst.Reg = simdReg(v)
	return prog
}
var blockJump = [...]struct { var blockJump = [...]struct {
asm, invasm obj.As asm, invasm obj.As
}{ }{

View file

@ -244,6 +244,19 @@ func init() {
wfpw = regInfo{inputs: []regMask{wz, fp}, outputs: wonly} wfpw = regInfo{inputs: []regMask{wz, fp}, outputs: wonly}
wfpkw = regInfo{inputs: []regMask{wz, fp, mask}, outputs: wonly} wfpkw = regInfo{inputs: []regMask{wz, fp, mask}, outputs: wonly}
// These register masks are used by SIMD only, they follow the pattern:
// Mem last, k mask second to last (if any), address right before mem and k mask.
wkwload = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: wonly}
v21load = regInfo{inputs: []regMask{vz, gpspsb, 0}, outputs: vonly}
v31load = regInfo{inputs: []regMask{v, vz, gpspsb, 0}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
v11load = regInfo{inputs: []regMask{gpspsb, 0}, outputs: vonly}
w21load = regInfo{inputs: []regMask{wz, gpspsb, 0}, outputs: wonly}
w31load = regInfo{inputs: []regMask{w, wz, gpspsb, 0}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
w2kload = regInfo{inputs: []regMask{wz, gpspsb, 0}, outputs: maskonly}
w2kwload = regInfo{inputs: []regMask{wz, gpspsb, mask, 0}, outputs: wonly}
w11load = regInfo{inputs: []regMask{gpspsb, 0}, outputs: wonly}
w3kwload = regInfo{inputs: []regMask{w, wz, gpspsb, mask, 0}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
kload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly} kload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly}
kstore = regInfo{inputs: []regMask{gpspsb, mask, 0}} kstore = regInfo{inputs: []regMask{gpspsb, mask, 0}}
gpk = regInfo{inputs: gponly, outputs: maskonly} gpk = regInfo{inputs: gponly, outputs: maskonly}
@ -1444,7 +1457,9 @@ func init() {
pkg: "cmd/internal/obj/x86", pkg: "cmd/internal/obj/x86",
genfile: "../../amd64/ssa.go", genfile: "../../amd64/ssa.go",
genSIMDfile: "../../amd64/simdssa.go", genSIMDfile: "../../amd64/simdssa.go",
ops: append(AMD64ops, simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw)...), // AMD64ops, ops: append(AMD64ops, simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv,
w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw, wkwload, v21load, v31load, v11load,
w21load, w31load, w2kload, w2kwload, w11load, w3kwload)...), // AMD64ops,
blocks: AMD64blocks, blocks: AMD64blocks,
regnames: regNamesAMD64, regnames: regNamesAMD64,
ParamIntRegNames: "AX BX CX DI SI R8 R9 R10 R11", ParamIntRegNames: "AX BX CX DI SI R8 R9 R10 R11",

View file

@ -2,7 +2,8 @@
package main package main
func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw regInfo) []opData { func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw,
wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload regInfo) []opData {
return []opData{ return []opData{
{name: "VADDPD128", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VADDPD128", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VADDPD256", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VADDPD256", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false},
@ -1309,5 +1310,499 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSRAQMasked128const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAQMasked128const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSRAQMasked256const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAQMasked256const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRAQMasked512const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAQMasked512const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPABSD128load", argLength: 2, reg: v11load, asm: "VPABSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSD256load", argLength: 2, reg: v11load, asm: "VPABSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSD512load", argLength: 2, reg: w11load, asm: "VPABSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSQ128load", argLength: 2, reg: w11load, asm: "VPABSQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSQ256load", argLength: 2, reg: w11load, asm: "VPABSQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSQ512load", argLength: 2, reg: w11load, asm: "VPABSQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSDMasked128load", argLength: 3, reg: wkwload, asm: "VPABSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSDMasked256load", argLength: 3, reg: wkwload, asm: "VPABSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSDMasked512load", argLength: 3, reg: wkwload, asm: "VPABSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSQMasked128load", argLength: 3, reg: wkwload, asm: "VPABSQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSQMasked256load", argLength: 3, reg: wkwload, asm: "VPABSQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSQMasked512load", argLength: 3, reg: wkwload, asm: "VPABSQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPS128load", argLength: 3, reg: v21load, asm: "VADDPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPS256load", argLength: 3, reg: v21load, asm: "VADDPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPS512load", argLength: 3, reg: w21load, asm: "VADDPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPD128load", argLength: 3, reg: v21load, asm: "VADDPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPD256load", argLength: 3, reg: v21load, asm: "VADDPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPD512load", argLength: 3, reg: w21load, asm: "VADDPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDD128load", argLength: 3, reg: v21load, asm: "VPADDD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDD256load", argLength: 3, reg: v21load, asm: "VPADDD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDD512load", argLength: 3, reg: w21load, asm: "VPADDD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDQ128load", argLength: 3, reg: v21load, asm: "VPADDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDQ256load", argLength: 3, reg: v21load, asm: "VPADDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDQ512load", argLength: 3, reg: w21load, asm: "VPADDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPDPWSSD128load", argLength: 4, reg: v31load, asm: "VPDPWSSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSD256load", argLength: 4, reg: v31load, asm: "VPDPWSSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSD512load", argLength: 4, reg: w31load, asm: "VPDPWSSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDS128load", argLength: 4, reg: v31load, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDS256load", argLength: 4, reg: v31load, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDS512load", argLength: 4, reg: w31load, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDSMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDSMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDSMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPBUSD128load", argLength: 4, reg: v31load, asm: "VPDPBUSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPBUSD256load", argLength: 4, reg: v31load, asm: "VPDPBUSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPBUSD512load", argLength: 4, reg: w31load, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPBUSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPBUSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPBUSDMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPBUSDS128load", argLength: 4, reg: v31load, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPBUSDS256load", argLength: 4, reg: v31load, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPBUSDS512load", argLength: 4, reg: w31load, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPBUSDSMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPBUSDSMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPBUSDSMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VADDPSMasked128load", argLength: 4, reg: w2kwload, asm: "VADDPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPSMasked256load", argLength: 4, reg: w2kwload, asm: "VADDPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPSMasked512load", argLength: 4, reg: w2kwload, asm: "VADDPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPDMasked128load", argLength: 4, reg: w2kwload, asm: "VADDPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPDMasked256load", argLength: 4, reg: w2kwload, asm: "VADDPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPDMasked512load", argLength: 4, reg: w2kwload, asm: "VADDPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDDMasked128load", argLength: 4, reg: w2kwload, asm: "VPADDD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDDMasked256load", argLength: 4, reg: w2kwload, asm: "VPADDD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDDMasked512load", argLength: 4, reg: w2kwload, asm: "VPADDD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDQMasked128load", argLength: 4, reg: w2kwload, asm: "VPADDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDQMasked256load", argLength: 4, reg: w2kwload, asm: "VPADDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDQMasked512load", argLength: 4, reg: w2kwload, asm: "VPADDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDD512load", argLength: 3, reg: w21load, asm: "VPANDD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDQ512load", argLength: 3, reg: w21load, asm: "VPANDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDDMasked128load", argLength: 4, reg: w2kwload, asm: "VPANDD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDDMasked256load", argLength: 4, reg: w2kwload, asm: "VPANDD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDDMasked512load", argLength: 4, reg: w2kwload, asm: "VPANDD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDQMasked128load", argLength: 4, reg: w2kwload, asm: "VPANDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDQMasked256load", argLength: 4, reg: w2kwload, asm: "VPANDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDQMasked512load", argLength: 4, reg: w2kwload, asm: "VPANDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDND512load", argLength: 3, reg: w21load, asm: "VPANDND", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDNQ512load", argLength: 3, reg: w21load, asm: "VPANDNQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDNDMasked128load", argLength: 4, reg: w2kwload, asm: "VPANDND", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDNDMasked256load", argLength: 4, reg: w2kwload, asm: "VPANDND", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDNDMasked512load", argLength: 4, reg: w2kwload, asm: "VPANDND", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDNQMasked128load", argLength: 4, reg: w2kwload, asm: "VPANDNQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDNQMasked256load", argLength: 4, reg: w2kwload, asm: "VPANDNQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDNQMasked512load", argLength: 4, reg: w2kwload, asm: "VPANDNQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKSSDW128load", argLength: 3, reg: v21load, asm: "VPACKSSDW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKSSDW256load", argLength: 3, reg: v21load, asm: "VPACKSSDW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKSSDW512load", argLength: 3, reg: w21load, asm: "VPACKSSDW", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKSSDWMasked128load", argLength: 4, reg: w2kwload, asm: "VPACKSSDW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKSSDWMasked256load", argLength: 4, reg: w2kwload, asm: "VPACKSSDW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKSSDWMasked512load", argLength: 4, reg: w2kwload, asm: "VPACKSSDW", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPS2DQ128load", argLength: 2, reg: v11load, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPS2DQ256load", argLength: 2, reg: v11load, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPS2DQ512load", argLength: 2, reg: w11load, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPS2DQMasked128load", argLength: 3, reg: wkwload, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPS2DQMasked256load", argLength: 3, reg: wkwload, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPS2DQMasked512load", argLength: 3, reg: wkwload, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKUSDW128load", argLength: 3, reg: v21load, asm: "VPACKUSDW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKUSDW256load", argLength: 3, reg: v21load, asm: "VPACKUSDW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKUSDW512load", argLength: 3, reg: w21load, asm: "VPACKUSDW", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKUSDWMasked128load", argLength: 4, reg: w2kwload, asm: "VPACKUSDW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKUSDWMasked256load", argLength: 4, reg: w2kwload, asm: "VPACKUSDW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKUSDWMasked512load", argLength: 4, reg: w2kwload, asm: "VPACKUSDW", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPS2UDQ128load", argLength: 2, reg: w11load, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPS2UDQ256load", argLength: 2, reg: w11load, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPS2UDQ512load", argLength: 2, reg: w11load, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPS2UDQMasked128load", argLength: 3, reg: wkwload, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPS2UDQMasked256load", argLength: 3, reg: wkwload, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPS2UDQMasked512load", argLength: 3, reg: wkwload, asm: "VCVTPS2UDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPS128load", argLength: 3, reg: v21load, asm: "VDIVPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPS256load", argLength: 3, reg: v21load, asm: "VDIVPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPS512load", argLength: 3, reg: w21load, asm: "VDIVPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPD128load", argLength: 3, reg: v21load, asm: "VDIVPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPD256load", argLength: 3, reg: v21load, asm: "VDIVPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPD512load", argLength: 3, reg: w21load, asm: "VDIVPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPSMasked128load", argLength: 4, reg: w2kwload, asm: "VDIVPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPSMasked256load", argLength: 4, reg: w2kwload, asm: "VDIVPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPSMasked512load", argLength: 4, reg: w2kwload, asm: "VDIVPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPDMasked128load", argLength: 4, reg: w2kwload, asm: "VDIVPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPDMasked256load", argLength: 4, reg: w2kwload, asm: "VDIVPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPDMasked512load", argLength: 4, reg: w2kwload, asm: "VDIVPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQD128load", argLength: 3, reg: v21load, asm: "VPCMPEQD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQD256load", argLength: 3, reg: v21load, asm: "VPCMPEQD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQD512load", argLength: 3, reg: w2kload, asm: "VPCMPEQD", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQQ128load", argLength: 3, reg: v21load, asm: "VPCMPEQQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQQ256load", argLength: 3, reg: v21load, asm: "VPCMPEQQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQQ512load", argLength: 3, reg: w2kload, asm: "VPCMPEQQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTD128load", argLength: 3, reg: v21load, asm: "VPCMPGTD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTD256load", argLength: 3, reg: v21load, asm: "VPCMPGTD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTD512load", argLength: 3, reg: w2kload, asm: "VPCMPGTD", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTQ128load", argLength: 3, reg: v21load, asm: "VPCMPGTQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTQ256load", argLength: 3, reg: v21load, asm: "VPCMPGTQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTQ512load", argLength: 3, reg: w2kload, asm: "VPCMPGTQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHDQ128load", argLength: 3, reg: v21load, asm: "VPUNPCKHDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHQDQ128load", argLength: 3, reg: v21load, asm: "VPUNPCKHQDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHDQ256load", argLength: 3, reg: v21load, asm: "VPUNPCKHDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKHDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHQDQ256load", argLength: 3, reg: v21load, asm: "VPUNPCKHQDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHQDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKHQDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLDQ128load", argLength: 3, reg: v21load, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLQDQ128load", argLength: 3, reg: v21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLDQ256load", argLength: 3, reg: v21load, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLQDQ256load", argLength: 3, reg: v21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLQDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPS128load", argLength: 3, reg: v21load, asm: "VMAXPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPS256load", argLength: 3, reg: v21load, asm: "VMAXPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPS512load", argLength: 3, reg: w21load, asm: "VMAXPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPD128load", argLength: 3, reg: v21load, asm: "VMAXPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPD256load", argLength: 3, reg: v21load, asm: "VMAXPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPD512load", argLength: 3, reg: w21load, asm: "VMAXPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSD128load", argLength: 3, reg: v21load, asm: "VPMAXSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSD256load", argLength: 3, reg: v21load, asm: "VPMAXSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSD512load", argLength: 3, reg: w21load, asm: "VPMAXSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSQ128load", argLength: 3, reg: w21load, asm: "VPMAXSQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSQ256load", argLength: 3, reg: w21load, asm: "VPMAXSQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSQ512load", argLength: 3, reg: w21load, asm: "VPMAXSQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUD128load", argLength: 3, reg: v21load, asm: "VPMAXUD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUD256load", argLength: 3, reg: v21load, asm: "VPMAXUD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUD512load", argLength: 3, reg: w21load, asm: "VPMAXUD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUQ128load", argLength: 3, reg: w21load, asm: "VPMAXUQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUQ256load", argLength: 3, reg: w21load, asm: "VPMAXUQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUQ512load", argLength: 3, reg: w21load, asm: "VPMAXUQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPSMasked128load", argLength: 4, reg: w2kwload, asm: "VMAXPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPSMasked256load", argLength: 4, reg: w2kwload, asm: "VMAXPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPSMasked512load", argLength: 4, reg: w2kwload, asm: "VMAXPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPDMasked128load", argLength: 4, reg: w2kwload, asm: "VMAXPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPDMasked256load", argLength: 4, reg: w2kwload, asm: "VMAXPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPDMasked512load", argLength: 4, reg: w2kwload, asm: "VMAXPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSDMasked128load", argLength: 4, reg: w2kwload, asm: "VPMAXSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSDMasked256load", argLength: 4, reg: w2kwload, asm: "VPMAXSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSDMasked512load", argLength: 4, reg: w2kwload, asm: "VPMAXSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSQMasked128load", argLength: 4, reg: w2kwload, asm: "VPMAXSQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSQMasked256load", argLength: 4, reg: w2kwload, asm: "VPMAXSQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSQMasked512load", argLength: 4, reg: w2kwload, asm: "VPMAXSQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUDMasked128load", argLength: 4, reg: w2kwload, asm: "VPMAXUD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUDMasked256load", argLength: 4, reg: w2kwload, asm: "VPMAXUD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUDMasked512load", argLength: 4, reg: w2kwload, asm: "VPMAXUD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUQMasked128load", argLength: 4, reg: w2kwload, asm: "VPMAXUQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUQMasked256load", argLength: 4, reg: w2kwload, asm: "VPMAXUQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUQMasked512load", argLength: 4, reg: w2kwload, asm: "VPMAXUQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPS128load", argLength: 3, reg: v21load, asm: "VMINPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPS256load", argLength: 3, reg: v21load, asm: "VMINPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPS512load", argLength: 3, reg: w21load, asm: "VMINPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPD128load", argLength: 3, reg: v21load, asm: "VMINPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPD256load", argLength: 3, reg: v21load, asm: "VMINPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPD512load", argLength: 3, reg: w21load, asm: "VMINPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSD128load", argLength: 3, reg: v21load, asm: "VPMINSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSD256load", argLength: 3, reg: v21load, asm: "VPMINSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSD512load", argLength: 3, reg: w21load, asm: "VPMINSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSQ128load", argLength: 3, reg: w21load, asm: "VPMINSQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSQ256load", argLength: 3, reg: w21load, asm: "VPMINSQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSQ512load", argLength: 3, reg: w21load, asm: "VPMINSQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUD128load", argLength: 3, reg: v21load, asm: "VPMINUD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUD256load", argLength: 3, reg: v21load, asm: "VPMINUD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUD512load", argLength: 3, reg: w21load, asm: "VPMINUD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUQ128load", argLength: 3, reg: w21load, asm: "VPMINUQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUQ256load", argLength: 3, reg: w21load, asm: "VPMINUQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUQ512load", argLength: 3, reg: w21load, asm: "VPMINUQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPSMasked128load", argLength: 4, reg: w2kwload, asm: "VMINPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPSMasked256load", argLength: 4, reg: w2kwload, asm: "VMINPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPSMasked512load", argLength: 4, reg: w2kwload, asm: "VMINPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPDMasked128load", argLength: 4, reg: w2kwload, asm: "VMINPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPDMasked256load", argLength: 4, reg: w2kwload, asm: "VMINPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPDMasked512load", argLength: 4, reg: w2kwload, asm: "VMINPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSDMasked128load", argLength: 4, reg: w2kwload, asm: "VPMINSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSDMasked256load", argLength: 4, reg: w2kwload, asm: "VPMINSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSDMasked512load", argLength: 4, reg: w2kwload, asm: "VPMINSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSQMasked128load", argLength: 4, reg: w2kwload, asm: "VPMINSQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSQMasked256load", argLength: 4, reg: w2kwload, asm: "VPMINSQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSQMasked512load", argLength: 4, reg: w2kwload, asm: "VPMINSQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUDMasked128load", argLength: 4, reg: w2kwload, asm: "VPMINUD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUDMasked256load", argLength: 4, reg: w2kwload, asm: "VPMINUD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUDMasked512load", argLength: 4, reg: w2kwload, asm: "VPMINUD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUQMasked128load", argLength: 4, reg: w2kwload, asm: "VPMINUQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUQMasked256load", argLength: 4, reg: w2kwload, asm: "VPMINUQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUQMasked512load", argLength: 4, reg: w2kwload, asm: "VPMINUQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPS128load", argLength: 3, reg: v21load, asm: "VMULPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPS256load", argLength: 3, reg: v21load, asm: "VMULPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPS512load", argLength: 3, reg: w21load, asm: "VMULPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPD128load", argLength: 3, reg: v21load, asm: "VMULPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPD256load", argLength: 3, reg: v21load, asm: "VMULPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPD512load", argLength: 3, reg: w21load, asm: "VMULPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLD128load", argLength: 3, reg: v21load, asm: "VPMULLD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLD256load", argLength: 3, reg: v21load, asm: "VPMULLD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLD512load", argLength: 3, reg: w21load, asm: "VPMULLD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLQ128load", argLength: 3, reg: w21load, asm: "VPMULLQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLQ256load", argLength: 3, reg: w21load, asm: "VPMULLQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLQ512load", argLength: 3, reg: w21load, asm: "VPMULLQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VFMADD213PS128load", argLength: 4, reg: w31load, asm: "VFMADD213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PS256load", argLength: 4, reg: w31load, asm: "VFMADD213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PS512load", argLength: 4, reg: w31load, asm: "VFMADD213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PD128load", argLength: 4, reg: w31load, asm: "VFMADD213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PD256load", argLength: 4, reg: w31load, asm: "VFMADD213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PD512load", argLength: 4, reg: w31load, asm: "VFMADD213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PSMasked128load", argLength: 5, reg: w3kwload, asm: "VFMADD213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PSMasked256load", argLength: 5, reg: w3kwload, asm: "VFMADD213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PSMasked512load", argLength: 5, reg: w3kwload, asm: "VFMADD213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PDMasked128load", argLength: 5, reg: w3kwload, asm: "VFMADD213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PDMasked256load", argLength: 5, reg: w3kwload, asm: "VFMADD213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PDMasked512load", argLength: 5, reg: w3kwload, asm: "VFMADD213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PS128load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PS256load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PS512load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PD128load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PD256load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PD512load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PSMasked128load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PSMasked256load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PSMasked512load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PDMasked128load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PDMasked256load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PDMasked512load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPMULDQ128load", argLength: 3, reg: v21load, asm: "VPMULDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULDQ256load", argLength: 3, reg: v21load, asm: "VPMULDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULUDQ128load", argLength: 3, reg: v21load, asm: "VPMULUDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULUDQ256load", argLength: 3, reg: v21load, asm: "VPMULUDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPSMasked128load", argLength: 4, reg: w2kwload, asm: "VMULPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPSMasked256load", argLength: 4, reg: w2kwload, asm: "VMULPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPSMasked512load", argLength: 4, reg: w2kwload, asm: "VMULPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPDMasked128load", argLength: 4, reg: w2kwload, asm: "VMULPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPDMasked256load", argLength: 4, reg: w2kwload, asm: "VMULPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPDMasked512load", argLength: 4, reg: w2kwload, asm: "VMULPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLDMasked128load", argLength: 4, reg: w2kwload, asm: "VPMULLD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLDMasked256load", argLength: 4, reg: w2kwload, asm: "VPMULLD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLDMasked512load", argLength: 4, reg: w2kwload, asm: "VPMULLD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLQMasked128load", argLength: 4, reg: w2kwload, asm: "VPMULLQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLQMasked256load", argLength: 4, reg: w2kwload, asm: "VPMULLQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLQMasked512load", argLength: 4, reg: w2kwload, asm: "VPMULLQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VFMSUBADD213PS128load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PS256load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PS512load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PD128load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PD256load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PD512load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PSMasked128load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PSMasked256load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PSMasked512load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PDMasked128load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PDMasked256load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PDMasked512load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPOPCNTD128load", argLength: 2, reg: w11load, asm: "VPOPCNTD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTD256load", argLength: 2, reg: w11load, asm: "VPOPCNTD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTD512load", argLength: 2, reg: w11load, asm: "VPOPCNTD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTQ128load", argLength: 2, reg: w11load, asm: "VPOPCNTQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTQ256load", argLength: 2, reg: w11load, asm: "VPOPCNTQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTQ512load", argLength: 2, reg: w11load, asm: "VPOPCNTQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTDMasked128load", argLength: 3, reg: wkwload, asm: "VPOPCNTD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTDMasked256load", argLength: 3, reg: wkwload, asm: "VPOPCNTD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTDMasked512load", argLength: 3, reg: wkwload, asm: "VPOPCNTD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTQMasked128load", argLength: 3, reg: wkwload, asm: "VPOPCNTQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTQMasked256load", argLength: 3, reg: wkwload, asm: "VPOPCNTQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTQMasked512load", argLength: 3, reg: wkwload, asm: "VPOPCNTQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPORD512load", argLength: 3, reg: w21load, asm: "VPORD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPORQ512load", argLength: 3, reg: w21load, asm: "VPORQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPORDMasked128load", argLength: 4, reg: w2kwload, asm: "VPORD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPORDMasked256load", argLength: 4, reg: w2kwload, asm: "VPORD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPORDMasked512load", argLength: 4, reg: w2kwload, asm: "VPORD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPORQMasked128load", argLength: 4, reg: w2kwload, asm: "VPORQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPORQMasked256load", argLength: 4, reg: w2kwload, asm: "VPORQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPORQMasked512load", argLength: 4, reg: w2kwload, asm: "VPORQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPS256load", argLength: 3, reg: v21load, asm: "VPERMPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMD256load", argLength: 3, reg: v21load, asm: "VPERMD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPS512load", argLength: 3, reg: w21load, asm: "VPERMPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMD512load", argLength: 3, reg: w21load, asm: "VPERMD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPD256load", argLength: 3, reg: w21load, asm: "VPERMPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMQ256load", argLength: 3, reg: w21load, asm: "VPERMQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPD512load", argLength: 3, reg: w21load, asm: "VPERMPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMQ512load", argLength: 3, reg: w21load, asm: "VPERMQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMI2PS128load", argLength: 4, reg: w31load, asm: "VPERMI2PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2D128load", argLength: 4, reg: w31load, asm: "VPERMI2D", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2PS256load", argLength: 4, reg: w31load, asm: "VPERMI2PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2D256load", argLength: 4, reg: w31load, asm: "VPERMI2D", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2PS512load", argLength: 4, reg: w31load, asm: "VPERMI2PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2D512load", argLength: 4, reg: w31load, asm: "VPERMI2D", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2PD128load", argLength: 4, reg: w31load, asm: "VPERMI2PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2Q128load", argLength: 4, reg: w31load, asm: "VPERMI2Q", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2PD256load", argLength: 4, reg: w31load, asm: "VPERMI2PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2Q256load", argLength: 4, reg: w31load, asm: "VPERMI2Q", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2PD512load", argLength: 4, reg: w31load, asm: "VPERMI2PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2Q512load", argLength: 4, reg: w31load, asm: "VPERMI2Q", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2PSMasked128load", argLength: 5, reg: w3kwload, asm: "VPERMI2PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2DMasked128load", argLength: 5, reg: w3kwload, asm: "VPERMI2D", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2PSMasked256load", argLength: 5, reg: w3kwload, asm: "VPERMI2PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2DMasked256load", argLength: 5, reg: w3kwload, asm: "VPERMI2D", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2PSMasked512load", argLength: 5, reg: w3kwload, asm: "VPERMI2PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2DMasked512load", argLength: 5, reg: w3kwload, asm: "VPERMI2D", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2PDMasked128load", argLength: 5, reg: w3kwload, asm: "VPERMI2PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2QMasked128load", argLength: 5, reg: w3kwload, asm: "VPERMI2Q", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2PDMasked256load", argLength: 5, reg: w3kwload, asm: "VPERMI2PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2QMasked256load", argLength: 5, reg: w3kwload, asm: "VPERMI2Q", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2PDMasked512load", argLength: 5, reg: w3kwload, asm: "VPERMI2PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2QMasked512load", argLength: 5, reg: w3kwload, asm: "VPERMI2Q", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMPSMasked256load", argLength: 4, reg: w2kwload, asm: "VPERMPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMDMasked256load", argLength: 4, reg: w2kwload, asm: "VPERMD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPSMasked512load", argLength: 4, reg: w2kwload, asm: "VPERMPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMDMasked512load", argLength: 4, reg: w2kwload, asm: "VPERMD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPDMasked256load", argLength: 4, reg: w2kwload, asm: "VPERMPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMQMasked256load", argLength: 4, reg: w2kwload, asm: "VPERMQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPDMasked512load", argLength: 4, reg: w2kwload, asm: "VPERMPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMQMasked512load", argLength: 4, reg: w2kwload, asm: "VPERMQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCP14PS512load", argLength: 2, reg: w11load, asm: "VRCP14PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCP14PD128load", argLength: 2, reg: w11load, asm: "VRCP14PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCP14PD256load", argLength: 2, reg: w11load, asm: "VRCP14PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCP14PD512load", argLength: 2, reg: w11load, asm: "VRCP14PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCP14PSMasked128load", argLength: 3, reg: wkwload, asm: "VRCP14PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCP14PSMasked256load", argLength: 3, reg: wkwload, asm: "VRCP14PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCP14PSMasked512load", argLength: 3, reg: wkwload, asm: "VRCP14PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCP14PDMasked128load", argLength: 3, reg: wkwload, asm: "VRCP14PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCP14PDMasked256load", argLength: 3, reg: wkwload, asm: "VRCP14PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCP14PDMasked512load", argLength: 3, reg: wkwload, asm: "VRCP14PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PS512load", argLength: 2, reg: w11load, asm: "VRSQRT14PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PD128load", argLength: 2, reg: w11load, asm: "VRSQRT14PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PD256load", argLength: 2, reg: w11load, asm: "VRSQRT14PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PD512load", argLength: 2, reg: w11load, asm: "VRSQRT14PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PSMasked128load", argLength: 3, reg: wkwload, asm: "VRSQRT14PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PSMasked256load", argLength: 3, reg: wkwload, asm: "VRSQRT14PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PSMasked512load", argLength: 3, reg: wkwload, asm: "VRSQRT14PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PDMasked128load", argLength: 3, reg: wkwload, asm: "VRSQRT14PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PDMasked256load", argLength: 3, reg: wkwload, asm: "VRSQRT14PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PDMasked512load", argLength: 3, reg: wkwload, asm: "VRSQRT14PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLVD128load", argLength: 3, reg: w21load, asm: "VPROLVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLVD256load", argLength: 3, reg: w21load, asm: "VPROLVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLVD512load", argLength: 3, reg: w21load, asm: "VPROLVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLVQ128load", argLength: 3, reg: w21load, asm: "VPROLVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLVQ256load", argLength: 3, reg: w21load, asm: "VPROLVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLVQ512load", argLength: 3, reg: w21load, asm: "VPROLVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLVDMasked128load", argLength: 4, reg: w2kwload, asm: "VPROLVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLVDMasked256load", argLength: 4, reg: w2kwload, asm: "VPROLVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLVDMasked512load", argLength: 4, reg: w2kwload, asm: "VPROLVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLVQMasked128load", argLength: 4, reg: w2kwload, asm: "VPROLVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLVQMasked256load", argLength: 4, reg: w2kwload, asm: "VPROLVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLVQMasked512load", argLength: 4, reg: w2kwload, asm: "VPROLVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVD128load", argLength: 3, reg: w21load, asm: "VPRORVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVD256load", argLength: 3, reg: w21load, asm: "VPRORVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVD512load", argLength: 3, reg: w21load, asm: "VPRORVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVQ128load", argLength: 3, reg: w21load, asm: "VPRORVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVQ256load", argLength: 3, reg: w21load, asm: "VPRORVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVQ512load", argLength: 3, reg: w21load, asm: "VPRORVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVDMasked128load", argLength: 4, reg: w2kwload, asm: "VPRORVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVDMasked256load", argLength: 4, reg: w2kwload, asm: "VPRORVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVDMasked512load", argLength: 4, reg: w2kwload, asm: "VPRORVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVQMasked128load", argLength: 4, reg: w2kwload, asm: "VPRORVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVQMasked256load", argLength: 4, reg: w2kwload, asm: "VPRORVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVQMasked512load", argLength: 4, reg: w2kwload, asm: "VPRORVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPS128load", argLength: 3, reg: w21load, asm: "VSCALEFPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPS256load", argLength: 3, reg: w21load, asm: "VSCALEFPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPS512load", argLength: 3, reg: w21load, asm: "VSCALEFPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPD128load", argLength: 3, reg: w21load, asm: "VSCALEFPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPD256load", argLength: 3, reg: w21load, asm: "VSCALEFPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPD512load", argLength: 3, reg: w21load, asm: "VSCALEFPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPSMasked128load", argLength: 4, reg: w2kwload, asm: "VSCALEFPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPSMasked256load", argLength: 4, reg: w2kwload, asm: "VSCALEFPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPSMasked512load", argLength: 4, reg: w2kwload, asm: "VSCALEFPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPDMasked128load", argLength: 4, reg: w2kwload, asm: "VSCALEFPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPDMasked256load", argLength: 4, reg: w2kwload, asm: "VSCALEFPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPDMasked512load", argLength: 4, reg: w2kwload, asm: "VSCALEFPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVD128load", argLength: 3, reg: v21load, asm: "VPSLLVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVD256load", argLength: 3, reg: v21load, asm: "VPSLLVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVD512load", argLength: 3, reg: w21load, asm: "VPSLLVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVQ128load", argLength: 3, reg: v21load, asm: "VPSLLVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVQ256load", argLength: 3, reg: v21load, asm: "VPSLLVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVQ512load", argLength: 3, reg: w21load, asm: "VPSLLVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDVD128load", argLength: 4, reg: w31load, asm: "VPSHLDVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVD256load", argLength: 4, reg: w31load, asm: "VPSHLDVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVD512load", argLength: 4, reg: w31load, asm: "VPSHLDVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVQ128load", argLength: 4, reg: w31load, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVQ256load", argLength: 4, reg: w31load, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVQ512load", argLength: 4, reg: w31load, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVDMasked128load", argLength: 5, reg: w3kwload, asm: "VPSHLDVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVDMasked256load", argLength: 5, reg: w3kwload, asm: "VPSHLDVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVDMasked512load", argLength: 5, reg: w3kwload, asm: "VPSHLDVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVQMasked128load", argLength: 5, reg: w3kwload, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVQMasked256load", argLength: 5, reg: w3kwload, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVQMasked512load", argLength: 5, reg: w3kwload, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSLLVDMasked128load", argLength: 4, reg: w2kwload, asm: "VPSLLVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVDMasked256load", argLength: 4, reg: w2kwload, asm: "VPSLLVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVDMasked512load", argLength: 4, reg: w2kwload, asm: "VPSLLVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSLLVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSLLVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSLLVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVD128load", argLength: 3, reg: v21load, asm: "VPSRAVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVD256load", argLength: 3, reg: v21load, asm: "VPSRAVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVD512load", argLength: 3, reg: w21load, asm: "VPSRAVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVQ128load", argLength: 3, reg: w21load, asm: "VPSRAVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVQ256load", argLength: 3, reg: w21load, asm: "VPSRAVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVQ512load", argLength: 3, reg: w21load, asm: "VPSRAVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVD128load", argLength: 3, reg: v21load, asm: "VPSRLVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVD256load", argLength: 3, reg: v21load, asm: "VPSRLVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVD512load", argLength: 3, reg: w21load, asm: "VPSRLVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVQ128load", argLength: 3, reg: v21load, asm: "VPSRLVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVQ256load", argLength: 3, reg: v21load, asm: "VPSRLVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVQ512load", argLength: 3, reg: w21load, asm: "VPSRLVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDVD128load", argLength: 4, reg: w31load, asm: "VPSHRDVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVD256load", argLength: 4, reg: w31load, asm: "VPSHRDVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVD512load", argLength: 4, reg: w31load, asm: "VPSHRDVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVQ128load", argLength: 4, reg: w31load, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVQ256load", argLength: 4, reg: w31load, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVQ512load", argLength: 4, reg: w31load, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVDMasked128load", argLength: 5, reg: w3kwload, asm: "VPSHRDVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVDMasked256load", argLength: 5, reg: w3kwload, asm: "VPSHRDVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVDMasked512load", argLength: 5, reg: w3kwload, asm: "VPSHRDVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVQMasked128load", argLength: 5, reg: w3kwload, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVQMasked256load", argLength: 5, reg: w3kwload, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVQMasked512load", argLength: 5, reg: w3kwload, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSRAVDMasked128load", argLength: 4, reg: w2kwload, asm: "VPSRAVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVDMasked256load", argLength: 4, reg: w2kwload, asm: "VPSRAVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVDMasked512load", argLength: 4, reg: w2kwload, asm: "VPSRAVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSRAVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSRAVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSRAVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVDMasked128load", argLength: 4, reg: w2kwload, asm: "VPSRLVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVDMasked256load", argLength: 4, reg: w2kwload, asm: "VPSRLVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVDMasked512load", argLength: 4, reg: w2kwload, asm: "VPSRLVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSRLVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSRLVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSRLVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPS128load", argLength: 2, reg: v11load, asm: "VSQRTPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPS256load", argLength: 2, reg: v11load, asm: "VSQRTPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPS512load", argLength: 2, reg: w11load, asm: "VSQRTPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPD128load", argLength: 2, reg: v11load, asm: "VSQRTPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPD256load", argLength: 2, reg: v11load, asm: "VSQRTPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPD512load", argLength: 2, reg: w11load, asm: "VSQRTPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPSMasked128load", argLength: 3, reg: wkwload, asm: "VSQRTPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPSMasked256load", argLength: 3, reg: wkwload, asm: "VSQRTPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPSMasked512load", argLength: 3, reg: wkwload, asm: "VSQRTPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPDMasked128load", argLength: 3, reg: wkwload, asm: "VSQRTPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPDMasked256load", argLength: 3, reg: wkwload, asm: "VSQRTPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPDMasked512load", argLength: 3, reg: wkwload, asm: "VSQRTPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPS128load", argLength: 3, reg: v21load, asm: "VSUBPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPS256load", argLength: 3, reg: v21load, asm: "VSUBPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPS512load", argLength: 3, reg: w21load, asm: "VSUBPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPD128load", argLength: 3, reg: v21load, asm: "VSUBPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPD256load", argLength: 3, reg: v21load, asm: "VSUBPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPD512load", argLength: 3, reg: w21load, asm: "VSUBPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBD128load", argLength: 3, reg: v21load, asm: "VPSUBD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBD256load", argLength: 3, reg: v21load, asm: "VPSUBD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBD512load", argLength: 3, reg: w21load, asm: "VPSUBD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBQ128load", argLength: 3, reg: v21load, asm: "VPSUBQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBQ256load", argLength: 3, reg: v21load, asm: "VPSUBQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBQ512load", argLength: 3, reg: w21load, asm: "VPSUBQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPSMasked128load", argLength: 4, reg: w2kwload, asm: "VSUBPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPSMasked256load", argLength: 4, reg: w2kwload, asm: "VSUBPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPSMasked512load", argLength: 4, reg: w2kwload, asm: "VSUBPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPDMasked128load", argLength: 4, reg: w2kwload, asm: "VSUBPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPDMasked256load", argLength: 4, reg: w2kwload, asm: "VSUBPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPDMasked512load", argLength: 4, reg: w2kwload, asm: "VSUBPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBDMasked128load", argLength: 4, reg: w2kwload, asm: "VPSUBD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBDMasked256load", argLength: 4, reg: w2kwload, asm: "VPSUBD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBDMasked512load", argLength: 4, reg: w2kwload, asm: "VPSUBD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSUBQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSUBQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSUBQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXORD512load", argLength: 3, reg: w21load, asm: "VPXORD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXORQ512load", argLength: 3, reg: w21load, asm: "VPXORQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXORDMasked128load", argLength: 4, reg: w2kwload, asm: "VPXORD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXORDMasked256load", argLength: 4, reg: w2kwload, asm: "VPXORD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXORDMasked512load", argLength: 4, reg: w2kwload, asm: "VPXORD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXORQMasked128load", argLength: 4, reg: w2kwload, asm: "VPXORQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXORQMasked256load", argLength: 4, reg: w2kwload, asm: "VPXORQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXORQMasked512load", argLength: 4, reg: w2kwload, asm: "VPXORQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBLENDMDMasked512load", argLength: 4, reg: w2kwload, asm: "VPBLENDMD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBLENDMQMasked512load", argLength: 4, reg: w2kwload, asm: "VPBLENDMQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
} }
} }

File diff suppressed because it is too large Load diff

View file

@ -7,6 +7,7 @@ package main
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"log"
"sort" "sort"
"strings" "strings"
) )
@ -14,13 +15,17 @@ import (
const simdMachineOpsTmpl = ` const simdMachineOpsTmpl = `
package main package main
func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw regInfo) []opData { func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw,
wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload regInfo) []opData {
return []opData{ return []opData{
{{- range .OpsData }} {{- range .OpsData }}
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}},
{{- end }} {{- end }}
{{- range .OpsDataImm }} {{- range .OpsDataImm }}
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "UInt8", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "UInt8", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}},
{{- end }}
{{- range .OpsDataload}}
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", aux: "SymOff", symEffect: "Read", resultInArg0: {{.ResultInArg0}}},
{{- end}} {{- end}}
} }
} }
@ -45,13 +50,17 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
type machineOpsData struct { type machineOpsData struct {
OpsData []opData OpsData []opData
OpsDataImm []opData OpsDataImm []opData
OpsDataload []opData
} }
regInfoSet := map[string]bool{ regInfoSet := map[string]bool{
"v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true, "v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true,
"w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true} "w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true,
"wkwload": true, "v21load": true, "v31load": true, "v11load": true, "w21load": true, "w31load": true, "w2kload": true, "w2kwload": true, "w11load": true,
"w3kwload": true}
opsData := make([]opData, 0) opsData := make([]opData, 0)
opsDataImm := make([]opData, 0) opsDataImm := make([]opData, 0)
opsDataload := make([]opData, 0)
// Determine the "best" version of an instruction to use // Determine the "best" version of an instruction to use
best := make(map[string]Operation) best := make(map[string]Operation)
@ -80,36 +89,42 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
} }
} }
regInfoErrs := make([]error, 0)
regInfoMissing := make(map[string]bool, 0)
for _, asm := range mOpOrder { for _, asm := range mOpOrder {
op := best[asm] op := best[asm]
shapeIn, shapeOut, _, _, gOp := op.shape() shapeIn, shapeOut, _, _, gOp := op.shape()
// TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy // TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy
// one here with a name suffix "Merging". The rewrite rules will need them. // one here with a name suffix "Merging". The rewrite rules will need them.
makeRegInfo := func(op Operation, mem memShape) (string, error) {
regInfo, err := op.regShape() regInfo, err := op.regShape(mem)
if err != nil { if err != nil {
panic(err) panic(err)
} }
idx, err := checkVecAsScalar(op) regInfo, err = rewriteVecAsScalarRegInfo(op, regInfo)
if err != nil { if err != nil {
if mem == NoMem || mem == InvalidMem {
panic(err) panic(err)
} }
if idx != -1 { return "", err
if regInfo == "v21" {
regInfo = "vfpv"
} else if regInfo == "v2kv" {
regInfo = "vfpkv"
} else {
panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regInfo, op))
} }
if regInfo == "v01load" {
regInfo = "vload"
} }
// Makes AVX512 operations use upper registers // Makes AVX512 operations use upper registers
if strings.Contains(op.CPUFeature, "AVX512") { if strings.Contains(op.CPUFeature, "AVX512") {
regInfo = strings.ReplaceAll(regInfo, "v", "w") regInfo = strings.ReplaceAll(regInfo, "v", "w")
} }
if _, ok := regInfoSet[regInfo]; !ok { if _, ok := regInfoSet[regInfo]; !ok {
panic(fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s. Op is %s", regInfo, op)) regInfoErrs = append(regInfoErrs, fmt.Errorf("unsupported register constraint, please update the template and AMD64Ops.go: %s. Op is %s", regInfo, op))
regInfoMissing[regInfo] = true
}
return regInfo, nil
}
regInfo, err := makeRegInfo(op, NoMem)
if err != nil {
panic(err)
} }
var outType string var outType string
if shapeOut == OneVregOut || shapeOut == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil { if shapeOut == OneVregOut || shapeOut == OneVregOutAtIn || gOp.Out[0].OverwriteClass != nil {
@ -128,9 +143,33 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
} }
if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn {
opsDataImm = append(opsDataImm, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) opsDataImm = append(opsDataImm, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
// TODO: right now we put the uint8 immediates in [Aux] field, but for load this field needs to be occupied by SymOff.
// we should handle uint8 aux in [AuxInt]. Before that we will skip memory ops with imm.
} else { } else {
opsData = append(opsData, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0}) opsData = append(opsData, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
if op.MemFeatures != nil && *op.MemFeatures == "vbcst" {
// Right now we only have vbcst case
// Make a full vec memory variant.
op = rewriteLastVregToMem(op)
regInfo, err := makeRegInfo(op, VregMemIn)
if err != nil {
// Just skip it if it's non nill.
// an error could be triggered by [checkVecAsScalar].
// TODO: make [checkVecAsScalar] aware of mem ops.
if *Verbose {
log.Printf("Seen error: %e", err)
} }
} else {
opsDataload = append(opsDataload, opData{asm + "load", gOp.Asm, len(gOp.In) + 1, regInfo, false, outType, resultInArg0})
}
}
}
}
if len(regInfoErrs) != 0 {
// Log every collected register-constraint error so that all of them are
// visible in one run, not just the first.
for _, e := range regInfoErrs {
	// Use %v for error values: %e is the floating-point verb and would
	// render the error as "%!e(...)" instead of its message.
	log.Printf("Errors: %v\n", e)
}
panic(fmt.Errorf("these regInfo unseen: %v", regInfoMissing))
} }
sort.Slice(opsData, func(i, j int) bool { sort.Slice(opsData, func(i, j int) bool {
return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0 return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0
@ -138,7 +177,10 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
sort.Slice(opsDataImm, func(i, j int) bool { sort.Slice(opsDataImm, func(i, j int) bool {
return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0 return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0
}) })
err := t.Execute(buffer, machineOpsData{opsData, opsDataImm}) sort.Slice(opsDataload, func(i, j int) bool {
return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0
})
err := t.Execute(buffer, machineOpsData{opsData, opsDataImm, opsDataload})
if err != nil { if err != nil {
panic(fmt.Errorf("failed to execute template: %w", err)) panic(fmt.Errorf("failed to execute template: %w", err))
} }

View file

@ -7,6 +7,7 @@ package main
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"log"
"strings" "strings"
"text/template" "text/template"
) )
@ -80,6 +81,13 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
"vgpvImm8", "vgpvImm8",
"vgpImm8", "vgpImm8",
"v2kvImm8", "v2kvImm8",
"vkvload",
"v21load",
"v31loadResultInArg0",
"v3kvloadResultInArg0",
"v2kvload",
"v2kload",
"v11load",
} }
regInfoSet := map[string][]string{} regInfoSet := map[string][]string{}
for _, key := range regInfoKeys { for _, key := range regInfoKeys {
@ -88,10 +96,37 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
seen := map[string]struct{}{} seen := map[string]struct{}{}
allUnseen := make(map[string][]Operation) allUnseen := make(map[string][]Operation)
// classifyOp computes the prog-writer register-shape key for op and records
// caseStr under that key in regInfoSet. Keys that were not already present in
// regInfoSet are also recorded in allUnseen together with the offending op.
// mem selects the memory form of the op that is being classified; memory ops
// that take an imm8 are rejected with an error.
classifyOp := func(op Operation, shapeIn inShape, shapeOut outShape, caseStr string, mem memShape) error {
	shape, err := op.regShape(mem)
	if err != nil {
		return err
	}
	// Normalize the zero-register-input load shape to its short name.
	if shape == "v01load" {
		shape = "vload"
	}
	if shapeOut == OneVregOutAtIn {
		shape += "ResultInArg0"
	}
	switch shapeIn {
	case OneImmIn, OneKmaskImmIn:
		if mem != NoMem && mem != InvalidMem {
			return fmt.Errorf("simdgen cannot handle mem op with imm8 as of now")
		}
		shape += "Imm8"
	}
	if shape, err = rewriteVecAsScalarRegInfo(op, shape); err != nil {
		return err
	}
	if _, known := regInfoSet[shape]; !known {
		allUnseen[shape] = append(allUnseen[shape], op)
	}
	regInfoSet[shape] = append(regInfoSet[shape], caseStr)
	return nil
}
for _, op := range ops { for _, op := range ops {
shapeIn, shapeOut, maskType, _, gOp := op.shape() shapeIn, shapeOut, maskType, _, gOp := op.shape()
asm := machineOpName(maskType, gOp) asm := machineOpName(maskType, gOp)
if _, ok := seen[asm]; ok { if _, ok := seen[asm]; ok {
continue continue
} }
@ -102,36 +137,28 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
ZeroingMask = append(ZeroingMask, caseStr) ZeroingMask = append(ZeroingMask, caseStr)
} }
} }
regShape, err := op.regShape() if err := classifyOp(op, shapeIn, shapeOut, caseStr, NoMem); err != nil {
if err != nil {
panic(err) panic(err)
} }
if shapeOut == OneVregOutAtIn { if op.MemFeatures != nil && *op.MemFeatures == "vbcst" {
regShape += "ResultInArg0" // Make a full vec memory variant
} op = rewriteLastVregToMem(op)
if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { // Ignore the error
regShape += "Imm8" // an error could be triggered by [checkVecAsScalar].
} // TODO: make [checkVecAsScalar] aware of mem ops.
idx, err := checkVecAsScalar(op) if err := classifyOp(op, shapeIn, shapeOut, caseStr+"load", VregMemIn); err != nil {
if err != nil { if *Verbose {
panic(err) log.Printf("Seen error: %e", err)
}
if idx != -1 {
if regShape == "v21" {
regShape = "vfpv"
} else if regShape == "v2kv" {
regShape = "vfpkv"
} else {
panic(fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regShape, op))
} }
} }
if _, ok := regInfoSet[regShape]; !ok {
allUnseen[regShape] = append(allUnseen[regShape], op)
} }
regInfoSet[regShape] = append(regInfoSet[regShape], caseStr)
} }
if len(allUnseen) != 0 { if len(allUnseen) != 0 {
panic(fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v", allUnseen)) allKeys := make([]string, 0)
for k := range allUnseen {
allKeys = append(allKeys, k)
}
panic(fmt.Errorf("unsupported register constraint for prog, please update gen_simdssa.go and amd64/ssa.go: %+v\nAll keys: %v", allUnseen, allKeys))
} }
buffer := new(bytes.Buffer) buffer := new(bytes.Buffer)

View file

@ -79,6 +79,7 @@ type inShape uint8
type outShape uint8 type outShape uint8
type maskShape uint8 type maskShape uint8
type immShape uint8 type immShape uint8
type memShape uint8
const ( const (
InvalidIn inShape = iota InvalidIn inShape = iota
@ -113,6 +114,12 @@ const (
ConstVarImm // a combination of user arg and const ConstVarImm // a combination of user arg and const
) )
// memShape values tell regShape which kind of memory operand, if any, the
// operation being described is allowed to carry.
const (
// InvalidMem is the zero value of memShape.
InvalidMem memShape = iota
// NoMem is what callers pass for the plain form of an op, i.e. before any
// vreg input has been rewritten into a memory operand.
NoMem
VregMemIn // The instruction contains a mem input which is loading a vreg.
)
// opShape returns the several integers describing the shape of the operation, // opShape returns the several integers describing the shape of the operation,
// and modified versions of the op: // and modified versions of the op:
// //
@ -227,17 +234,24 @@ func (op *Operation) shape() (shapeIn inShape, shapeOut outShape, maskType maskS
} }
// regShape returns a string representation of the register shape. // regShape returns a string representation of the register shape.
func (op *Operation) regShape() (string, error) { func (op *Operation) regShape(mem memShape) (string, error) {
_, _, _, _, gOp := op.shape() _, _, _, _, gOp := op.shape()
var regInfo string var regInfo string
var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt int var vRegInCnt, gRegInCnt, kMaskInCnt, vRegOutCnt, gRegOutCnt, kMaskOutCnt, memInCnt, memOutCnt int
for _, in := range gOp.In { for _, in := range gOp.In {
if in.Class == "vreg" { switch in.Class {
case "vreg":
vRegInCnt++ vRegInCnt++
} else if in.Class == "greg" { case "greg":
gRegInCnt++ gRegInCnt++
} else if in.Class == "mask" { case "mask":
kMaskInCnt++ kMaskInCnt++
case "memory":
if mem != VregMemIn {
panic("simdgen only knows VregMemIn in regShape")
}
memInCnt++
vRegInCnt++
} }
} }
for _, out := range gOp.Out { for _, out := range gOp.Out {
@ -248,6 +262,12 @@ func (op *Operation) regShape() (string, error) {
gRegOutCnt++ gRegOutCnt++
} else if out.Class == "mask" { } else if out.Class == "mask" {
kMaskOutCnt++ kMaskOutCnt++
} else if out.Class == "memory" {
if mem != VregMemIn {
panic("simdgen only knows VregMemIn in regShape")
}
vRegOutCnt++
memOutCnt++
} }
} }
var inRegs, inMasks, outRegs, outMasks string var inRegs, inMasks, outRegs, outMasks string
@ -279,6 +299,16 @@ func (op *Operation) regShape() (string, error) {
} else { } else {
regInfo = fmt.Sprintf("%s%s%s%s", inRegs, inMasks, outRegs, outMasks) regInfo = fmt.Sprintf("%s%s%s%s", inRegs, inMasks, outRegs, outMasks)
} }
if memInCnt > 0 {
if memInCnt == 1 {
regInfo += "load"
} else {
panic("simdgen does not understand more than 1 mem op as of now")
}
}
if memOutCnt > 0 {
panic("simdgen does not understand memory as output as of now")
}
return regInfo, nil return regInfo, nil
} }
@ -498,6 +528,42 @@ func checkVecAsScalar(op Operation) (idx int, err error) {
return return
} }
// rewriteVecAsScalarRegInfo maps a register-shape string to its
// "vector used as a scalar" counterpart when op requires it.
//
// checkVecAsScalar decides whether op needs the rewrite: any error it
// reports is returned unchanged, and an index of -1 means regInfo is
// returned as is. Otherwise only two shapes are understood:
//
//	"v21"  -> "vfpv"
//	"v2kv" -> "vfpkv"
//
// Any other shape yields an error naming the shape and the op.
func rewriteVecAsScalarRegInfo(op Operation, regInfo string) (string, error) {
	scalarIdx, err := checkVecAsScalar(op)
	if err != nil {
		return "", err
	}
	if scalarIdx == -1 {
		// Nothing in op is treated as a scalar; keep the shape untouched.
		return regInfo, nil
	}
	switch regInfo {
	case "v21":
		return "vfpv", nil
	case "v2kv":
		return "vfpkv", nil
	}
	return "", fmt.Errorf("simdgen does not recognize uses of treatLikeAScalarOfSize with op regShape %s in op: %s", regInfo, op)
}
// rewriteLastVregToMem returns op with the class of its last "vreg" input
// changed to "memory".
//
// op is received by value and its In slice is replaced by a freshly
// allocated copy, so the slice the caller passed in is left unmodified.
// It panics when op has no input of class "vreg".
func rewriteLastVregToMem(op Operation) Operation {
	inputs := make([]Operand, len(op.In))
	copy(inputs, op.In)
	// vbcst operations put their mem op always as the last vreg, so scan
	// from the end and rewrite the first vreg encountered.
	for i := len(inputs) - 1; i >= 0; i-- {
		if inputs[i].Class != "vreg" {
			continue
		}
		inputs[i].Class = "memory"
		op.In = inputs
		return op
	}
	panic("simdgen cannot find one vreg in the mem op vreg original")
}
// dedup is deduping operations in the full structure level. // dedup is deduping operations in the full structure level.
func dedup(ops []Operation) (deduped []Operation) { func dedup(ops []Operation) (deduped []Operation) {
for _, op := range ops { for _, op := range ops {

View file

@ -55,7 +55,7 @@ type rawOperation struct {
In []Operand // Parameters In []Operand // Parameters
InVariant []Operand // Optional parameters InVariant []Operand // Optional parameters
Out []Operand // Results Out []Operand // Results
Mem string // Shape of memory operands MemFeatures *string // The memory operand feature this operation supports
Commutative bool // Commutativity Commutative bool // Commutativity
CPUFeature string // CPUID/Has* feature name CPUFeature string // CPUID/Has* feature name
Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z" Zeroing *bool // nil => use asm suffix ".Z"; false => do not use asm suffix ".Z"

View file

@ -121,22 +121,32 @@ func loadXED(xedPath string) []*unify.Value {
// First check the opcode // First check the opcode
// Keep this logic in sync with [decodeOperands] // Keep this logic in sync with [decodeOperands]
if ms, ok := memOps[opcode]; ok { if ms, ok := memOps[opcode]; ok {
feat1, ok1 := decodeCPUFeature(o.inst)
// Then check if there exist such an operation that for all vreg // Then check if there exist such an operation that for all vreg
// shapes they are the same at the same index // shapes they are the same at the same index
matchIdx := -1 matchIdx := -1
outer: outer:
for i, m := range ms { for i, m := range ms {
// Their CPU feature should match first
feat2, ok2 := decodeCPUFeature(m.inst)
if !ok1 || !ok2 {
continue
}
if feat1 != feat2 {
continue
}
if len(o.ops) == len(m.ops) { if len(o.ops) == len(m.ops) {
for j := range o.ops { for j := range o.ops {
v1, ok1 := o.ops[j].(operandVReg) v1, ok3 := o.ops[j].(operandVReg)
v2, ok2 := m.ops[j].(operandVReg) v2, ok4 := m.ops[j].(operandVReg)
if ok1 && ok2 { if !ok3 || !ok4 {
continue
}
if v1.vecShape != v2.vecShape { if v1.vecShape != v2.vecShape {
// A mismatch, skip this memOp // A mismatch, skip this memOp
continue outer continue outer
} }
} }
}
// Found a match, break early // Found a match, break early
matchIdx = i matchIdx = i
break break
@ -156,7 +166,9 @@ func loadXED(xedPath string) []*unify.Value {
} }
for _, ms := range memOps { for _, ms := range memOps {
for _, m := range ms { for _, m := range ms {
if *Verbose {
log.Printf("mem op not merged: %s, %v\n", m.inst.Opcode(), m) log.Printf("mem op not merged: %s, %v\n", m.inst.Opcode(), m)
}
appendDefs(m.inst, m.ops, nil) appendDefs(m.inst, m.ops, nil)
} }
} }
@ -632,7 +644,10 @@ func addOperandsToDef(ops []operand, instDB *unify.DefBuilder, variant instVaria
instDB.Add("in", unify.NewValue(unify.NewTuple(inVals...))) instDB.Add("in", unify.NewValue(unify.NewTuple(inVals...)))
instDB.Add("inVariant", unify.NewValue(unify.NewTuple(inVar...))) instDB.Add("inVariant", unify.NewValue(unify.NewTuple(inVar...)))
instDB.Add("out", unify.NewValue(unify.NewTuple(outVals...))) instDB.Add("out", unify.NewValue(unify.NewTuple(outVals...)))
instDB.Add("mem", unify.NewValue(unify.NewStringExact(checkMem(ops)))) memFeatures := checkMem(ops)
if memFeatures != "noMem" {
instDB.Add("memFeatures", unify.NewValue(unify.NewStringExact(memFeatures)))
}
} }
// checkMem checks the shapes of memory operand in the operation and returns the shape. // checkMem checks the shapes of memory operand in the operation and returns the shape.