simd/archsimd: decode non-broadcast memory operands

This allows us to properly generate the ops and merge/load rules for
various SIMD instructions that can use memory operands.

Fixes #78159

Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-simd,gotip-linux-amd64_avx512-simd

Change-Id: Idec450c931c41bb903d4cc5b9b9ee8f610ee8796
Reviewed-on: https://go-review.googlesource.com/c/go/+/779521
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
Roland Shoemaker 2026-05-19 01:36:38 +00:00 committed by Roland Shoemaker
parent 856c405c4f
commit 4136ffed69
8 changed files with 25701 additions and 22 deletions

File diff suppressed because it is too large Load diff

View file

@ -2499,20 +2499,54 @@
(VPBLENDVB256 dst (VSQRTPS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VSUBPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VSUBPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VAESDECLAST128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VAESDECLAST128load {sym} [off] x ptr mem)
(VAESDECLAST256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VAESDECLAST256load {sym} [off] x ptr mem)
(VAESDEC128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VAESDEC128load {sym} [off] x ptr mem)
(VAESDEC256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VAESDEC256load {sym} [off] x ptr mem)
(VAESENCLAST128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VAESENCLAST128load {sym} [off] x ptr mem)
(VAESENCLAST256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VAESENCLAST256load {sym} [off] x ptr mem)
(VAESENC128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VAESENC128load {sym} [off] x ptr mem)
(VAESENC256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VAESENC256load {sym} [off] x ptr mem)
(VAESIMC128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VAESIMC128load {sym} [off] ptr mem)
(VAESKEYGENASSIST128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VAESKEYGENASSIST128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPABSB128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSB128load {sym} [off] ptr mem)
(VPABSB256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSB256load {sym} [off] ptr mem)
(VPABSW128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSW128load {sym} [off] ptr mem)
(VPABSW256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSW256load {sym} [off] ptr mem)
(VPABSD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD128load {sym} [off] ptr mem)
(VPABSD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD256load {sym} [off] ptr mem)
(VPABSD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD512load {sym} [off] ptr mem)
(VPABSQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ128load {sym} [off] ptr mem)
(VPABSQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ256load {sym} [off] ptr mem)
(VPABSQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ512load {sym} [off] ptr mem)
(VPABSBMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSBMasked128load {sym} [off] ptr mask mem)
(VPABSBMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSBMasked256load {sym} [off] ptr mask mem)
(VPABSWMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSWMasked128load {sym} [off] ptr mask mem)
(VPABSWMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSWMasked256load {sym} [off] ptr mask mem)
(VPABSDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSDMasked128load {sym} [off] ptr mask mem)
(VPABSDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSDMasked256load {sym} [off] ptr mask mem)
(VPABSDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSDMasked512load {sym} [off] ptr mask mem)
(VPABSQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSQMasked128load {sym} [off] ptr mask mem)
(VPABSQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSQMasked256load {sym} [off] ptr mask mem)
(VPABSQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSQMasked512load {sym} [off] ptr mask mem)
(VADDPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPS128load {sym} [off] x ptr mem)
(VADDPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPS256load {sym} [off] x ptr mem)
(VADDPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPS512load {sym} [off] x ptr mem)
(VADDPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPD128load {sym} [off] x ptr mem)
(VADDPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPD256load {sym} [off] x ptr mem)
(VADDPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPD512load {sym} [off] x ptr mem)
(VPADDB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDB128load {sym} [off] x ptr mem)
(VPADDB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDB256load {sym} [off] x ptr mem)
(VPADDW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDW128load {sym} [off] x ptr mem)
(VPADDW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDW256load {sym} [off] x ptr mem)
(VPADDD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDD128load {sym} [off] x ptr mem)
(VPADDD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDD256load {sym} [off] x ptr mem)
(VPADDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDD512load {sym} [off] x ptr mem)
(VPADDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDQ128load {sym} [off] x ptr mem)
(VPADDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDQ256load {sym} [off] x ptr mem)
(VPADDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDQ512load {sym} [off] x ptr mem)
(VPDPWSSD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSD128load {sym} [off] x y ptr mem)
(VPDPWSSD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSD256load {sym} [off] x y ptr mem)
(VPDPWSSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSD512load {sym} [off] x y ptr mem)
(VPDPWSSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDMasked128load {sym} [off] x y ptr mask mem)
(VPDPWSSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDMasked256load {sym} [off] x y ptr mask mem)
@ -2523,12 +2557,48 @@
(VADDPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPDMasked128load {sym} [off] x ptr mask mem)
(VADDPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPDMasked256load {sym} [off] x ptr mask mem)
(VADDPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPDMasked512load {sym} [off] x ptr mask mem)
(VPADDBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDBMasked128load {sym} [off] x ptr mask mem)
(VPADDBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDBMasked256load {sym} [off] x ptr mask mem)
(VPADDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDWMasked128load {sym} [off] x ptr mask mem)
(VPADDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDWMasked256load {sym} [off] x ptr mask mem)
(VPADDDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDDMasked128load {sym} [off] x ptr mask mem)
(VPADDDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDDMasked256load {sym} [off] x ptr mask mem)
(VPADDDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDDMasked512load {sym} [off] x ptr mask mem)
(VPADDQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDQMasked128load {sym} [off] x ptr mask mem)
(VPADDQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDQMasked256load {sym} [off] x ptr mask mem)
(VPADDQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDQMasked512load {sym} [off] x ptr mask mem)
(VHADDPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VHADDPS128load {sym} [off] x ptr mem)
(VHADDPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VHADDPD128load {sym} [off] x ptr mem)
(VPHADDW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPHADDW128load {sym} [off] x ptr mem)
(VPHADDD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPHADDD128load {sym} [off] x ptr mem)
(VHADDPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VHADDPS256load {sym} [off] x ptr mem)
(VHADDPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VHADDPD256load {sym} [off] x ptr mem)
(VPHADDW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPHADDW256load {sym} [off] x ptr mem)
(VPHADDD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPHADDD256load {sym} [off] x ptr mem)
(VPHADDSW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPHADDSW128load {sym} [off] x ptr mem)
(VPHADDSW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPHADDSW256load {sym} [off] x ptr mem)
(VPADDSB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDSB128load {sym} [off] x ptr mem)
(VPADDSB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDSB256load {sym} [off] x ptr mem)
(VPADDSW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDSW128load {sym} [off] x ptr mem)
(VPADDSW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDSW256load {sym} [off] x ptr mem)
(VPADDUSB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDUSB128load {sym} [off] x ptr mem)
(VPADDUSB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDUSB256load {sym} [off] x ptr mem)
(VPADDUSW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDUSW128load {sym} [off] x ptr mem)
(VPADDUSW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDUSW256load {sym} [off] x ptr mem)
(VPADDSBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDSBMasked128load {sym} [off] x ptr mask mem)
(VPADDSBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDSBMasked256load {sym} [off] x ptr mask mem)
(VPADDSWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDSWMasked128load {sym} [off] x ptr mask mem)
(VPADDSWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDSWMasked256load {sym} [off] x ptr mask mem)
(VPADDUSBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDUSBMasked128load {sym} [off] x ptr mask mem)
(VPADDUSBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDUSBMasked256load {sym} [off] x ptr mask mem)
(VPADDUSWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDUSWMasked128load {sym} [off] x ptr mask mem)
(VPADDUSWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDUSWMasked256load {sym} [off] x ptr mask mem)
(VADDSUBPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDSUBPS128load {sym} [off] x ptr mem)
(VADDSUBPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDSUBPS256load {sym} [off] x ptr mem)
(VADDSUBPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDSUBPD128load {sym} [off] x ptr mem)
(VADDSUBPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDSUBPD256load {sym} [off] x ptr mem)
(VPAND128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPAND128load {sym} [off] x ptr mem)
(VPAND256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPAND256load {sym} [off] x ptr mem)
(VPANDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDD512load {sym} [off] x ptr mem)
(VPANDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDQ512load {sym} [off] x ptr mem)
(VPANDDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDDMasked128load {sym} [off] x ptr mask mem)
@ -2537,6 +2607,8 @@
(VPANDQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDQMasked128load {sym} [off] x ptr mask mem)
(VPANDQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDQMasked256load {sym} [off] x ptr mask mem)
(VPANDQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDQMasked512load {sym} [off] x ptr mask mem)
(VPANDN128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDN128load {sym} [off] x ptr mem)
(VPANDN256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDN256load {sym} [off] x ptr mem)
(VPANDND512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDND512load {sym} [off] x ptr mem)
(VPANDNQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDNQ512load {sym} [off] x ptr mem)
(VPANDNDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNDMasked128load {sym} [off] x ptr mask mem)
@ -2545,6 +2617,48 @@
(VPANDNQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked128load {sym} [off] x ptr mask mem)
(VPANDNQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked256load {sym} [off] x ptr mask mem)
(VPANDNQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked512load {sym} [off] x ptr mask mem)
(VPAVGB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPAVGB128load {sym} [off] x ptr mem)
(VPAVGB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPAVGB256load {sym} [off] x ptr mem)
(VPAVGW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPAVGW128load {sym} [off] x ptr mem)
(VPAVGW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPAVGW256load {sym} [off] x ptr mem)
(VPAVGBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPAVGBMasked128load {sym} [off] x ptr mask mem)
(VPAVGBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPAVGBMasked256load {sym} [off] x ptr mask mem)
(VPAVGWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPAVGWMasked128load {sym} [off] x ptr mask mem)
(VPAVGWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPAVGWMasked256load {sym} [off] x ptr mask mem)
(VPBROADCASTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTQ128load {sym} [off] ptr mem)
(VPBROADCASTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTQMasked128load {sym} [off] ptr mask mem)
(VBROADCASTSS128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && v.Block.CPUfeatures.hasFeature(CPUavx) && canMergeLoad(v, l) && clobber(l)=> (VBROADCASTSS128load {sym} [off] ptr mem)
(VBROADCASTSD256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && v.Block.CPUfeatures.hasFeature(CPUavx) && canMergeLoad(v, l) && clobber(l)=> (VBROADCASTSD256load {sym} [off] ptr mem)
(VPBROADCASTD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTD128load {sym} [off] ptr mem)
(VPBROADCASTQ256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTQ256load {sym} [off] ptr mem)
(VBROADCASTSSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VBROADCASTSSMasked128load {sym} [off] ptr mask mem)
(VBROADCASTSDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VBROADCASTSDMasked256load {sym} [off] ptr mask mem)
(VPBROADCASTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTDMasked128load {sym} [off] ptr mask mem)
(VPBROADCASTQMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTQMasked256load {sym} [off] ptr mask mem)
(VBROADCASTSS256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && v.Block.CPUfeatures.hasFeature(CPUavx) && canMergeLoad(v, l) && clobber(l)=> (VBROADCASTSS256load {sym} [off] ptr mem)
(VBROADCASTSD512 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VBROADCASTSD512load {sym} [off] ptr mem)
(VPBROADCASTW128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTW128load {sym} [off] ptr mem)
(VPBROADCASTD256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTD256load {sym} [off] ptr mem)
(VPBROADCASTQ512 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTQ512load {sym} [off] ptr mem)
(VBROADCASTSSMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VBROADCASTSSMasked256load {sym} [off] ptr mask mem)
(VBROADCASTSDMasked512 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VBROADCASTSDMasked512load {sym} [off] ptr mask mem)
(VPBROADCASTDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTDMasked256load {sym} [off] ptr mask mem)
(VPBROADCASTQMasked512 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTQMasked512load {sym} [off] ptr mask mem)
(VBROADCASTSS512 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VBROADCASTSS512load {sym} [off] ptr mem)
(VPBROADCASTB128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTB128load {sym} [off] ptr mem)
(VPBROADCASTW256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTW256load {sym} [off] ptr mem)
(VPBROADCASTD512 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTD512load {sym} [off] ptr mem)
(VBROADCASTSSMasked512 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VBROADCASTSSMasked512load {sym} [off] ptr mask mem)
(VPBROADCASTBMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTBMasked128load {sym} [off] ptr mask mem)
(VPBROADCASTDMasked512 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTDMasked512load {sym} [off] ptr mask mem)
(VPBROADCASTB256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTB256load {sym} [off] ptr mem)
(VPBROADCASTBMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTBMasked256load {sym} [off] ptr mask mem)
(VPBROADCASTB512 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTB512load {sym} [off] ptr mem)
(VPBROADCASTBMasked512 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBROADCASTBMasked512load {sym} [off] ptr mask mem)
(VROUNDPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VROUNDPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VROUNDPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VROUNDPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VROUNDPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VROUNDPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VROUNDPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VROUNDPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VRNDSCALEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VRNDSCALEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VRNDSCALEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
@ -2569,6 +2683,10 @@
(VREDUCEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPERMI2B128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2B128load {sym} [off] x y ptr mem)
(VPERMI2B256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2B256load {sym} [off] x y ptr mem)
(VPERMI2W128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2W128load {sym} [off] x y ptr mem)
(VPERMI2W256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2W256load {sym} [off] x y ptr mem)
(VPERMI2PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS128load {sym} [off] x y ptr mem)
(VPERMI2D128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D128load {sym} [off] x y ptr mem)
(VPERMI2PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS256load {sym} [off] x y ptr mem)
@ -2581,6 +2699,10 @@
(VPERMI2Q256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q256load {sym} [off] x y ptr mem)
(VPERMI2PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD512load {sym} [off] x y ptr mem)
(VPERMI2Q512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q512load {sym} [off] x y ptr mem)
(VPERMI2BMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2BMasked128load {sym} [off] x y ptr mask mem)
(VPERMI2BMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2BMasked256load {sym} [off] x y ptr mask mem)
(VPERMI2WMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2WMasked128load {sym} [off] x y ptr mask mem)
(VPERMI2WMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2WMasked256load {sym} [off] x y ptr mask mem)
(VPERMI2PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked128load {sym} [off] x y ptr mask mem)
(VPERMI2DMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked128load {sym} [off] x y ptr mask mem)
(VPERMI2PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked256load {sym} [off] x y ptr mask mem)
@ -2593,7 +2715,15 @@
(VPERMI2QMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked256load {sym} [off] x y ptr mask mem)
(VPERMI2PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked512load {sym} [off] x y ptr mask mem)
(VPERMI2QMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked512load {sym} [off] x y ptr mask mem)
(VPALIGNR128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPALIGNR128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPALIGNR256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPALIGNR256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPALIGNRMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPALIGNRMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPALIGNRMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPALIGNRMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VCVTPD2PSX128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPD2PSX128load {sym} [off] ptr mem)
(VCVTPD2PSY128 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPD2PSY128load {sym} [off] ptr mem)
(VCVTPD2PS256 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPD2PS256load {sym} [off] ptr mem)
(VCVTDQ2PS128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTDQ2PS128load {sym} [off] ptr mem)
(VCVTDQ2PS256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTDQ2PS256load {sym} [off] ptr mem)
(VCVTDQ2PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTDQ2PS512load {sym} [off] ptr mem)
(VCVTQQ2PSX128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTQQ2PSX128load {sym} [off] ptr mem)
(VCVTQQ2PSY128 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTQQ2PSY128load {sym} [off] ptr mem)
@ -2619,7 +2749,9 @@
(VCVTUQQ2PSXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PSXMasked128load {sym} [off] ptr mask mem)
(VCVTUQQ2PSYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PSYMasked128load {sym} [off] ptr mask mem)
(VCVTUQQ2PSMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PSMasked256load {sym} [off] ptr mask mem)
(VCVTPS2PD256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2PD256load {sym} [off] ptr mem)
(VCVTPS2PD512 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2PD512load {sym} [off] ptr mem)
(VCVTDQ2PD256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTDQ2PD256load {sym} [off] ptr mem)
(VCVTDQ2PD512 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTDQ2PD512load {sym} [off] ptr mem)
(VCVTQQ2PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTQQ2PD128load {sym} [off] ptr mem)
(VCVTQQ2PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTQQ2PD256load {sym} [off] ptr mem)
@ -2641,7 +2773,11 @@
(VCVTUQQ2PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PDMasked128load {sym} [off] ptr mask mem)
(VCVTUQQ2PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PDMasked256load {sym} [off] ptr mask mem)
(VCVTUQQ2PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PDMasked512load {sym} [off] ptr mask mem)
(VCVTTPS2DQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQ128load {sym} [off] ptr mem)
(VCVTTPS2DQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQ256load {sym} [off] ptr mem)
(VCVTTPS2DQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQ512load {sym} [off] ptr mem)
(VCVTTPD2DQX128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPD2DQX128load {sym} [off] ptr mem)
(VCVTTPD2DQY128 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPD2DQY128load {sym} [off] ptr mem)
(VCVTTPD2DQ256 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPD2DQ256load {sym} [off] ptr mem)
(VCVTTPS2DQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQMasked128load {sym} [off] ptr mask mem)
(VCVTTPS2DQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQMasked256load {sym} [off] ptr mask mem)
@ -2681,7 +2817,17 @@
(VCVTTPD2UQQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPD2UQQMasked128load {sym} [off] ptr mask mem)
(VCVTTPD2UQQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPD2UQQMasked256load {sym} [off] ptr mask mem)
(VCVTTPD2UQQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPD2UQQMasked512load {sym} [off] ptr mask mem)
(VPSIGNB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSIGNB128load {sym} [off] x ptr mem)
(VPSIGNB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSIGNB256load {sym} [off] x ptr mem)
(VPSIGNW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSIGNW128load {sym} [off] x ptr mem)
(VPSIGNW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSIGNW256load {sym} [off] x ptr mem)
(VPSIGND128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSIGND128load {sym} [off] x ptr mem)
(VPSIGND256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSIGND256load {sym} [off] x ptr mem)
(VDIVPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPS128load {sym} [off] x ptr mem)
(VDIVPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPS256load {sym} [off] x ptr mem)
(VDIVPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPS512load {sym} [off] x ptr mem)
(VDIVPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPD128load {sym} [off] x ptr mem)
(VDIVPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPD256load {sym} [off] x ptr mem)
(VDIVPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPD512load {sym} [off] x ptr mem)
(VDIVPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPSMasked128load {sym} [off] x ptr mask mem)
(VDIVPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPSMasked256load {sym} [off] x ptr mask mem)
@ -2689,9 +2835,29 @@
(VDIVPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked128load {sym} [off] x ptr mask mem)
(VDIVPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked256load {sym} [off] x ptr mask mem)
(VDIVPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked512load {sym} [off] x ptr mask mem)
(VPMADDWD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMADDWD128load {sym} [off] x ptr mem)
(VPMADDWD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMADDWD256load {sym} [off] x ptr mem)
(VPMADDWDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMADDWDMasked128load {sym} [off] x ptr mask mem)
(VPMADDWDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMADDWDMasked256load {sym} [off] x ptr mask mem)
(VPMADDUBSW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMADDUBSW128load {sym} [off] x ptr mem)
(VPMADDUBSW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMADDUBSW256load {sym} [off] x ptr mem)
(VPMADDUBSWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMADDUBSWMasked128load {sym} [off] x ptr mask mem)
(VPMADDUBSWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMADDUBSWMasked256load {sym} [off] x ptr mask mem)
(VPCMPEQB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQB128load {sym} [off] x ptr mem)
(VPCMPEQB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQB256load {sym} [off] x ptr mem)
(VPCMPEQW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQW128load {sym} [off] x ptr mem)
(VPCMPEQW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQW256load {sym} [off] x ptr mem)
(VPCMPEQD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD128load {sym} [off] x ptr mem)
(VPCMPEQD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD256load {sym} [off] x ptr mem)
(VPCMPEQD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD512load {sym} [off] x ptr mem)
(VPCMPEQQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ128load {sym} [off] x ptr mem)
(VPCMPEQQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ256load {sym} [off] x ptr mem)
(VPCMPEQQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ512load {sym} [off] x ptr mem)
(VCMPPS128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VCMPPS256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VCMPPD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VCMPPD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VCMPPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VCMPPSMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VCMPPSMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
@ -2699,18 +2865,108 @@
(VCMPPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VCMPPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VCMPPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPWMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPWMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPWMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPWMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPUBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPUBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPUWMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUWMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPUWMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUWMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPUDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPUDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPUDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPUQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPUQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPUQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VEXPANDPSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VEXPANDPSMasked128load {sym} [off] ptr mask mem)
(VEXPANDPSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VEXPANDPSMasked256load {sym} [off] ptr mask mem)
(VEXPANDPDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VEXPANDPDMasked128load {sym} [off] ptr mask mem)
(VEXPANDPDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VEXPANDPDMasked256load {sym} [off] ptr mask mem)
(VPEXPANDBMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPEXPANDBMasked128load {sym} [off] ptr mask mem)
(VPEXPANDBMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPEXPANDBMasked256load {sym} [off] ptr mask mem)
(VPEXPANDWMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPEXPANDWMasked128load {sym} [off] ptr mask mem)
(VPEXPANDWMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPEXPANDWMasked256load {sym} [off] ptr mask mem)
(VPEXPANDDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPEXPANDDMasked128load {sym} [off] ptr mask mem)
(VPEXPANDDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPEXPANDDMasked256load {sym} [off] ptr mask mem)
(VPEXPANDQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPEXPANDQMasked128load {sym} [off] ptr mask mem)
(VPEXPANDQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPEXPANDQMasked256load {sym} [off] ptr mask mem)
(VPMOVSXBQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBQ128load {sym} [off] ptr mem)
(VPMOVSXWQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXWQ128load {sym} [off] ptr mem)
(VPMOVSXDQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXDQ128load {sym} [off] ptr mem)
(VPMOVSXWQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXWQMasked128load {sym} [off] ptr mask mem)
(VPMOVSXDQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXDQMasked128load {sym} [off] ptr mask mem)
(VPMOVZXBQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBQ128load {sym} [off] ptr mem)
(VPMOVZXWQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXWQ128load {sym} [off] ptr mem)
(VPMOVZXDQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXDQ128load {sym} [off] ptr mem)
(VPMOVZXWQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXWQMasked128load {sym} [off] ptr mask mem)
(VPMOVZXDQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXDQMasked128load {sym} [off] ptr mask mem)
(VPMOVSXBD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBD128load {sym} [off] ptr mem)
(VPMOVSXWD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXWD128load {sym} [off] ptr mem)
(VPMOVSXBDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBDMasked128load {sym} [off] ptr mask mem)
(VPMOVSXWDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXWDMasked128load {sym} [off] ptr mask mem)
(VPMOVSXBQ256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBQ256load {sym} [off] ptr mem)
(VPMOVSXWQ256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXWQ256load {sym} [off] ptr mem)
(VPMOVSXBQMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBQMasked256load {sym} [off] ptr mask mem)
(VPMOVSXWQMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXWQMasked256load {sym} [off] ptr mask mem)
(VPMOVZXBD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBD128load {sym} [off] ptr mem)
(VPMOVZXWD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXWD128load {sym} [off] ptr mem)
(VPMOVZXBDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBDMasked128load {sym} [off] ptr mask mem)
(VPMOVZXWDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXWDMasked128load {sym} [off] ptr mask mem)
(VPMOVZXBQ256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBQ256load {sym} [off] ptr mem)
(VPMOVZXWQ256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXWQ256load {sym} [off] ptr mem)
(VPMOVZXBQMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBQMasked256load {sym} [off] ptr mask mem)
(VPMOVZXWQMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXWQMasked256load {sym} [off] ptr mask mem)
(VPMOVSXBW128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBW128load {sym} [off] ptr mem)
(VPMOVSXBWMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBWMasked128load {sym} [off] ptr mask mem)
(VPMOVSXBD256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBD256load {sym} [off] ptr mem)
(VPMOVSXBDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBDMasked256load {sym} [off] ptr mask mem)
(VPMOVSXBQ512 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBQ512load {sym} [off] ptr mem)
(VPMOVSXBQMasked512 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBQMasked512load {sym} [off] ptr mask mem)
(VPMOVZXBW128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBW128load {sym} [off] ptr mem)
(VPMOVZXBWMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBWMasked128load {sym} [off] ptr mask mem)
(VPMOVZXBD256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBD256load {sym} [off] ptr mem)
(VPMOVZXBDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBDMasked256load {sym} [off] ptr mask mem)
(VPMOVZXBQ512 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBQ512load {sym} [off] ptr mem)
(VPMOVZXBQMasked512 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBQMasked512load {sym} [off] ptr mask mem)
(VPMOVSXBW256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBW256load {sym} [off] ptr mem)
(VPMOVSXBW512 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBW512load {sym} [off] ptr mem)
(VPMOVSXBWMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBWMasked256load {sym} [off] ptr mask mem)
(VPMOVSXBWMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBWMasked512load {sym} [off] ptr mask mem)
(VPMOVSXBD512 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBD512load {sym} [off] ptr mem)
(VPMOVSXWD256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXWD256load {sym} [off] ptr mem)
(VPMOVSXWD512 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXWD512load {sym} [off] ptr mem)
(VPMOVSXBDMasked512 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXBDMasked512load {sym} [off] ptr mask mem)
(VPMOVSXWDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXWDMasked256load {sym} [off] ptr mask mem)
(VPMOVSXWDMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXWDMasked512load {sym} [off] ptr mask mem)
(VPMOVSXWQ512 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXWQ512load {sym} [off] ptr mem)
(VPMOVSXDQ256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXDQ256load {sym} [off] ptr mem)
(VPMOVSXDQ512 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXDQ512load {sym} [off] ptr mem)
(VPMOVSXWQMasked512 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXWQMasked512load {sym} [off] ptr mask mem)
(VPMOVSXDQMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXDQMasked256load {sym} [off] ptr mask mem)
(VPMOVSXDQMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVSXDQMasked512load {sym} [off] ptr mask mem)
(VPMOVZXBW256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBW256load {sym} [off] ptr mem)
(VPMOVZXBW512 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBW512load {sym} [off] ptr mem)
(VPMOVZXBWMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBWMasked256load {sym} [off] ptr mask mem)
(VPMOVZXBWMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBWMasked512load {sym} [off] ptr mask mem)
(VPMOVZXBD512 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBD512load {sym} [off] ptr mem)
(VPMOVZXWD256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXWD256load {sym} [off] ptr mem)
(VPMOVZXWD512 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXWD512load {sym} [off] ptr mem)
(VPMOVZXBDMasked512 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXBDMasked512load {sym} [off] ptr mask mem)
(VPMOVZXWDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXWDMasked256load {sym} [off] ptr mask mem)
(VPMOVZXWDMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXWDMasked512load {sym} [off] ptr mask mem)
(VPMOVZXWQ512 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXWQ512load {sym} [off] ptr mem)
(VPMOVZXDQ256 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXDQ256load {sym} [off] ptr mem)
(VPMOVZXDQ512 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXDQ512load {sym} [off] ptr mem)
(VPMOVZXWQMasked512 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXWQMasked512load {sym} [off] ptr mask mem)
(VPMOVZXDQMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXDQMasked256load {sym} [off] ptr mask mem)
(VPMOVZXDQMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMOVZXDQMasked512load {sym} [off] ptr mask mem)
(VGF2P8AFFINEQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VGF2P8AFFINEQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VGF2P8AFFINEQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
@ -2723,15 +2979,39 @@
(VGF2P8AFFINEQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VGF2P8AFFINEQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VGF2P8AFFINEQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VGF2P8MULB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8MULB128load {sym} [off] x ptr mem)
(VGF2P8MULB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8MULB256load {sym} [off] x ptr mem)
(VGF2P8MULBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8MULBMasked128load {sym} [off] x ptr mask mem)
(VGF2P8MULBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8MULBMasked256load {sym} [off] x ptr mask mem)
(VPCMPGTB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTB128load {sym} [off] x ptr mem)
(VPCMPGTB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTB256load {sym} [off] x ptr mem)
(VPCMPGTW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTW128load {sym} [off] x ptr mem)
(VPCMPGTW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTW256load {sym} [off] x ptr mem)
(VPCMPGTD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTD128load {sym} [off] x ptr mem)
(VPCMPGTD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTD256load {sym} [off] x ptr mem)
(VPCMPGTD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTD512load {sym} [off] x ptr mem)
(VPCMPGTQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTQ128load {sym} [off] x ptr mem)
(VPCMPGTQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTQ256load {sym} [off] x ptr mem)
(VPCMPGTQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTQ512load {sym} [off] x ptr mem)
(VPCMPUD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPUD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPCMPUQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPCMPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPCMPQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPUNPCKHWD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHWD128load {sym} [off] x ptr mem)
(VPUNPCKHDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHDQ128load {sym} [off] x ptr mem)
(VPUNPCKHQDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHQDQ128load {sym} [off] x ptr mem)
(VPUNPCKHWD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHWD256load {sym} [off] x ptr mem)
(VPUNPCKHDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHDQ256load {sym} [off] x ptr mem)
(VPUNPCKHDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHDQ512load {sym} [off] x ptr mem)
(VPUNPCKHQDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHQDQ256load {sym} [off] x ptr mem)
(VPUNPCKHQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHQDQ512load {sym} [off] x ptr mem)
(VPUNPCKLWD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLWD128load {sym} [off] x ptr mem)
(VPUNPCKLDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ128load {sym} [off] x ptr mem)
(VPUNPCKLQDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ128load {sym} [off] x ptr mem)
(VPUNPCKLWD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLWD256load {sym} [off] x ptr mem)
(VPUNPCKLDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ256load {sym} [off] x ptr mem)
(VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ512load {sym} [off] x ptr mem)
(VPUNPCKLQDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ256load {sym} [off] x ptr mem)
(VPUNPCKLQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ512load {sym} [off] x ptr mem)
(VPLZCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTD128load {sym} [off] ptr mem)
(VPLZCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTD256load {sym} [off] ptr mem)
@ -2745,12 +3025,28 @@
(VPLZCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQMasked128load {sym} [off] ptr mask mem)
(VPLZCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQMasked256load {sym} [off] ptr mask mem)
(VPLZCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQMasked512load {sym} [off] ptr mask mem)
(VMAXPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS128load {sym} [off] x ptr mem)
(VMAXPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS256load {sym} [off] x ptr mem)
(VMAXPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS512load {sym} [off] x ptr mem)
(VMAXPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPD128load {sym} [off] x ptr mem)
(VMAXPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPD256load {sym} [off] x ptr mem)
(VMAXPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPD512load {sym} [off] x ptr mem)
(VPMAXSB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSB128load {sym} [off] x ptr mem)
(VPMAXSB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSB256load {sym} [off] x ptr mem)
(VPMAXSW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSW128load {sym} [off] x ptr mem)
(VPMAXSW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSW256load {sym} [off] x ptr mem)
(VPMAXSD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSD128load {sym} [off] x ptr mem)
(VPMAXSD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSD256load {sym} [off] x ptr mem)
(VPMAXSD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSD512load {sym} [off] x ptr mem)
(VPMAXSQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQ128load {sym} [off] x ptr mem)
(VPMAXSQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQ256load {sym} [off] x ptr mem)
(VPMAXSQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQ512load {sym} [off] x ptr mem)
(VPMAXUB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUB128load {sym} [off] x ptr mem)
(VPMAXUB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUB256load {sym} [off] x ptr mem)
(VPMAXUW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUW128load {sym} [off] x ptr mem)
(VPMAXUW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUW256load {sym} [off] x ptr mem)
(VPMAXUD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUD128load {sym} [off] x ptr mem)
(VPMAXUD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUD256load {sym} [off] x ptr mem)
(VPMAXUD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUD512load {sym} [off] x ptr mem)
(VPMAXUQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQ128load {sym} [off] x ptr mem)
(VPMAXUQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQ256load {sym} [off] x ptr mem)
@ -2761,24 +3057,48 @@
(VMAXPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPDMasked128load {sym} [off] x ptr mask mem)
(VMAXPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPDMasked256load {sym} [off] x ptr mask mem)
(VMAXPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPDMasked512load {sym} [off] x ptr mask mem)
(VPMAXSBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSBMasked128load {sym} [off] x ptr mask mem)
(VPMAXSBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSBMasked256load {sym} [off] x ptr mask mem)
(VPMAXSWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSWMasked128load {sym} [off] x ptr mask mem)
(VPMAXSWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSWMasked256load {sym} [off] x ptr mask mem)
(VPMAXSDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSDMasked128load {sym} [off] x ptr mask mem)
(VPMAXSDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSDMasked256load {sym} [off] x ptr mask mem)
(VPMAXSDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSDMasked512load {sym} [off] x ptr mask mem)
(VPMAXSQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQMasked128load {sym} [off] x ptr mask mem)
(VPMAXSQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQMasked256load {sym} [off] x ptr mask mem)
(VPMAXSQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQMasked512load {sym} [off] x ptr mask mem)
(VPMAXUBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUBMasked128load {sym} [off] x ptr mask mem)
(VPMAXUBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUBMasked256load {sym} [off] x ptr mask mem)
(VPMAXUWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUWMasked128load {sym} [off] x ptr mask mem)
(VPMAXUWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUWMasked256load {sym} [off] x ptr mask mem)
(VPMAXUDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUDMasked128load {sym} [off] x ptr mask mem)
(VPMAXUDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUDMasked256load {sym} [off] x ptr mask mem)
(VPMAXUDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUDMasked512load {sym} [off] x ptr mask mem)
(VPMAXUQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQMasked128load {sym} [off] x ptr mask mem)
(VPMAXUQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQMasked256load {sym} [off] x ptr mask mem)
(VPMAXUQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQMasked512load {sym} [off] x ptr mask mem)
(VMINPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPS128load {sym} [off] x ptr mem)
(VMINPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPS256load {sym} [off] x ptr mem)
(VMINPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPS512load {sym} [off] x ptr mem)
(VMINPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPD128load {sym} [off] x ptr mem)
(VMINPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPD256load {sym} [off] x ptr mem)
(VMINPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPD512load {sym} [off] x ptr mem)
(VPMINSB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSB128load {sym} [off] x ptr mem)
(VPMINSB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSB256load {sym} [off] x ptr mem)
(VPMINSW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSW128load {sym} [off] x ptr mem)
(VPMINSW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSW256load {sym} [off] x ptr mem)
(VPMINSD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSD128load {sym} [off] x ptr mem)
(VPMINSD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSD256load {sym} [off] x ptr mem)
(VPMINSD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSD512load {sym} [off] x ptr mem)
(VPMINSQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSQ128load {sym} [off] x ptr mem)
(VPMINSQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSQ256load {sym} [off] x ptr mem)
(VPMINSQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSQ512load {sym} [off] x ptr mem)
(VPMINUB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUB128load {sym} [off] x ptr mem)
(VPMINUB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUB256load {sym} [off] x ptr mem)
(VPMINUW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUW128load {sym} [off] x ptr mem)
(VPMINUW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUW256load {sym} [off] x ptr mem)
(VPMINUD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUD128load {sym} [off] x ptr mem)
(VPMINUD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUD256load {sym} [off] x ptr mem)
(VPMINUD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUD512load {sym} [off] x ptr mem)
(VPMINUQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUQ128load {sym} [off] x ptr mem)
(VPMINUQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUQ256load {sym} [off] x ptr mem)
@ -2789,25 +3109,45 @@
(VMINPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPDMasked128load {sym} [off] x ptr mask mem)
(VMINPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPDMasked256load {sym} [off] x ptr mask mem)
(VMINPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPDMasked512load {sym} [off] x ptr mask mem)
(VPMINSBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSBMasked128load {sym} [off] x ptr mask mem)
(VPMINSBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSBMasked256load {sym} [off] x ptr mask mem)
(VPMINSWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSWMasked128load {sym} [off] x ptr mask mem)
(VPMINSWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSWMasked256load {sym} [off] x ptr mask mem)
(VPMINSDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSDMasked128load {sym} [off] x ptr mask mem)
(VPMINSDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSDMasked256load {sym} [off] x ptr mask mem)
(VPMINSDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSDMasked512load {sym} [off] x ptr mask mem)
(VPMINSQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSQMasked128load {sym} [off] x ptr mask mem)
(VPMINSQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSQMasked256load {sym} [off] x ptr mask mem)
(VPMINSQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSQMasked512load {sym} [off] x ptr mask mem)
(VPMINUBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUBMasked128load {sym} [off] x ptr mask mem)
(VPMINUBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUBMasked256load {sym} [off] x ptr mask mem)
(VPMINUWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUWMasked128load {sym} [off] x ptr mask mem)
(VPMINUWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUWMasked256load {sym} [off] x ptr mask mem)
(VPMINUDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUDMasked128load {sym} [off] x ptr mask mem)
(VPMINUDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUDMasked256load {sym} [off] x ptr mask mem)
(VPMINUDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUDMasked512load {sym} [off] x ptr mask mem)
(VPMINUQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUQMasked128load {sym} [off] x ptr mask mem)
(VPMINUQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUQMasked256load {sym} [off] x ptr mask mem)
(VPMINUQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUQMasked512load {sym} [off] x ptr mask mem)
(VMULPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPS128load {sym} [off] x ptr mem)
(VMULPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPS256load {sym} [off] x ptr mem)
(VMULPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPS512load {sym} [off] x ptr mem)
(VMULPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPD128load {sym} [off] x ptr mem)
(VMULPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPD256load {sym} [off] x ptr mem)
(VMULPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPD512load {sym} [off] x ptr mem)
(VPMULLW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLW128load {sym} [off] x ptr mem)
(VPMULLW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLW256load {sym} [off] x ptr mem)
(VPMULLD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLD128load {sym} [off] x ptr mem)
(VPMULLD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLD256load {sym} [off] x ptr mem)
(VPMULLD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLD512load {sym} [off] x ptr mem)
(VPMULLQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLQ128load {sym} [off] x ptr mem)
(VPMULLQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLQ256load {sym} [off] x ptr mem)
(VPMULLQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLQ512load {sym} [off] x ptr mem)
(VFMADD213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PS128load {sym} [off] x y ptr mem)
(VFMADD213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PS256load {sym} [off] x y ptr mem)
(VFMADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PS512load {sym} [off] x y ptr mem)
(VFMADD213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PD128load {sym} [off] x y ptr mem)
(VFMADD213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PD256load {sym} [off] x y ptr mem)
(VFMADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PD512load {sym} [off] x y ptr mem)
(VFMADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PSMasked128load {sym} [off] x y ptr mask mem)
(VFMADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PSMasked256load {sym} [off] x y ptr mask mem)
@ -2815,7 +3155,11 @@
(VFMADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PDMasked128load {sym} [off] x y ptr mask mem)
(VFMADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PDMasked256load {sym} [off] x y ptr mask mem)
(VFMADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PDMasked512load {sym} [off] x y ptr mask mem)
(VFMADDSUB213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PS128load {sym} [off] x y ptr mem)
(VFMADDSUB213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PS256load {sym} [off] x y ptr mem)
(VFMADDSUB213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PS512load {sym} [off] x y ptr mem)
(VFMADDSUB213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PD128load {sym} [off] x y ptr mem)
(VFMADDSUB213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PD256load {sym} [off] x y ptr mem)
(VFMADDSUB213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PD512load {sym} [off] x y ptr mem)
(VFMADDSUB213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PSMasked128load {sym} [off] x y ptr mask mem)
(VFMADDSUB213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PSMasked256load {sym} [off] x y ptr mask mem)
@ -2823,19 +3167,37 @@
(VFMADDSUB213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PDMasked128load {sym} [off] x y ptr mask mem)
(VFMADDSUB213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PDMasked256load {sym} [off] x y ptr mask mem)
(VFMADDSUB213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PDMasked512load {sym} [off] x y ptr mask mem)
(VPMULDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULDQ128load {sym} [off] x ptr mem)
(VPMULDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULDQ256load {sym} [off] x ptr mem)
(VPMULUDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULUDQ128load {sym} [off] x ptr mem)
(VPMULUDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULUDQ256load {sym} [off] x ptr mem)
(VPMULHW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULHW128load {sym} [off] x ptr mem)
(VPMULHW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULHW256load {sym} [off] x ptr mem)
(VPMULHUW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULHUW128load {sym} [off] x ptr mem)
(VPMULHUW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULHUW256load {sym} [off] x ptr mem)
(VPMULHWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULHWMasked128load {sym} [off] x ptr mask mem)
(VPMULHWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULHWMasked256load {sym} [off] x ptr mask mem)
(VPMULHUWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULHUWMasked128load {sym} [off] x ptr mask mem)
(VPMULHUWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULHUWMasked256load {sym} [off] x ptr mask mem)
(VMULPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPSMasked128load {sym} [off] x ptr mask mem)
(VMULPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPSMasked256load {sym} [off] x ptr mask mem)
(VMULPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPSMasked512load {sym} [off] x ptr mask mem)
(VMULPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPDMasked128load {sym} [off] x ptr mask mem)
(VMULPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPDMasked256load {sym} [off] x ptr mask mem)
(VMULPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPDMasked512load {sym} [off] x ptr mask mem)
(VPMULLWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLWMasked128load {sym} [off] x ptr mask mem)
(VPMULLWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLWMasked256load {sym} [off] x ptr mask mem)
(VPMULLDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLDMasked128load {sym} [off] x ptr mask mem)
(VPMULLDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLDMasked256load {sym} [off] x ptr mask mem)
(VPMULLDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLDMasked512load {sym} [off] x ptr mask mem)
(VPMULLQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLQMasked128load {sym} [off] x ptr mask mem)
(VPMULLQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLQMasked256load {sym} [off] x ptr mask mem)
(VPMULLQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLQMasked512load {sym} [off] x ptr mask mem)
(VFMSUBADD213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PS128load {sym} [off] x y ptr mem)
(VFMSUBADD213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PS256load {sym} [off] x y ptr mem)
(VFMSUBADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PS512load {sym} [off] x y ptr mem)
(VFMSUBADD213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PD128load {sym} [off] x y ptr mem)
(VFMSUBADD213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PD256load {sym} [off] x y ptr mem)
(VFMSUBADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PD512load {sym} [off] x y ptr mem)
(VFMSUBADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PSMasked128load {sym} [off] x y ptr mask mem)
(VFMSUBADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PSMasked256load {sym} [off] x y ptr mask mem)
@ -2843,18 +3205,28 @@
(VFMSUBADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PDMasked128load {sym} [off] x y ptr mask mem)
(VFMSUBADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PDMasked256load {sym} [off] x y ptr mask mem)
(VFMSUBADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PDMasked512load {sym} [off] x y ptr mask mem)
(VPOPCNTB128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTB128load {sym} [off] ptr mem)
(VPOPCNTB256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTB256load {sym} [off] ptr mem)
(VPOPCNTW128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTW128load {sym} [off] ptr mem)
(VPOPCNTW256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTW256load {sym} [off] ptr mem)
(VPOPCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTD128load {sym} [off] ptr mem)
(VPOPCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTD256load {sym} [off] ptr mem)
(VPOPCNTD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTD512load {sym} [off] ptr mem)
(VPOPCNTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQ128load {sym} [off] ptr mem)
(VPOPCNTQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQ256load {sym} [off] ptr mem)
(VPOPCNTQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQ512load {sym} [off] ptr mem)
(VPOPCNTBMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTBMasked128load {sym} [off] ptr mask mem)
(VPOPCNTBMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTBMasked256load {sym} [off] ptr mask mem)
(VPOPCNTWMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTWMasked128load {sym} [off] ptr mask mem)
(VPOPCNTWMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTWMasked256load {sym} [off] ptr mask mem)
(VPOPCNTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTDMasked128load {sym} [off] ptr mask mem)
(VPOPCNTDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTDMasked256load {sym} [off] ptr mask mem)
(VPOPCNTDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTDMasked512load {sym} [off] ptr mask mem)
(VPOPCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQMasked128load {sym} [off] ptr mask mem)
(VPOPCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQMasked256load {sym} [off] ptr mask mem)
(VPOPCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQMasked512load {sym} [off] ptr mask mem)
(VPOR128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOR128load {sym} [off] x ptr mem)
(VPOR256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOR256load {sym} [off] x ptr mem)
(VPORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPORD512load {sym} [off] x ptr mem)
(VPORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPORQ512load {sym} [off] x ptr mem)
(VPORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORDMasked128load {sym} [off] x ptr mask mem)
@ -2863,12 +3235,22 @@
(VPORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORQMasked128load {sym} [off] x ptr mask mem)
(VPORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORQMasked256load {sym} [off] x ptr mask mem)
(VPORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORQMasked512load {sym} [off] x ptr mask mem)
(VPERMB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMB128load {sym} [off] x ptr mem)
(VPERMB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMB256load {sym} [off] x ptr mem)
(VPERMW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMW128load {sym} [off] x ptr mem)
(VPERMW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMW256load {sym} [off] x ptr mem)
(VPERMPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPS256load {sym} [off] x ptr mem)
(VPERMD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMD256load {sym} [off] x ptr mem)
(VPERMPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPS512load {sym} [off] x ptr mem)
(VPERMD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMD512load {sym} [off] x ptr mem)
(VPERMPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPD256load {sym} [off] x ptr mem)
(VPERMQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMQ256load {sym} [off] x ptr mem)
(VPERMPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPD512load {sym} [off] x ptr mem)
(VPERMQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMQ512load {sym} [off] x ptr mem)
(VPERMBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMBMasked128load {sym} [off] x ptr mask mem)
(VPERMBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMBMasked256load {sym} [off] x ptr mask mem)
(VPERMWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMWMasked128load {sym} [off] x ptr mask mem)
(VPERMWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMWMasked256load {sym} [off] x ptr mask mem)
(VPERMPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPSMasked256load {sym} [off] x ptr mask mem)
(VPERMDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMDMasked256load {sym} [off] x ptr mask mem)
(VPERMPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPSMasked512load {sym} [off] x ptr mask mem)
@ -2877,6 +3259,12 @@
(VPERMQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMQMasked256load {sym} [off] x ptr mask mem)
(VPERMPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPDMasked512load {sym} [off] x ptr mask mem)
(VPERMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMQMasked512load {sym} [off] x ptr mask mem)
(VPSHUFB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFB128load {sym} [off] x ptr mem)
(VPSHUFB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFB256load {sym} [off] x ptr mem)
(VPSHUFBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFBMasked256load {sym} [off] x ptr mask mem)
(VPSHUFBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFBMasked128load {sym} [off] x ptr mask mem)
(VRCPPS128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCPPS128load {sym} [off] ptr mem)
(VRCPPS256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCPPS256load {sym} [off] ptr mem)
(VRCP14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCP14PS512load {sym} [off] ptr mem)
(VRCP14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCP14PD128load {sym} [off] ptr mem)
(VRCP14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCP14PD256load {sym} [off] ptr mem)
@ -2887,6 +3275,8 @@
(VRCP14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PDMasked128load {sym} [off] ptr mask mem)
(VRCP14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PDMasked256load {sym} [off] ptr mask mem)
(VRCP14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PDMasked512load {sym} [off] ptr mask mem)
(VRSQRTPS128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRTPS128load {sym} [off] ptr mem)
(VRSQRTPS256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRTPS256load {sym} [off] ptr mem)
(VRSQRT14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PS512load {sym} [off] ptr mem)
(VRSQRT14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PD128load {sym} [off] ptr mem)
(VRSQRT14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PD256load {sym} [off] ptr mem)
@ -2945,10 +3335,14 @@
(VPRORVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked128load {sym} [off] x ptr mask mem)
(VPRORVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked256load {sym} [off] x ptr mask mem)
(VPRORVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked512load {sym} [off] x ptr mask mem)
(VPACKSSDW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW128load {sym} [off] x ptr mem)
(VPACKSSDW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW256load {sym} [off] x ptr mem)
(VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW512load {sym} [off] x ptr mem)
(VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked256load {sym} [off] x ptr mask mem)
(VPACKSSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked512load {sym} [off] x ptr mask mem)
(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem)
(VPACKUSDW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW128load {sym} [off] x ptr mem)
(VPACKUSDW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW256load {sym} [off] x ptr mem)
(VPACKUSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW512load {sym} [off] x ptr mem)
(VPACKUSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked256load {sym} [off] x ptr mask mem)
(VPACKUSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked512load {sym} [off] x ptr mask mem)
@ -2965,81 +3359,129 @@
(VSCALEFPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked128load {sym} [off] x ptr mask mem)
(VSCALEFPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked256load {sym} [off] x ptr mask mem)
(VSCALEFPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked512load {sym} [off] x ptr mask mem)
(VPERM2F128256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERM2F128256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPERM2I128256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERM2I128256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VINSERTF128256 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VINSERTF128256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VINSERTF64X4512 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VINSERTF64X4512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VINSERTI128256 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VINSERTI128256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VINSERTI64X4512 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VINSERTI64X4512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHLDW128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDW128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHLDW256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDW256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHLDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHLDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHLDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHLDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHLDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHLDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHLDWMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDWMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHLDWMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDWMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHLDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHLDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHLDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHLDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHLDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHLDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHRDW128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDW128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHRDW256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDW256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHRDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHRDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHRDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHRDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHRDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHRDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHRDWMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDWMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHRDWMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDWMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHRDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHRDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHRDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHRDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHRDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSHRDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSLLVW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVW128load {sym} [off] x ptr mem)
(VPSLLVW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVW256load {sym} [off] x ptr mem)
(VPSLLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVD128load {sym} [off] x ptr mem)
(VPSLLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVD256load {sym} [off] x ptr mem)
(VPSLLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVD512load {sym} [off] x ptr mem)
(VPSLLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQ128load {sym} [off] x ptr mem)
(VPSLLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQ256load {sym} [off] x ptr mem)
(VPSLLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQ512load {sym} [off] x ptr mem)
(VPSHLDVW128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVW128load {sym} [off] x y ptr mem)
(VPSHLDVW256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVW256load {sym} [off] x y ptr mem)
(VPSHLDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVD128load {sym} [off] x y ptr mem)
(VPSHLDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVD256load {sym} [off] x y ptr mem)
(VPSHLDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVD512load {sym} [off] x y ptr mem)
(VPSHLDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQ128load {sym} [off] x y ptr mem)
(VPSHLDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQ256load {sym} [off] x y ptr mem)
(VPSHLDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQ512load {sym} [off] x y ptr mem)
(VPSHLDVWMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVWMasked128load {sym} [off] x y ptr mask mem)
(VPSHLDVWMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVWMasked256load {sym} [off] x y ptr mask mem)
(VPSHLDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVDMasked128load {sym} [off] x y ptr mask mem)
(VPSHLDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVDMasked256load {sym} [off] x y ptr mask mem)
(VPSHLDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVDMasked512load {sym} [off] x y ptr mask mem)
(VPSHLDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQMasked128load {sym} [off] x y ptr mask mem)
(VPSHLDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQMasked256load {sym} [off] x y ptr mask mem)
(VPSHLDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQMasked512load {sym} [off] x y ptr mask mem)
(VPSLLVWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVWMasked128load {sym} [off] x ptr mask mem)
(VPSLLVWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVWMasked256load {sym} [off] x ptr mask mem)
(VPSLLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVDMasked128load {sym} [off] x ptr mask mem)
(VPSLLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVDMasked256load {sym} [off] x ptr mask mem)
(VPSLLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVDMasked512load {sym} [off] x ptr mask mem)
(VPSLLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQMasked128load {sym} [off] x ptr mask mem)
(VPSLLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQMasked256load {sym} [off] x ptr mask mem)
(VPSLLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQMasked512load {sym} [off] x ptr mask mem)
(VPSRAVW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVW128load {sym} [off] x ptr mem)
(VPSRAVW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVW256load {sym} [off] x ptr mem)
(VPSRAVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVD128load {sym} [off] x ptr mem)
(VPSRAVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVD256load {sym} [off] x ptr mem)
(VPSRAVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVD512load {sym} [off] x ptr mem)
(VPSRAVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQ128load {sym} [off] x ptr mem)
(VPSRAVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQ256load {sym} [off] x ptr mem)
(VPSRAVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQ512load {sym} [off] x ptr mem)
(VPSRLVW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVW128load {sym} [off] x ptr mem)
(VPSRLVW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVW256load {sym} [off] x ptr mem)
(VPSRLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVD128load {sym} [off] x ptr mem)
(VPSRLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVD256load {sym} [off] x ptr mem)
(VPSRLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVD512load {sym} [off] x ptr mem)
(VPSRLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQ128load {sym} [off] x ptr mem)
(VPSRLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQ256load {sym} [off] x ptr mem)
(VPSRLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQ512load {sym} [off] x ptr mem)
(VPSHRDVW128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVW128load {sym} [off] x y ptr mem)
(VPSHRDVW256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVW256load {sym} [off] x y ptr mem)
(VPSHRDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVD128load {sym} [off] x y ptr mem)
(VPSHRDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVD256load {sym} [off] x y ptr mem)
(VPSHRDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVD512load {sym} [off] x y ptr mem)
(VPSHRDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQ128load {sym} [off] x y ptr mem)
(VPSHRDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQ256load {sym} [off] x y ptr mem)
(VPSHRDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQ512load {sym} [off] x y ptr mem)
(VPSHRDVWMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVWMasked128load {sym} [off] x y ptr mask mem)
(VPSHRDVWMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVWMasked256load {sym} [off] x y ptr mask mem)
(VPSHRDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVDMasked128load {sym} [off] x y ptr mask mem)
(VPSHRDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVDMasked256load {sym} [off] x y ptr mask mem)
(VPSHRDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVDMasked512load {sym} [off] x y ptr mask mem)
(VPSHRDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQMasked128load {sym} [off] x y ptr mask mem)
(VPSHRDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQMasked256load {sym} [off] x y ptr mask mem)
(VPSHRDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQMasked512load {sym} [off] x y ptr mask mem)
(VPSRAVWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVWMasked128load {sym} [off] x ptr mask mem)
(VPSRAVWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVWMasked256load {sym} [off] x ptr mask mem)
(VPSRAVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVDMasked128load {sym} [off] x ptr mask mem)
(VPSRAVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVDMasked256load {sym} [off] x ptr mask mem)
(VPSRAVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVDMasked512load {sym} [off] x ptr mask mem)
(VPSRAVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQMasked128load {sym} [off] x ptr mask mem)
(VPSRAVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQMasked256load {sym} [off] x ptr mask mem)
(VPSRAVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQMasked512load {sym} [off] x ptr mask mem)
(VPSRLVWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVWMasked128load {sym} [off] x ptr mask mem)
(VPSRLVWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVWMasked256load {sym} [off] x ptr mask mem)
(VPSRLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVDMasked128load {sym} [off] x ptr mask mem)
(VPSRLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVDMasked256load {sym} [off] x ptr mask mem)
(VPSRLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVDMasked512load {sym} [off] x ptr mask mem)
(VPSRLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQMasked128load {sym} [off] x ptr mask mem)
(VPSRLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQMasked256load {sym} [off] x ptr mask mem)
(VPSRLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQMasked512load {sym} [off] x ptr mask mem)
(VSQRTPS128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPS128load {sym} [off] ptr mem)
(VSQRTPS256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPS256load {sym} [off] ptr mem)
(VSQRTPS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPS512load {sym} [off] ptr mem)
(VSQRTPD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPD128load {sym} [off] ptr mem)
(VSQRTPD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPD256load {sym} [off] ptr mem)
(VSQRTPD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPD512load {sym} [off] ptr mem)
(VSQRTPSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPSMasked128load {sym} [off] ptr mask mem)
(VSQRTPSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPSMasked256load {sym} [off] ptr mask mem)
@ -3047,9 +3489,21 @@
(VSQRTPDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPDMasked128load {sym} [off] ptr mask mem)
(VSQRTPDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPDMasked256load {sym} [off] ptr mask mem)
(VSQRTPDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPDMasked512load {sym} [off] ptr mask mem)
(VSUBPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPS128load {sym} [off] x ptr mem)
(VSUBPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPS256load {sym} [off] x ptr mem)
(VSUBPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPS512load {sym} [off] x ptr mem)
(VSUBPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPD128load {sym} [off] x ptr mem)
(VSUBPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPD256load {sym} [off] x ptr mem)
(VSUBPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPD512load {sym} [off] x ptr mem)
(VPSUBB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBB128load {sym} [off] x ptr mem)
(VPSUBB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBB256load {sym} [off] x ptr mem)
(VPSUBW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBW128load {sym} [off] x ptr mem)
(VPSUBW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBW256load {sym} [off] x ptr mem)
(VPSUBD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBD128load {sym} [off] x ptr mem)
(VPSUBD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBD256load {sym} [off] x ptr mem)
(VPSUBD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBD512load {sym} [off] x ptr mem)
(VPSUBQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBQ128load {sym} [off] x ptr mem)
(VPSUBQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBQ256load {sym} [off] x ptr mem)
(VPSUBQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBQ512load {sym} [off] x ptr mem)
(VSUBPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPSMasked128load {sym} [off] x ptr mask mem)
(VSUBPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPSMasked256load {sym} [off] x ptr mask mem)
@ -3057,12 +3511,46 @@
(VSUBPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPDMasked128load {sym} [off] x ptr mask mem)
(VSUBPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPDMasked256load {sym} [off] x ptr mask mem)
(VSUBPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPDMasked512load {sym} [off] x ptr mask mem)
(VPSUBBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBBMasked128load {sym} [off] x ptr mask mem)
(VPSUBBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBBMasked256load {sym} [off] x ptr mask mem)
(VPSUBWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBWMasked128load {sym} [off] x ptr mask mem)
(VPSUBWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBWMasked256load {sym} [off] x ptr mask mem)
(VPSUBDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBDMasked128load {sym} [off] x ptr mask mem)
(VPSUBDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBDMasked256load {sym} [off] x ptr mask mem)
(VPSUBDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBDMasked512load {sym} [off] x ptr mask mem)
(VPSUBQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBQMasked128load {sym} [off] x ptr mask mem)
(VPSUBQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBQMasked256load {sym} [off] x ptr mask mem)
(VPSUBQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBQMasked512load {sym} [off] x ptr mask mem)
(VHSUBPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VHSUBPS128load {sym} [off] x ptr mem)
(VHSUBPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VHSUBPD128load {sym} [off] x ptr mem)
(VPHSUBW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPHSUBW128load {sym} [off] x ptr mem)
(VPHSUBD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPHSUBD128load {sym} [off] x ptr mem)
(VHSUBPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VHSUBPS256load {sym} [off] x ptr mem)
(VHSUBPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VHSUBPD256load {sym} [off] x ptr mem)
(VPHSUBW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPHSUBW256load {sym} [off] x ptr mem)
(VPHSUBD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPHSUBD256load {sym} [off] x ptr mem)
(VPHSUBSW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPHSUBSW128load {sym} [off] x ptr mem)
(VPHSUBSW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPHSUBSW256load {sym} [off] x ptr mem)
(VPSUBSB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBSB128load {sym} [off] x ptr mem)
(VPSUBSB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBSB256load {sym} [off] x ptr mem)
(VPSUBSW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBSW128load {sym} [off] x ptr mem)
(VPSUBSW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBSW256load {sym} [off] x ptr mem)
(VPSUBUSB128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBUSB128load {sym} [off] x ptr mem)
(VPSUBUSB256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBUSB256load {sym} [off] x ptr mem)
(VPSUBUSW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBUSW128load {sym} [off] x ptr mem)
(VPSUBUSW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBUSW256load {sym} [off] x ptr mem)
(VPSUBSBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBSBMasked128load {sym} [off] x ptr mask mem)
(VPSUBSBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBSBMasked256load {sym} [off] x ptr mask mem)
(VPSUBSWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBSWMasked128load {sym} [off] x ptr mask mem)
(VPSUBSWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBSWMasked256load {sym} [off] x ptr mask mem)
(VPSUBUSBMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBUSBMasked128load {sym} [off] x ptr mask mem)
(VPSUBUSBMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBUSBMasked256load {sym} [off] x ptr mask mem)
(VPSUBUSWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBUSWMasked128load {sym} [off] x ptr mask mem)
(VPSUBUSWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBUSWMasked256load {sym} [off] x ptr mask mem)
(VPSADBW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSADBW128load {sym} [off] x ptr mem)
(VPSADBW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSADBW256load {sym} [off] x ptr mem)
(VPXOR128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPXOR128load {sym} [off] x ptr mem)
(VPXOR256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPXOR256load {sym} [off] x ptr mem)
(VPXORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPXORD512load {sym} [off] x ptr mem)
(VPXORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPXORQ512load {sym} [off] x ptr mem)
(VPXORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORDMasked128load {sym} [off] x ptr mask mem)
@ -3073,14 +3561,32 @@
(VPXORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORQMasked512load {sym} [off] x ptr mask mem)
(VPBLENDMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMDMasked512load {sym} [off] x ptr mask mem)
(VPBLENDMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMQMasked512load {sym} [off] x ptr mask mem)
(VPCLMULQDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCLMULQDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPCLMULQDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCLMULQDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VSHUFPS128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VSHUFPD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VSHUFPS256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VSHUFPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VSHUFPD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VSHUFPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPSHUFD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPSHUFD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSHUFHW128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFHW128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPSHUFHW256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFHW256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPSHUFHWMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFHWMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSHUFHWMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFHWMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSHUFLW128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFLW128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPSHUFLW256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFLW256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPSHUFLWMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFLWMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSHUFLWMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFLWMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPSLLWMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLWMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSLLWMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLWMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSLLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSLLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
@ -3093,12 +3599,16 @@
(VPSRAQ128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPSRAQ256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPSRAQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
(VPSRLWMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLWMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSRLWMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLWMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSRLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSRLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSRLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSRLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSRLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSRLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSRAWMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAWMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSRAWMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAWMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSRADMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSRADMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPSRADMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)

View file

@ -1490,25 +1490,58 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VSHUFPS128", argLength: 2, reg: v21, asm: "VSHUFPS", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VSHUFPS256", argLength: 2, reg: v21, asm: "VSHUFPS", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VSHUFPS512", argLength: 2, reg: w21, asm: "VSHUFPS", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VADDPD128load", argLength: 3, reg: v21load, asm: "VADDPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPD256load", argLength: 3, reg: v21load, asm: "VADDPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPD512load", argLength: 3, reg: w21load, asm: "VADDPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPDMasked128load", argLength: 4, reg: w2kwload, asm: "VADDPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPDMasked256load", argLength: 4, reg: w2kwload, asm: "VADDPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPDMasked512load", argLength: 4, reg: w2kwload, asm: "VADDPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPS128load", argLength: 3, reg: v21load, asm: "VADDPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPS256load", argLength: 3, reg: v21load, asm: "VADDPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPS512load", argLength: 3, reg: w21load, asm: "VADDPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPSMasked128load", argLength: 4, reg: w2kwload, asm: "VADDPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPSMasked256load", argLength: 4, reg: w2kwload, asm: "VADDPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPSMasked512load", argLength: 4, reg: w2kwload, asm: "VADDPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDSUBPD128load", argLength: 3, reg: v21load, asm: "VADDSUBPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDSUBPD256load", argLength: 3, reg: v21load, asm: "VADDSUBPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDSUBPS128load", argLength: 3, reg: v21load, asm: "VADDSUBPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VADDSUBPS256load", argLength: 3, reg: v21load, asm: "VADDSUBPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VAESDEC128load", argLength: 3, reg: v21load, asm: "VAESDEC", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VAESDEC256load", argLength: 3, reg: v21load, asm: "VAESDEC", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VAESDECLAST128load", argLength: 3, reg: v21load, asm: "VAESDECLAST", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VAESDECLAST256load", argLength: 3, reg: v21load, asm: "VAESDECLAST", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VAESENC128load", argLength: 3, reg: v21load, asm: "VAESENC", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VAESENC256load", argLength: 3, reg: v21load, asm: "VAESENC", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VAESENCLAST128load", argLength: 3, reg: v21load, asm: "VAESENCLAST", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VAESENCLAST256load", argLength: 3, reg: v21load, asm: "VAESENCLAST", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VAESIMC128load", argLength: 2, reg: v11load, asm: "VAESIMC", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VBROADCASTSD256load", argLength: 2, reg: v11load, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VBROADCASTSD512load", argLength: 2, reg: w11load, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VBROADCASTSDMasked256load", argLength: 3, reg: wkwload, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VBROADCASTSDMasked512load", argLength: 3, reg: wkwload, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VBROADCASTSS128load", argLength: 2, reg: v11load, asm: "VBROADCASTSS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VBROADCASTSS256load", argLength: 2, reg: v11load, asm: "VBROADCASTSS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VBROADCASTSS512load", argLength: 2, reg: w11load, asm: "VBROADCASTSS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VBROADCASTSSMasked128load", argLength: 3, reg: wkwload, asm: "VBROADCASTSS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VBROADCASTSSMasked256load", argLength: 3, reg: wkwload, asm: "VBROADCASTSS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VBROADCASTSSMasked512load", argLength: 3, reg: wkwload, asm: "VBROADCASTSS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTDQ2PD256load", argLength: 2, reg: v11load, asm: "VCVTDQ2PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTDQ2PD512load", argLength: 2, reg: w11load, asm: "VCVTDQ2PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTDQ2PDMasked256load", argLength: 3, reg: wkwload, asm: "VCVTDQ2PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTDQ2PDMasked512load", argLength: 3, reg: wkwload, asm: "VCVTDQ2PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTDQ2PS128load", argLength: 2, reg: v11load, asm: "VCVTDQ2PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTDQ2PS256load", argLength: 2, reg: v11load, asm: "VCVTDQ2PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTDQ2PS512load", argLength: 2, reg: w11load, asm: "VCVTDQ2PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTDQ2PSMasked128load", argLength: 3, reg: wkwload, asm: "VCVTDQ2PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTDQ2PSMasked256load", argLength: 3, reg: wkwload, asm: "VCVTDQ2PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTDQ2PSMasked512load", argLength: 3, reg: wkwload, asm: "VCVTDQ2PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPD2PS256load", argLength: 2, reg: w11load, asm: "VCVTPD2PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPD2PSMasked256load", argLength: 3, reg: wkwload, asm: "VCVTPD2PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPD2PSX128load", argLength: 2, reg: v11load, asm: "VCVTPD2PSX", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPD2PSXMasked128load", argLength: 3, reg: wkwload, asm: "VCVTPD2PSX", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPD2PSY128load", argLength: 2, reg: v11load, asm: "VCVTPD2PSY", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPD2PSYMasked128load", argLength: 3, reg: wkwload, asm: "VCVTPD2PSY", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPS2PD256load", argLength: 2, reg: v11load, asm: "VCVTPS2PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPS2PD512load", argLength: 2, reg: w11load, asm: "VCVTPS2PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPS2PDMasked256load", argLength: 3, reg: wkwload, asm: "VCVTPS2PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTPS2PDMasked512load", argLength: 3, reg: wkwload, asm: "VCVTPS2PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1526,7 +1559,9 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VCVTQQ2PSYMasked128load", argLength: 3, reg: wkwload, asm: "VCVTQQ2PSY", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPD2DQ256load", argLength: 2, reg: w11load, asm: "VCVTTPD2DQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPD2DQMasked256load", argLength: 3, reg: wkwload, asm: "VCVTTPD2DQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPD2DQX128load", argLength: 2, reg: v11load, asm: "VCVTTPD2DQX", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPD2DQXMasked128load", argLength: 3, reg: wkwload, asm: "VCVTTPD2DQX", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPD2DQY128load", argLength: 2, reg: v11load, asm: "VCVTTPD2DQY", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPD2DQYMasked128load", argLength: 3, reg: wkwload, asm: "VCVTTPD2DQY", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPD2QQ128load", argLength: 2, reg: w11load, asm: "VCVTTPD2QQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPD2QQ256load", argLength: 2, reg: w11load, asm: "VCVTTPD2QQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1546,6 +1581,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VCVTTPD2UQQMasked128load", argLength: 3, reg: wkwload, asm: "VCVTTPD2UQQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPD2UQQMasked256load", argLength: 3, reg: wkwload, asm: "VCVTTPD2UQQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPD2UQQMasked512load", argLength: 3, reg: wkwload, asm: "VCVTTPD2UQQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPS2DQ128load", argLength: 2, reg: v11load, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPS2DQ256load", argLength: 2, reg: v11load, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPS2DQ512load", argLength: 2, reg: w11load, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPS2DQMasked128load", argLength: 3, reg: wkwload, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTTPS2DQMasked256load", argLength: 3, reg: wkwload, asm: "VCVTTPS2DQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1586,62 +1623,112 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VCVTUQQ2PSXMasked128load", argLength: 3, reg: wkwload, asm: "VCVTUQQ2PSX", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTUQQ2PSY128load", argLength: 2, reg: w11load, asm: "VCVTUQQ2PSY", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VCVTUQQ2PSYMasked128load", argLength: 3, reg: wkwload, asm: "VCVTUQQ2PSY", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPD128load", argLength: 3, reg: v21load, asm: "VDIVPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPD256load", argLength: 3, reg: v21load, asm: "VDIVPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPD512load", argLength: 3, reg: w21load, asm: "VDIVPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPDMasked128load", argLength: 4, reg: w2kwload, asm: "VDIVPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPDMasked256load", argLength: 4, reg: w2kwload, asm: "VDIVPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPDMasked512load", argLength: 4, reg: w2kwload, asm: "VDIVPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPS128load", argLength: 3, reg: v21load, asm: "VDIVPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPS256load", argLength: 3, reg: v21load, asm: "VDIVPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPS512load", argLength: 3, reg: w21load, asm: "VDIVPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPSMasked128load", argLength: 4, reg: w2kwload, asm: "VDIVPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPSMasked256load", argLength: 4, reg: w2kwload, asm: "VDIVPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VDIVPSMasked512load", argLength: 4, reg: w2kwload, asm: "VDIVPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VEXPANDPDMasked128load", argLength: 3, reg: wkwload, asm: "VEXPANDPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VEXPANDPDMasked256load", argLength: 3, reg: wkwload, asm: "VEXPANDPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VEXPANDPSMasked128load", argLength: 3, reg: wkwload, asm: "VEXPANDPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VEXPANDPSMasked256load", argLength: 3, reg: wkwload, asm: "VEXPANDPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VFMADD213PD128load", argLength: 4, reg: v31load, asm: "VFMADD213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PD256load", argLength: 4, reg: v31load, asm: "VFMADD213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PD512load", argLength: 4, reg: w31load, asm: "VFMADD213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PDMasked128load", argLength: 5, reg: w3kwload, asm: "VFMADD213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PDMasked256load", argLength: 5, reg: w3kwload, asm: "VFMADD213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PDMasked512load", argLength: 5, reg: w3kwload, asm: "VFMADD213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PS128load", argLength: 4, reg: v31load, asm: "VFMADD213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PS256load", argLength: 4, reg: v31load, asm: "VFMADD213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PS512load", argLength: 4, reg: w31load, asm: "VFMADD213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PSMasked128load", argLength: 5, reg: w3kwload, asm: "VFMADD213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PSMasked256load", argLength: 5, reg: w3kwload, asm: "VFMADD213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADD213PSMasked512load", argLength: 5, reg: w3kwload, asm: "VFMADD213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PD128load", argLength: 4, reg: v31load, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PD256load", argLength: 4, reg: v31load, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PD512load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PDMasked128load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PDMasked256load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PDMasked512load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PS128load", argLength: 4, reg: v31load, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PS256load", argLength: 4, reg: v31load, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PS512load", argLength: 4, reg: w31load, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PSMasked128load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PSMasked256load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMADDSUB213PSMasked512load", argLength: 5, reg: w3kwload, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PD128load", argLength: 4, reg: v31load, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PD256load", argLength: 4, reg: v31load, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PD512load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PDMasked128load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PDMasked256load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PDMasked512load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PS128load", argLength: 4, reg: v31load, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PS256load", argLength: 4, reg: v31load, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PS512load", argLength: 4, reg: w31load, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PSMasked128load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PSMasked256load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VFMSUBADD213PSMasked512load", argLength: 5, reg: w3kwload, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VGF2P8MULB128load", argLength: 3, reg: w21load, asm: "VGF2P8MULB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8MULB256load", argLength: 3, reg: w21load, asm: "VGF2P8MULB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8MULBMasked128load", argLength: 4, reg: w2kwload, asm: "VGF2P8MULB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8MULBMasked256load", argLength: 4, reg: w2kwload, asm: "VGF2P8MULB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VHADDPD128load", argLength: 3, reg: v21load, asm: "VHADDPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VHADDPD256load", argLength: 3, reg: v21load, asm: "VHADDPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VHADDPS128load", argLength: 3, reg: v21load, asm: "VHADDPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VHADDPS256load", argLength: 3, reg: v21load, asm: "VHADDPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VHSUBPD128load", argLength: 3, reg: v21load, asm: "VHSUBPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VHSUBPD256load", argLength: 3, reg: v21load, asm: "VHSUBPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VHSUBPS128load", argLength: 3, reg: v21load, asm: "VHSUBPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VHSUBPS256load", argLength: 3, reg: v21load, asm: "VHSUBPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPD128load", argLength: 3, reg: v21load, asm: "VMAXPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPD256load", argLength: 3, reg: v21load, asm: "VMAXPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPD512load", argLength: 3, reg: w21load, asm: "VMAXPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPDMasked128load", argLength: 4, reg: w2kwload, asm: "VMAXPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPDMasked256load", argLength: 4, reg: w2kwload, asm: "VMAXPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPDMasked512load", argLength: 4, reg: w2kwload, asm: "VMAXPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPS128load", argLength: 3, reg: v21load, asm: "VMAXPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPS256load", argLength: 3, reg: v21load, asm: "VMAXPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPS512load", argLength: 3, reg: w21load, asm: "VMAXPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPSMasked128load", argLength: 4, reg: w2kwload, asm: "VMAXPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPSMasked256load", argLength: 4, reg: w2kwload, asm: "VMAXPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPSMasked512load", argLength: 4, reg: w2kwload, asm: "VMAXPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPD128load", argLength: 3, reg: v21load, asm: "VMINPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPD256load", argLength: 3, reg: v21load, asm: "VMINPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPD512load", argLength: 3, reg: w21load, asm: "VMINPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPDMasked128load", argLength: 4, reg: w2kwload, asm: "VMINPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPDMasked256load", argLength: 4, reg: w2kwload, asm: "VMINPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPDMasked512load", argLength: 4, reg: w2kwload, asm: "VMINPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPS128load", argLength: 3, reg: v21load, asm: "VMINPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPS256load", argLength: 3, reg: v21load, asm: "VMINPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPS512load", argLength: 3, reg: w21load, asm: "VMINPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPSMasked128load", argLength: 4, reg: w2kwload, asm: "VMINPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPSMasked256load", argLength: 4, reg: w2kwload, asm: "VMINPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMINPSMasked512load", argLength: 4, reg: w2kwload, asm: "VMINPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPD128load", argLength: 3, reg: v21load, asm: "VMULPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPD256load", argLength: 3, reg: v21load, asm: "VMULPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPD512load", argLength: 3, reg: w21load, asm: "VMULPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPDMasked128load", argLength: 4, reg: w2kwload, asm: "VMULPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPDMasked256load", argLength: 4, reg: w2kwload, asm: "VMULPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPDMasked512load", argLength: 4, reg: w2kwload, asm: "VMULPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPS128load", argLength: 3, reg: v21load, asm: "VMULPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPS256load", argLength: 3, reg: v21load, asm: "VMULPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPS512load", argLength: 3, reg: w21load, asm: "VMULPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPSMasked128load", argLength: 4, reg: w2kwload, asm: "VMULPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPSMasked256load", argLength: 4, reg: w2kwload, asm: "VMULPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMULPSMasked512load", argLength: 4, reg: w2kwload, asm: "VMULPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSB128load", argLength: 2, reg: v11load, asm: "VPABSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSB256load", argLength: 2, reg: v11load, asm: "VPABSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSBMasked128load", argLength: 3, reg: wkwload, asm: "VPABSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSBMasked256load", argLength: 3, reg: wkwload, asm: "VPABSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSD128load", argLength: 2, reg: v11load, asm: "VPABSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSD256load", argLength: 2, reg: v11load, asm: "VPABSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSD512load", argLength: 2, reg: w11load, asm: "VPABSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSDMasked128load", argLength: 3, reg: wkwload, asm: "VPABSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSDMasked256load", argLength: 3, reg: wkwload, asm: "VPABSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1652,26 +1739,66 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPABSQMasked128load", argLength: 3, reg: wkwload, asm: "VPABSQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSQMasked256load", argLength: 3, reg: wkwload, asm: "VPABSQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSQMasked512load", argLength: 3, reg: wkwload, asm: "VPABSQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSW128load", argLength: 2, reg: v11load, asm: "VPABSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSW256load", argLength: 2, reg: v11load, asm: "VPABSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSWMasked128load", argLength: 3, reg: wkwload, asm: "VPABSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPABSWMasked256load", argLength: 3, reg: wkwload, asm: "VPABSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKSSDW128load", argLength: 3, reg: v21load, asm: "VPACKSSDW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKSSDW256load", argLength: 3, reg: v21load, asm: "VPACKSSDW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKSSDW512load", argLength: 3, reg: w21load, asm: "VPACKSSDW", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKSSDWMasked128load", argLength: 4, reg: w2kwload, asm: "VPACKSSDW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKSSDWMasked256load", argLength: 4, reg: w2kwload, asm: "VPACKSSDW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKSSDWMasked512load", argLength: 4, reg: w2kwload, asm: "VPACKSSDW", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKUSDW128load", argLength: 3, reg: v21load, asm: "VPACKUSDW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKUSDW256load", argLength: 3, reg: v21load, asm: "VPACKUSDW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKUSDW512load", argLength: 3, reg: w21load, asm: "VPACKUSDW", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKUSDWMasked128load", argLength: 4, reg: w2kwload, asm: "VPACKUSDW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKUSDWMasked256load", argLength: 4, reg: w2kwload, asm: "VPACKUSDW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPACKUSDWMasked512load", argLength: 4, reg: w2kwload, asm: "VPACKUSDW", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDB128load", argLength: 3, reg: v21load, asm: "VPADDB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDB256load", argLength: 3, reg: v21load, asm: "VPADDB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDBMasked128load", argLength: 4, reg: w2kwload, asm: "VPADDB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDBMasked256load", argLength: 4, reg: w2kwload, asm: "VPADDB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDD128load", argLength: 3, reg: v21load, asm: "VPADDD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDD256load", argLength: 3, reg: v21load, asm: "VPADDD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDD512load", argLength: 3, reg: w21load, asm: "VPADDD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDDMasked128load", argLength: 4, reg: w2kwload, asm: "VPADDD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDDMasked256load", argLength: 4, reg: w2kwload, asm: "VPADDD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDDMasked512load", argLength: 4, reg: w2kwload, asm: "VPADDD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDQ128load", argLength: 3, reg: v21load, asm: "VPADDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDQ256load", argLength: 3, reg: v21load, asm: "VPADDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDQ512load", argLength: 3, reg: w21load, asm: "VPADDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDQMasked128load", argLength: 4, reg: w2kwload, asm: "VPADDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDQMasked256load", argLength: 4, reg: w2kwload, asm: "VPADDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDQMasked512load", argLength: 4, reg: w2kwload, asm: "VPADDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDSB128load", argLength: 3, reg: v21load, asm: "VPADDSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDSB256load", argLength: 3, reg: v21load, asm: "VPADDSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDSBMasked128load", argLength: 4, reg: w2kwload, asm: "VPADDSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDSBMasked256load", argLength: 4, reg: w2kwload, asm: "VPADDSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDSW128load", argLength: 3, reg: v21load, asm: "VPADDSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDSW256load", argLength: 3, reg: v21load, asm: "VPADDSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDSWMasked128load", argLength: 4, reg: w2kwload, asm: "VPADDSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDSWMasked256load", argLength: 4, reg: w2kwload, asm: "VPADDSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDUSB128load", argLength: 3, reg: v21load, asm: "VPADDUSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDUSB256load", argLength: 3, reg: v21load, asm: "VPADDUSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDUSBMasked128load", argLength: 4, reg: w2kwload, asm: "VPADDUSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDUSBMasked256load", argLength: 4, reg: w2kwload, asm: "VPADDUSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDUSW128load", argLength: 3, reg: v21load, asm: "VPADDUSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDUSW256load", argLength: 3, reg: v21load, asm: "VPADDUSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDUSWMasked128load", argLength: 4, reg: w2kwload, asm: "VPADDUSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDUSWMasked256load", argLength: 4, reg: w2kwload, asm: "VPADDUSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDW128load", argLength: 3, reg: v21load, asm: "VPADDW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDW256load", argLength: 3, reg: v21load, asm: "VPADDW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDWMasked128load", argLength: 4, reg: w2kwload, asm: "VPADDW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPADDWMasked256load", argLength: 4, reg: w2kwload, asm: "VPADDW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPAND128load", argLength: 3, reg: v21load, asm: "VPAND", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPAND256load", argLength: 3, reg: v21load, asm: "VPAND", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDD512load", argLength: 3, reg: w21load, asm: "VPANDD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDDMasked128load", argLength: 4, reg: w2kwload, asm: "VPANDD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDDMasked256load", argLength: 4, reg: w2kwload, asm: "VPANDD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDDMasked512load", argLength: 4, reg: w2kwload, asm: "VPANDD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDN128load", argLength: 3, reg: v21load, asm: "VPANDN", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDN256load", argLength: 3, reg: v21load, asm: "VPANDN", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDND512load", argLength: 3, reg: w21load, asm: "VPANDND", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDNDMasked128load", argLength: 4, reg: w2kwload, asm: "VPANDND", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDNDMasked256load", argLength: 4, reg: w2kwload, asm: "VPANDND", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1684,19 +1811,74 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPANDQMasked128load", argLength: 4, reg: w2kwload, asm: "VPANDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDQMasked256load", argLength: 4, reg: w2kwload, asm: "VPANDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPANDQMasked512load", argLength: 4, reg: w2kwload, asm: "VPANDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPAVGB128load", argLength: 3, reg: v21load, asm: "VPAVGB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPAVGB256load", argLength: 3, reg: v21load, asm: "VPAVGB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPAVGBMasked128load", argLength: 4, reg: w2kwload, asm: "VPAVGB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPAVGBMasked256load", argLength: 4, reg: w2kwload, asm: "VPAVGB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPAVGW128load", argLength: 3, reg: v21load, asm: "VPAVGW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPAVGW256load", argLength: 3, reg: v21load, asm: "VPAVGW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPAVGWMasked128load", argLength: 4, reg: w2kwload, asm: "VPAVGW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPAVGWMasked256load", argLength: 4, reg: w2kwload, asm: "VPAVGW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBLENDMDMasked512load", argLength: 4, reg: w2kwload, asm: "VPBLENDMD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBLENDMQMasked512load", argLength: 4, reg: w2kwload, asm: "VPBLENDMQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTB128load", argLength: 2, reg: v11load, asm: "VPBROADCASTB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTB256load", argLength: 2, reg: v11load, asm: "VPBROADCASTB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTB512load", argLength: 2, reg: w11load, asm: "VPBROADCASTB", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTBMasked128load", argLength: 3, reg: wkwload, asm: "VPBROADCASTB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTBMasked256load", argLength: 3, reg: wkwload, asm: "VPBROADCASTB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTBMasked512load", argLength: 3, reg: wkwload, asm: "VPBROADCASTB", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTD128load", argLength: 2, reg: v11load, asm: "VPBROADCASTD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTD256load", argLength: 2, reg: v11load, asm: "VPBROADCASTD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTD512load", argLength: 2, reg: w11load, asm: "VPBROADCASTD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTDMasked128load", argLength: 3, reg: wkwload, asm: "VPBROADCASTD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTDMasked256load", argLength: 3, reg: wkwload, asm: "VPBROADCASTD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTDMasked512load", argLength: 3, reg: wkwload, asm: "VPBROADCASTD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTQ128load", argLength: 2, reg: v11load, asm: "VPBROADCASTQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTQ256load", argLength: 2, reg: v11load, asm: "VPBROADCASTQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTQ512load", argLength: 2, reg: w11load, asm: "VPBROADCASTQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTQMasked128load", argLength: 3, reg: wkwload, asm: "VPBROADCASTQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTQMasked256load", argLength: 3, reg: wkwload, asm: "VPBROADCASTQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTQMasked512load", argLength: 3, reg: wkwload, asm: "VPBROADCASTQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTW128load", argLength: 2, reg: v11load, asm: "VPBROADCASTW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBROADCASTW256load", argLength: 2, reg: v11load, asm: "VPBROADCASTW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQB128load", argLength: 3, reg: v21load, asm: "VPCMPEQB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQB256load", argLength: 3, reg: v21load, asm: "VPCMPEQB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQD128load", argLength: 3, reg: v21load, asm: "VPCMPEQD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQD256load", argLength: 3, reg: v21load, asm: "VPCMPEQD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQD512load", argLength: 3, reg: w2kload, asm: "VPCMPEQD", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQQ128load", argLength: 3, reg: v21load, asm: "VPCMPEQQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQQ256load", argLength: 3, reg: v21load, asm: "VPCMPEQQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQQ512load", argLength: 3, reg: w2kload, asm: "VPCMPEQQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQW128load", argLength: 3, reg: v21load, asm: "VPCMPEQW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPEQW256load", argLength: 3, reg: v21load, asm: "VPCMPEQW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTB128load", argLength: 3, reg: v21load, asm: "VPCMPGTB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTB256load", argLength: 3, reg: v21load, asm: "VPCMPGTB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTD128load", argLength: 3, reg: v21load, asm: "VPCMPGTD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTD256load", argLength: 3, reg: v21load, asm: "VPCMPGTD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTD512load", argLength: 3, reg: w2kload, asm: "VPCMPGTD", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTQ128load", argLength: 3, reg: v21load, asm: "VPCMPGTQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTQ256load", argLength: 3, reg: v21load, asm: "VPCMPGTQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTQ512load", argLength: 3, reg: w2kload, asm: "VPCMPGTQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTW128load", argLength: 3, reg: v21load, asm: "VPCMPGTW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTW256load", argLength: 3, reg: v21load, asm: "VPCMPGTW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPDPWSSD128load", argLength: 4, reg: v31load, asm: "VPDPWSSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSD256load", argLength: 4, reg: v31load, asm: "VPDPWSSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSD512load", argLength: 4, reg: w31load, asm: "VPDPWSSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMB128load", argLength: 3, reg: w21load, asm: "VPERMB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMB256load", argLength: 3, reg: w21load, asm: "VPERMB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMBMasked128load", argLength: 4, reg: w2kwload, asm: "VPERMB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMBMasked256load", argLength: 4, reg: w2kwload, asm: "VPERMB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMD256load", argLength: 3, reg: v21load, asm: "VPERMD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMD512load", argLength: 3, reg: w21load, asm: "VPERMD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMDMasked256load", argLength: 4, reg: w2kwload, asm: "VPERMD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMDMasked512load", argLength: 4, reg: w2kwload, asm: "VPERMD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMI2B128load", argLength: 4, reg: w31load, asm: "VPERMI2B", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2B256load", argLength: 4, reg: w31load, asm: "VPERMI2B", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2BMasked128load", argLength: 5, reg: w3kwload, asm: "VPERMI2B", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2BMasked256load", argLength: 5, reg: w3kwload, asm: "VPERMI2B", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2D128load", argLength: 4, reg: w31load, asm: "VPERMI2D", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2D256load", argLength: 4, reg: w31load, asm: "VPERMI2D", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2D512load", argLength: 4, reg: w31load, asm: "VPERMI2D", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
@ -1721,10 +1903,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPERMI2QMasked128load", argLength: 5, reg: w3kwload, asm: "VPERMI2Q", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2QMasked256load", argLength: 5, reg: w3kwload, asm: "VPERMI2Q", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2QMasked512load", argLength: 5, reg: w3kwload, asm: "VPERMI2Q", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2W128load", argLength: 4, reg: w31load, asm: "VPERMI2W", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2W256load", argLength: 4, reg: w31load, asm: "VPERMI2W", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2WMasked128load", argLength: 5, reg: w3kwload, asm: "VPERMI2W", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMI2WMasked256load", argLength: 5, reg: w3kwload, asm: "VPERMI2W", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPERMPD256load", argLength: 3, reg: w21load, asm: "VPERMPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPD512load", argLength: 3, reg: w21load, asm: "VPERMPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPDMasked256load", argLength: 4, reg: w2kwload, asm: "VPERMPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPDMasked512load", argLength: 4, reg: w2kwload, asm: "VPERMPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPS256load", argLength: 3, reg: v21load, asm: "VPERMPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPS512load", argLength: 3, reg: w21load, asm: "VPERMPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPSMasked256load", argLength: 4, reg: w2kwload, asm: "VPERMPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMPSMasked512load", argLength: 4, reg: w2kwload, asm: "VPERMPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1732,6 +1919,30 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPERMQ512load", argLength: 3, reg: w21load, asm: "VPERMQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMQMasked256load", argLength: 4, reg: w2kwload, asm: "VPERMQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMQMasked512load", argLength: 4, reg: w2kwload, asm: "VPERMQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMW128load", argLength: 3, reg: w21load, asm: "VPERMW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMW256load", argLength: 3, reg: w21load, asm: "VPERMW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMWMasked128load", argLength: 4, reg: w2kwload, asm: "VPERMW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPERMWMasked256load", argLength: 4, reg: w2kwload, asm: "VPERMW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPEXPANDBMasked128load", argLength: 3, reg: wkwload, asm: "VPEXPANDB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPEXPANDBMasked256load", argLength: 3, reg: wkwload, asm: "VPEXPANDB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPEXPANDDMasked128load", argLength: 3, reg: wkwload, asm: "VPEXPANDD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPEXPANDDMasked256load", argLength: 3, reg: wkwload, asm: "VPEXPANDD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPEXPANDQMasked128load", argLength: 3, reg: wkwload, asm: "VPEXPANDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPEXPANDQMasked256load", argLength: 3, reg: wkwload, asm: "VPEXPANDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPEXPANDWMasked128load", argLength: 3, reg: wkwload, asm: "VPEXPANDW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPEXPANDWMasked256load", argLength: 3, reg: wkwload, asm: "VPEXPANDW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPHADDD128load", argLength: 3, reg: v21load, asm: "VPHADDD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPHADDD256load", argLength: 3, reg: v21load, asm: "VPHADDD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPHADDSW128load", argLength: 3, reg: v21load, asm: "VPHADDSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPHADDSW256load", argLength: 3, reg: v21load, asm: "VPHADDSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPHADDW128load", argLength: 3, reg: v21load, asm: "VPHADDW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPHADDW256load", argLength: 3, reg: v21load, asm: "VPHADDW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPHSUBD128load", argLength: 3, reg: v21load, asm: "VPHSUBD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPHSUBD256load", argLength: 3, reg: v21load, asm: "VPHSUBD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPHSUBSW128load", argLength: 3, reg: v21load, asm: "VPHSUBSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPHSUBSW256load", argLength: 3, reg: v21load, asm: "VPHSUBSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPHSUBW128load", argLength: 3, reg: v21load, asm: "VPHSUBW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPHSUBW256load", argLength: 3, reg: v21load, asm: "VPHSUBW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTD128load", argLength: 2, reg: w11load, asm: "VPLZCNTD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTD256load", argLength: 2, reg: w11load, asm: "VPLZCNTD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTD512load", argLength: 2, reg: w11load, asm: "VPLZCNTD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1744,6 +1955,20 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPLZCNTQMasked128load", argLength: 3, reg: wkwload, asm: "VPLZCNTQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTQMasked256load", argLength: 3, reg: wkwload, asm: "VPLZCNTQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTQMasked512load", argLength: 3, reg: wkwload, asm: "VPLZCNTQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMADDUBSW128load", argLength: 3, reg: v21load, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMADDUBSW256load", argLength: 3, reg: v21load, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMADDUBSWMasked128load", argLength: 4, reg: w2kwload, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMADDUBSWMasked256load", argLength: 4, reg: w2kwload, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMADDWD128load", argLength: 3, reg: v21load, asm: "VPMADDWD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMADDWD256load", argLength: 3, reg: v21load, asm: "VPMADDWD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMADDWDMasked128load", argLength: 4, reg: w2kwload, asm: "VPMADDWD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMADDWDMasked256load", argLength: 4, reg: w2kwload, asm: "VPMADDWD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSB128load", argLength: 3, reg: v21load, asm: "VPMAXSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSB256load", argLength: 3, reg: v21load, asm: "VPMAXSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSBMasked128load", argLength: 4, reg: w2kwload, asm: "VPMAXSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSBMasked256load", argLength: 4, reg: w2kwload, asm: "VPMAXSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSD128load", argLength: 3, reg: v21load, asm: "VPMAXSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSD256load", argLength: 3, reg: v21load, asm: "VPMAXSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSD512load", argLength: 3, reg: w21load, asm: "VPMAXSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSDMasked128load", argLength: 4, reg: w2kwload, asm: "VPMAXSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSDMasked256load", argLength: 4, reg: w2kwload, asm: "VPMAXSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1754,6 +1979,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMAXSQMasked128load", argLength: 4, reg: w2kwload, asm: "VPMAXSQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSQMasked256load", argLength: 4, reg: w2kwload, asm: "VPMAXSQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSQMasked512load", argLength: 4, reg: w2kwload, asm: "VPMAXSQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSW128load", argLength: 3, reg: v21load, asm: "VPMAXSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSW256load", argLength: 3, reg: v21load, asm: "VPMAXSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSWMasked128load", argLength: 4, reg: w2kwload, asm: "VPMAXSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXSWMasked256load", argLength: 4, reg: w2kwload, asm: "VPMAXSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUB128load", argLength: 3, reg: v21load, asm: "VPMAXUB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUB256load", argLength: 3, reg: v21load, asm: "VPMAXUB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUBMasked128load", argLength: 4, reg: w2kwload, asm: "VPMAXUB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUBMasked256load", argLength: 4, reg: w2kwload, asm: "VPMAXUB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUD128load", argLength: 3, reg: v21load, asm: "VPMAXUD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUD256load", argLength: 3, reg: v21load, asm: "VPMAXUD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUD512load", argLength: 3, reg: w21load, asm: "VPMAXUD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUDMasked128load", argLength: 4, reg: w2kwload, asm: "VPMAXUD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUDMasked256load", argLength: 4, reg: w2kwload, asm: "VPMAXUD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1764,6 +1999,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMAXUQMasked128load", argLength: 4, reg: w2kwload, asm: "VPMAXUQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUQMasked256load", argLength: 4, reg: w2kwload, asm: "VPMAXUQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUQMasked512load", argLength: 4, reg: w2kwload, asm: "VPMAXUQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUW128load", argLength: 3, reg: v21load, asm: "VPMAXUW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUW256load", argLength: 3, reg: v21load, asm: "VPMAXUW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUWMasked128load", argLength: 4, reg: w2kwload, asm: "VPMAXUW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMAXUWMasked256load", argLength: 4, reg: w2kwload, asm: "VPMAXUW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSB128load", argLength: 3, reg: v21load, asm: "VPMINSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSB256load", argLength: 3, reg: v21load, asm: "VPMINSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSBMasked128load", argLength: 4, reg: w2kwload, asm: "VPMINSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSBMasked256load", argLength: 4, reg: w2kwload, asm: "VPMINSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSD128load", argLength: 3, reg: v21load, asm: "VPMINSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSD256load", argLength: 3, reg: v21load, asm: "VPMINSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSD512load", argLength: 3, reg: w21load, asm: "VPMINSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSDMasked128load", argLength: 4, reg: w2kwload, asm: "VPMINSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSDMasked256load", argLength: 4, reg: w2kwload, asm: "VPMINSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1774,6 +2019,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMINSQMasked128load", argLength: 4, reg: w2kwload, asm: "VPMINSQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSQMasked256load", argLength: 4, reg: w2kwload, asm: "VPMINSQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSQMasked512load", argLength: 4, reg: w2kwload, asm: "VPMINSQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSW128load", argLength: 3, reg: v21load, asm: "VPMINSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSW256load", argLength: 3, reg: v21load, asm: "VPMINSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSWMasked128load", argLength: 4, reg: w2kwload, asm: "VPMINSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINSWMasked256load", argLength: 4, reg: w2kwload, asm: "VPMINSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUB128load", argLength: 3, reg: v21load, asm: "VPMINUB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUB256load", argLength: 3, reg: v21load, asm: "VPMINUB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUBMasked128load", argLength: 4, reg: w2kwload, asm: "VPMINUB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUBMasked256load", argLength: 4, reg: w2kwload, asm: "VPMINUB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUD128load", argLength: 3, reg: v21load, asm: "VPMINUD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUD256load", argLength: 3, reg: v21load, asm: "VPMINUD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUD512load", argLength: 3, reg: w21load, asm: "VPMINUD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUDMasked128load", argLength: 4, reg: w2kwload, asm: "VPMINUD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUDMasked256load", argLength: 4, reg: w2kwload, asm: "VPMINUD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1784,6 +2039,92 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMINUQMasked128load", argLength: 4, reg: w2kwload, asm: "VPMINUQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUQMasked256load", argLength: 4, reg: w2kwload, asm: "VPMINUQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUQMasked512load", argLength: 4, reg: w2kwload, asm: "VPMINUQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUW128load", argLength: 3, reg: v21load, asm: "VPMINUW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUW256load", argLength: 3, reg: v21load, asm: "VPMINUW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUWMasked128load", argLength: 4, reg: w2kwload, asm: "VPMINUW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMINUWMasked256load", argLength: 4, reg: w2kwload, asm: "VPMINUW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBD128load", argLength: 2, reg: v11load, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBD256load", argLength: 2, reg: v11load, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBD512load", argLength: 2, reg: w11load, asm: "VPMOVSXBD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBDMasked128load", argLength: 3, reg: wkwload, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBDMasked256load", argLength: 3, reg: wkwload, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBDMasked512load", argLength: 3, reg: wkwload, asm: "VPMOVSXBD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBQ128load", argLength: 2, reg: v11load, asm: "VPMOVSXBQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBQ256load", argLength: 2, reg: v11load, asm: "VPMOVSXBQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBQ512load", argLength: 2, reg: w11load, asm: "VPMOVSXBQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBQMasked256load", argLength: 3, reg: wkwload, asm: "VPMOVSXBQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBQMasked512load", argLength: 3, reg: wkwload, asm: "VPMOVSXBQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBW128load", argLength: 2, reg: v11load, asm: "VPMOVSXBW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBW256load", argLength: 2, reg: v11load, asm: "VPMOVSXBW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBW512load", argLength: 2, reg: w11load, asm: "VPMOVSXBW", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBWMasked128load", argLength: 3, reg: wkwload, asm: "VPMOVSXBW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBWMasked256load", argLength: 3, reg: wkwload, asm: "VPMOVSXBW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXBWMasked512load", argLength: 3, reg: wkwload, asm: "VPMOVSXBW", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXDQ128load", argLength: 2, reg: v11load, asm: "VPMOVSXDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXDQ256load", argLength: 2, reg: v11load, asm: "VPMOVSXDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXDQ512load", argLength: 2, reg: w11load, asm: "VPMOVSXDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXDQMasked128load", argLength: 3, reg: wkwload, asm: "VPMOVSXDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXDQMasked256load", argLength: 3, reg: wkwload, asm: "VPMOVSXDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXDQMasked512load", argLength: 3, reg: wkwload, asm: "VPMOVSXDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXWD128load", argLength: 2, reg: v11load, asm: "VPMOVSXWD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXWD256load", argLength: 2, reg: v11load, asm: "VPMOVSXWD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXWD512load", argLength: 2, reg: w11load, asm: "VPMOVSXWD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXWDMasked128load", argLength: 3, reg: wkwload, asm: "VPMOVSXWD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXWDMasked256load", argLength: 3, reg: wkwload, asm: "VPMOVSXWD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXWDMasked512load", argLength: 3, reg: wkwload, asm: "VPMOVSXWD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXWQ128load", argLength: 2, reg: v11load, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXWQ256load", argLength: 2, reg: v11load, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXWQ512load", argLength: 2, reg: w11load, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXWQMasked128load", argLength: 3, reg: wkwload, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXWQMasked256load", argLength: 3, reg: wkwload, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVSXWQMasked512load", argLength: 3, reg: wkwload, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBD128load", argLength: 2, reg: v11load, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBD256load", argLength: 2, reg: v11load, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBD512load", argLength: 2, reg: w11load, asm: "VPMOVZXBD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBDMasked128load", argLength: 3, reg: wkwload, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBDMasked256load", argLength: 3, reg: wkwload, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBDMasked512load", argLength: 3, reg: wkwload, asm: "VPMOVZXBD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBQ128load", argLength: 2, reg: v11load, asm: "VPMOVZXBQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBQ256load", argLength: 2, reg: v11load, asm: "VPMOVZXBQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBQ512load", argLength: 2, reg: w11load, asm: "VPMOVZXBQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBQMasked256load", argLength: 3, reg: wkwload, asm: "VPMOVZXBQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBQMasked512load", argLength: 3, reg: wkwload, asm: "VPMOVZXBQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBW128load", argLength: 2, reg: v11load, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBW256load", argLength: 2, reg: v11load, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBW512load", argLength: 2, reg: w11load, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBWMasked128load", argLength: 3, reg: wkwload, asm: "VPMOVZXBW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBWMasked256load", argLength: 3, reg: wkwload, asm: "VPMOVZXBW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXBWMasked512load", argLength: 3, reg: wkwload, asm: "VPMOVZXBW", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXDQ128load", argLength: 2, reg: v11load, asm: "VPMOVZXDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXDQ256load", argLength: 2, reg: v11load, asm: "VPMOVZXDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXDQ512load", argLength: 2, reg: w11load, asm: "VPMOVZXDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXDQMasked128load", argLength: 3, reg: wkwload, asm: "VPMOVZXDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXDQMasked256load", argLength: 3, reg: wkwload, asm: "VPMOVZXDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXDQMasked512load", argLength: 3, reg: wkwload, asm: "VPMOVZXDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXWD128load", argLength: 2, reg: v11load, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXWD256load", argLength: 2, reg: v11load, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXWD512load", argLength: 2, reg: w11load, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXWDMasked128load", argLength: 3, reg: wkwload, asm: "VPMOVZXWD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXWDMasked256load", argLength: 3, reg: wkwload, asm: "VPMOVZXWD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXWDMasked512load", argLength: 3, reg: wkwload, asm: "VPMOVZXWD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXWQ128load", argLength: 2, reg: v11load, asm: "VPMOVZXWQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXWQ256load", argLength: 2, reg: v11load, asm: "VPMOVZXWQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXWQ512load", argLength: 2, reg: w11load, asm: "VPMOVZXWQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXWQMasked128load", argLength: 3, reg: wkwload, asm: "VPMOVZXWQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXWQMasked256load", argLength: 3, reg: wkwload, asm: "VPMOVZXWQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMOVZXWQMasked512load", argLength: 3, reg: wkwload, asm: "VPMOVZXWQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULDQ128load", argLength: 3, reg: v21load, asm: "VPMULDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULDQ256load", argLength: 3, reg: v21load, asm: "VPMULDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULHUW128load", argLength: 3, reg: v21load, asm: "VPMULHUW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULHUW256load", argLength: 3, reg: v21load, asm: "VPMULHUW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULHUWMasked128load", argLength: 4, reg: w2kwload, asm: "VPMULHUW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULHUWMasked256load", argLength: 4, reg: w2kwload, asm: "VPMULHUW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULHW128load", argLength: 3, reg: v21load, asm: "VPMULHW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULHW256load", argLength: 3, reg: v21load, asm: "VPMULHW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULHWMasked128load", argLength: 4, reg: w2kwload, asm: "VPMULHW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULHWMasked256load", argLength: 4, reg: w2kwload, asm: "VPMULHW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLD128load", argLength: 3, reg: v21load, asm: "VPMULLD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLD256load", argLength: 3, reg: v21load, asm: "VPMULLD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLD512load", argLength: 3, reg: w21load, asm: "VPMULLD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLDMasked128load", argLength: 4, reg: w2kwload, asm: "VPMULLD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLDMasked256load", argLength: 4, reg: w2kwload, asm: "VPMULLD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1794,6 +2135,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMULLQMasked128load", argLength: 4, reg: w2kwload, asm: "VPMULLQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLQMasked256load", argLength: 4, reg: w2kwload, asm: "VPMULLQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLQMasked512load", argLength: 4, reg: w2kwload, asm: "VPMULLQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLW128load", argLength: 3, reg: v21load, asm: "VPMULLW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLW256load", argLength: 3, reg: v21load, asm: "VPMULLW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLWMasked128load", argLength: 4, reg: w2kwload, asm: "VPMULLW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULLWMasked256load", argLength: 4, reg: w2kwload, asm: "VPMULLW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULUDQ128load", argLength: 3, reg: v21load, asm: "VPMULUDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPMULUDQ256load", argLength: 3, reg: v21load, asm: "VPMULUDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTB128load", argLength: 2, reg: w11load, asm: "VPOPCNTB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTB256load", argLength: 2, reg: w11load, asm: "VPOPCNTB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTBMasked128load", argLength: 3, reg: wkwload, asm: "VPOPCNTB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTBMasked256load", argLength: 3, reg: wkwload, asm: "VPOPCNTB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTD128load", argLength: 2, reg: w11load, asm: "VPOPCNTD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTD256load", argLength: 2, reg: w11load, asm: "VPOPCNTD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTD512load", argLength: 2, reg: w11load, asm: "VPOPCNTD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1806,6 +2157,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPOPCNTQMasked128load", argLength: 3, reg: wkwload, asm: "VPOPCNTQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTQMasked256load", argLength: 3, reg: wkwload, asm: "VPOPCNTQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTQMasked512load", argLength: 3, reg: wkwload, asm: "VPOPCNTQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTW128load", argLength: 2, reg: w11load, asm: "VPOPCNTW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTW256load", argLength: 2, reg: w11load, asm: "VPOPCNTW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTWMasked128load", argLength: 3, reg: wkwload, asm: "VPOPCNTW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOPCNTWMasked256load", argLength: 3, reg: wkwload, asm: "VPOPCNTW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOR128load", argLength: 3, reg: v21load, asm: "VPOR", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPOR256load", argLength: 3, reg: v21load, asm: "VPOR", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPORD512load", argLength: 3, reg: w21load, asm: "VPORD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPORDMasked128load", argLength: 4, reg: w2kwload, asm: "VPORD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPORDMasked256load", argLength: 4, reg: w2kwload, asm: "VPORD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1838,6 +2195,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPRORVQMasked128load", argLength: 4, reg: w2kwload, asm: "VPRORVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVQMasked256load", argLength: 4, reg: w2kwload, asm: "VPRORVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORVQMasked512load", argLength: 4, reg: w2kwload, asm: "VPRORVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSADBW128load", argLength: 3, reg: v21load, asm: "VPSADBW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSADBW256load", argLength: 3, reg: v21load, asm: "VPSADBW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDVD128load", argLength: 4, reg: w31load, asm: "VPSHLDVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVD256load", argLength: 4, reg: w31load, asm: "VPSHLDVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVD512load", argLength: 4, reg: w31load, asm: "VPSHLDVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
@ -1850,6 +2209,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSHLDVQMasked128load", argLength: 5, reg: w3kwload, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVQMasked256load", argLength: 5, reg: w3kwload, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVQMasked512load", argLength: 5, reg: w3kwload, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVW128load", argLength: 4, reg: w31load, asm: "VPSHLDVW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVW256load", argLength: 4, reg: w31load, asm: "VPSHLDVW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVWMasked128load", argLength: 5, reg: w3kwload, asm: "VPSHLDVW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHLDVWMasked256load", argLength: 5, reg: w3kwload, asm: "VPSHLDVW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVD128load", argLength: 4, reg: w31load, asm: "VPSHRDVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVD256load", argLength: 4, reg: w31load, asm: "VPSHRDVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVD512load", argLength: 4, reg: w31load, asm: "VPSHRDVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
@ -1862,14 +2225,38 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSHRDVQMasked128load", argLength: 5, reg: w3kwload, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVQMasked256load", argLength: 5, reg: w3kwload, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVQMasked512load", argLength: 5, reg: w3kwload, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVW128load", argLength: 4, reg: w31load, asm: "VPSHRDVW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVW256load", argLength: 4, reg: w31load, asm: "VPSHRDVW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVWMasked128load", argLength: 5, reg: w3kwload, asm: "VPSHRDVW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHRDVWMasked256load", argLength: 5, reg: w3kwload, asm: "VPSHRDVW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPSHUFB128load", argLength: 3, reg: v21load, asm: "VPSHUFB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFB256load", argLength: 3, reg: v21load, asm: "VPSHUFB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFBMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHUFB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFBMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHUFB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSIGNB128load", argLength: 3, reg: v21load, asm: "VPSIGNB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSIGNB256load", argLength: 3, reg: v21load, asm: "VPSIGNB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSIGND128load", argLength: 3, reg: v21load, asm: "VPSIGND", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSIGND256load", argLength: 3, reg: v21load, asm: "VPSIGND", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSIGNW128load", argLength: 3, reg: v21load, asm: "VPSIGNW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSIGNW256load", argLength: 3, reg: v21load, asm: "VPSIGNW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVD128load", argLength: 3, reg: v21load, asm: "VPSLLVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVD256load", argLength: 3, reg: v21load, asm: "VPSLLVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVD512load", argLength: 3, reg: w21load, asm: "VPSLLVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVDMasked128load", argLength: 4, reg: w2kwload, asm: "VPSLLVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVDMasked256load", argLength: 4, reg: w2kwload, asm: "VPSLLVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVDMasked512load", argLength: 4, reg: w2kwload, asm: "VPSLLVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVQ128load", argLength: 3, reg: v21load, asm: "VPSLLVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVQ256load", argLength: 3, reg: v21load, asm: "VPSLLVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVQ512load", argLength: 3, reg: w21load, asm: "VPSLLVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSLLVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSLLVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSLLVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVW128load", argLength: 3, reg: w21load, asm: "VPSLLVW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVW256load", argLength: 3, reg: w21load, asm: "VPSLLVW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVWMasked128load", argLength: 4, reg: w2kwload, asm: "VPSLLVW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLVWMasked256load", argLength: 4, reg: w2kwload, asm: "VPSLLVW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVD128load", argLength: 3, reg: v21load, asm: "VPSRAVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVD256load", argLength: 3, reg: v21load, asm: "VPSRAVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVD512load", argLength: 3, reg: w21load, asm: "VPSRAVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVDMasked128load", argLength: 4, reg: w2kwload, asm: "VPSRAVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVDMasked256load", argLength: 4, reg: w2kwload, asm: "VPSRAVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1880,26 +2267,80 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSRAVQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSRAVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSRAVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSRAVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVW128load", argLength: 3, reg: w21load, asm: "VPSRAVW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVW256load", argLength: 3, reg: w21load, asm: "VPSRAVW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVWMasked128load", argLength: 4, reg: w2kwload, asm: "VPSRAVW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAVWMasked256load", argLength: 4, reg: w2kwload, asm: "VPSRAVW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVD128load", argLength: 3, reg: v21load, asm: "VPSRLVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVD256load", argLength: 3, reg: v21load, asm: "VPSRLVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVD512load", argLength: 3, reg: w21load, asm: "VPSRLVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVDMasked128load", argLength: 4, reg: w2kwload, asm: "VPSRLVD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVDMasked256load", argLength: 4, reg: w2kwload, asm: "VPSRLVD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVDMasked512load", argLength: 4, reg: w2kwload, asm: "VPSRLVD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVQ128load", argLength: 3, reg: v21load, asm: "VPSRLVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVQ256load", argLength: 3, reg: v21load, asm: "VPSRLVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVQ512load", argLength: 3, reg: w21load, asm: "VPSRLVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSRLVQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSRLVQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSRLVQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVW128load", argLength: 3, reg: w21load, asm: "VPSRLVW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVW256load", argLength: 3, reg: w21load, asm: "VPSRLVW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVWMasked128load", argLength: 4, reg: w2kwload, asm: "VPSRLVW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLVWMasked256load", argLength: 4, reg: w2kwload, asm: "VPSRLVW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBB128load", argLength: 3, reg: v21load, asm: "VPSUBB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBB256load", argLength: 3, reg: v21load, asm: "VPSUBB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBBMasked128load", argLength: 4, reg: w2kwload, asm: "VPSUBB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBBMasked256load", argLength: 4, reg: w2kwload, asm: "VPSUBB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBD128load", argLength: 3, reg: v21load, asm: "VPSUBD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBD256load", argLength: 3, reg: v21load, asm: "VPSUBD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBD512load", argLength: 3, reg: w21load, asm: "VPSUBD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBDMasked128load", argLength: 4, reg: w2kwload, asm: "VPSUBD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBDMasked256load", argLength: 4, reg: w2kwload, asm: "VPSUBD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBDMasked512load", argLength: 4, reg: w2kwload, asm: "VPSUBD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBQ128load", argLength: 3, reg: v21load, asm: "VPSUBQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBQ256load", argLength: 3, reg: v21load, asm: "VPSUBQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBQ512load", argLength: 3, reg: w21load, asm: "VPSUBQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSUBQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSUBQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSUBQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBSB128load", argLength: 3, reg: v21load, asm: "VPSUBSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBSB256load", argLength: 3, reg: v21load, asm: "VPSUBSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBSBMasked128load", argLength: 4, reg: w2kwload, asm: "VPSUBSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBSBMasked256load", argLength: 4, reg: w2kwload, asm: "VPSUBSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBSW128load", argLength: 3, reg: v21load, asm: "VPSUBSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBSW256load", argLength: 3, reg: v21load, asm: "VPSUBSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBSWMasked128load", argLength: 4, reg: w2kwload, asm: "VPSUBSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBSWMasked256load", argLength: 4, reg: w2kwload, asm: "VPSUBSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBUSB128load", argLength: 3, reg: v21load, asm: "VPSUBUSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBUSB256load", argLength: 3, reg: v21load, asm: "VPSUBUSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBUSBMasked128load", argLength: 4, reg: w2kwload, asm: "VPSUBUSB", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBUSBMasked256load", argLength: 4, reg: w2kwload, asm: "VPSUBUSB", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBUSW128load", argLength: 3, reg: v21load, asm: "VPSUBUSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBUSW256load", argLength: 3, reg: v21load, asm: "VPSUBUSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBUSWMasked128load", argLength: 4, reg: w2kwload, asm: "VPSUBUSW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBUSWMasked256load", argLength: 4, reg: w2kwload, asm: "VPSUBUSW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBW128load", argLength: 3, reg: v21load, asm: "VPSUBW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBW256load", argLength: 3, reg: v21load, asm: "VPSUBW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBWMasked128load", argLength: 4, reg: w2kwload, asm: "VPSUBW", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPSUBWMasked256load", argLength: 4, reg: w2kwload, asm: "VPSUBW", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHDQ128load", argLength: 3, reg: v21load, asm: "VPUNPCKHDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHDQ256load", argLength: 3, reg: v21load, asm: "VPUNPCKHDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKHDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHQDQ128load", argLength: 3, reg: v21load, asm: "VPUNPCKHQDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHQDQ256load", argLength: 3, reg: v21load, asm: "VPUNPCKHQDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHQDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKHQDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHWD128load", argLength: 3, reg: v21load, asm: "VPUNPCKHWD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKHWD256load", argLength: 3, reg: v21load, asm: "VPUNPCKHWD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLDQ128load", argLength: 3, reg: v21load, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLDQ256load", argLength: 3, reg: v21load, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLQDQ128load", argLength: 3, reg: v21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLQDQ256load", argLength: 3, reg: v21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLQDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLWD128load", argLength: 3, reg: v21load, asm: "VPUNPCKLWD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLWD256load", argLength: 3, reg: v21load, asm: "VPUNPCKLWD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXOR128load", argLength: 3, reg: v21load, asm: "VPXOR", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXOR256load", argLength: 3, reg: v21load, asm: "VPXOR", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXORD512load", argLength: 3, reg: w21load, asm: "VPXORD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXORDMasked128load", argLength: 4, reg: w2kwload, asm: "VPXORD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPXORDMasked256load", argLength: 4, reg: w2kwload, asm: "VPXORD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1918,6 +2359,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VRCP14PSMasked128load", argLength: 3, reg: wkwload, asm: "VRCP14PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCP14PSMasked256load", argLength: 3, reg: wkwload, asm: "VRCP14PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCP14PSMasked512load", argLength: 3, reg: wkwload, asm: "VRCP14PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCPPS128load", argLength: 2, reg: v11load, asm: "VRCPPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRCPPS256load", argLength: 2, reg: v11load, asm: "VRCPPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PD128load", argLength: 2, reg: w11load, asm: "VRSQRT14PD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PD256load", argLength: 2, reg: w11load, asm: "VRSQRT14PD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PD512load", argLength: 2, reg: w11load, asm: "VRSQRT14PD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1928,6 +2371,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VRSQRT14PSMasked128load", argLength: 3, reg: wkwload, asm: "VRSQRT14PS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PSMasked256load", argLength: 3, reg: wkwload, asm: "VRSQRT14PS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRT14PSMasked512load", argLength: 3, reg: wkwload, asm: "VRSQRT14PS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRTPS128load", argLength: 2, reg: v11load, asm: "VRSQRTPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRSQRTPS256load", argLength: 2, reg: v11load, asm: "VRSQRTPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPD128load", argLength: 3, reg: w21load, asm: "VSCALEFPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPD256load", argLength: 3, reg: w21load, asm: "VSCALEFPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPD512load", argLength: 3, reg: w21load, asm: "VSCALEFPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1940,26 +2385,39 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VSCALEFPSMasked128load", argLength: 4, reg: w2kwload, asm: "VSCALEFPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPSMasked256load", argLength: 4, reg: w2kwload, asm: "VSCALEFPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSCALEFPSMasked512load", argLength: 4, reg: w2kwload, asm: "VSCALEFPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPD128load", argLength: 2, reg: v11load, asm: "VSQRTPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPD256load", argLength: 2, reg: v11load, asm: "VSQRTPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPD512load", argLength: 2, reg: w11load, asm: "VSQRTPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPDMasked128load", argLength: 3, reg: wkwload, asm: "VSQRTPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPDMasked256load", argLength: 3, reg: wkwload, asm: "VSQRTPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPDMasked512load", argLength: 3, reg: wkwload, asm: "VSQRTPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPS128load", argLength: 2, reg: v11load, asm: "VSQRTPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPS256load", argLength: 2, reg: v11load, asm: "VSQRTPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPS512load", argLength: 2, reg: w11load, asm: "VSQRTPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPSMasked128load", argLength: 3, reg: wkwload, asm: "VSQRTPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPSMasked256load", argLength: 3, reg: wkwload, asm: "VSQRTPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSQRTPSMasked512load", argLength: 3, reg: wkwload, asm: "VSQRTPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPD128load", argLength: 3, reg: v21load, asm: "VSUBPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPD256load", argLength: 3, reg: v21load, asm: "VSUBPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPD512load", argLength: 3, reg: w21load, asm: "VSUBPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPDMasked128load", argLength: 4, reg: w2kwload, asm: "VSUBPD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPDMasked256load", argLength: 4, reg: w2kwload, asm: "VSUBPD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPDMasked512load", argLength: 4, reg: w2kwload, asm: "VSUBPD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPS128load", argLength: 3, reg: v21load, asm: "VSUBPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPS256load", argLength: 3, reg: v21load, asm: "VSUBPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPS512load", argLength: 3, reg: w21load, asm: "VSUBPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPSMasked128load", argLength: 4, reg: w2kwload, asm: "VSUBPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPSMasked256load", argLength: 4, reg: w2kwload, asm: "VSUBPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VSUBPSMasked512load", argLength: 4, reg: w2kwload, asm: "VSUBPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VAESKEYGENASSIST128load", argLength: 2, reg: v11load, asm: "VAESKEYGENASSIST", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPD128load", argLength: 3, reg: v21load, asm: "VCMPPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPD256load", argLength: 3, reg: v21load, asm: "VCMPPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPD512load", argLength: 3, reg: w2kload, asm: "VCMPPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPDMasked128load", argLength: 4, reg: w2kkload, asm: "VCMPPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPDMasked256load", argLength: 4, reg: w2kkload, asm: "VCMPPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPDMasked512load", argLength: 4, reg: w2kkload, asm: "VCMPPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPS128load", argLength: 3, reg: v21load, asm: "VCMPPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPS256load", argLength: 3, reg: v21load, asm: "VCMPPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPS512load", argLength: 3, reg: w2kload, asm: "VCMPPS", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPSMasked128load", argLength: 4, reg: w2kkload, asm: "VCMPPS", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPSMasked256load", argLength: 4, reg: w2kkload, asm: "VCMPPS", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
@ -1976,6 +2434,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VGF2P8AFFINEQBMasked128load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEQBMasked256load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEQBMasked512load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VINSERTF64X4512load", argLength: 3, reg: w21load, asm: "VINSERTF64X4", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VINSERTF128256load", argLength: 3, reg: v21load, asm: "VINSERTF128", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VINSERTI64X4512load", argLength: 3, reg: w21load, asm: "VINSERTI64X4", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VINSERTI128256load", argLength: 3, reg: v21load, asm: "VINSERTI128", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPALIGNR128load", argLength: 3, reg: v21load, asm: "VPALIGNR", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPALIGNR256load", argLength: 3, reg: v21load, asm: "VPALIGNR", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPALIGNRMasked128load", argLength: 4, reg: w2kwload, asm: "VPALIGNR", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPALIGNRMasked256load", argLength: 4, reg: w2kwload, asm: "VPALIGNR", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCLMULQDQ128load", argLength: 3, reg: v21load, asm: "VPCLMULQDQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCLMULQDQ256load", argLength: 3, reg: w21load, asm: "VPCLMULQDQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPBMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPB", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPBMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPB", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPD512load", argLength: 3, reg: w2kload, asm: "VPCMPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPDMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPDMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
@ -1984,6 +2454,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPCMPQMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPQMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPQMasked512load", argLength: 4, reg: w2kkload, asm: "VPCMPQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUBMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPUB", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUBMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPUB", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUD512load", argLength: 3, reg: w2kload, asm: "VPCMPUD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUDMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPUD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUDMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPUD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
@ -1992,6 +2464,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPCMPUQMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPUQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUQMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPUQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUQMasked512load", argLength: 4, reg: w2kkload, asm: "VPCMPUQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUWMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPUW", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUWMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPUW", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPWMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPW", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPWMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPW", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPERM2F128256load", argLength: 3, reg: v21load, asm: "VPERM2F128", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPERM2I128256load", argLength: 3, reg: v21load, asm: "VPERM2I128", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLD128load", argLength: 2, reg: w11load, asm: "VPROLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLD256load", argLength: 2, reg: w11load, asm: "VPROLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLD512load", argLength: 2, reg: w11load, asm: "VPROLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
@ -2028,6 +2506,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSHLDQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHLDQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHLDQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSHLDQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDW128load", argLength: 3, reg: w21load, asm: "VPSHLDW", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDW256load", argLength: 3, reg: w21load, asm: "VPSHLDW", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDWMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHLDW", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDWMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHLDW", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDD128load", argLength: 3, reg: w21load, asm: "VPSHRDD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDD256load", argLength: 3, reg: w21load, asm: "VPSHRDD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDD512load", argLength: 3, reg: w21load, asm: "VPSHRDD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
@ -2040,10 +2522,24 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSHRDQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHRDQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHRDQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSHRDQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDW128load", argLength: 3, reg: w21load, asm: "VPSHRDW", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDW256load", argLength: 3, reg: w21load, asm: "VPSHRDW", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDWMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHRDW", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDWMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHRDW", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFD128load", argLength: 2, reg: v11load, asm: "VPSHUFD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFD256load", argLength: 2, reg: v11load, asm: "VPSHUFD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFD512load", argLength: 2, reg: w11load, asm: "VPSHUFD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFDMasked128load", argLength: 3, reg: wkwload, asm: "VPSHUFD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFDMasked256load", argLength: 3, reg: wkwload, asm: "VPSHUFD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFDMasked512load", argLength: 3, reg: wkwload, asm: "VPSHUFD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFHW128load", argLength: 2, reg: v11load, asm: "VPSHUFHW", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFHW256load", argLength: 2, reg: v11load, asm: "VPSHUFHW", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFHWMasked128load", argLength: 3, reg: wkwload, asm: "VPSHUFHW", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFHWMasked256load", argLength: 3, reg: wkwload, asm: "VPSHUFHW", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFLW128load", argLength: 2, reg: v11load, asm: "VPSHUFLW", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFLW256load", argLength: 2, reg: v11load, asm: "VPSHUFLW", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFLWMasked128load", argLength: 3, reg: wkwload, asm: "VPSHUFLW", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFLWMasked256load", argLength: 3, reg: wkwload, asm: "VPSHUFLW", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLD512constload", argLength: 2, reg: w11load, asm: "VPSLLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLDMasked128constload", argLength: 3, reg: wkwload, asm: "VPSLLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLDMasked256constload", argLength: 3, reg: wkwload, asm: "VPSLLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
@ -2052,6 +2548,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSLLQMasked128constload", argLength: 3, reg: wkwload, asm: "VPSLLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLQMasked256constload", argLength: 3, reg: wkwload, asm: "VPSLLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLQMasked512constload", argLength: 3, reg: wkwload, asm: "VPSLLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLWMasked128constload", argLength: 3, reg: wkwload, asm: "VPSLLW", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLWMasked256constload", argLength: 3, reg: wkwload, asm: "VPSLLW", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAD512constload", argLength: 2, reg: w11load, asm: "VPSRAD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRADMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRAD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRADMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRAD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
@ -2062,6 +2560,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSRAQMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAQMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAQMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAWMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRAW", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAWMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRAW", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLD512constload", argLength: 2, reg: w11load, asm: "VPSRLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLDMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLDMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
@ -2070,6 +2570,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSRLQMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLQMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLQMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLWMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRLW", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLWMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRLW", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPTERNLOGD128load", argLength: 4, reg: w31load, asm: "VPTERNLOGD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
{name: "VPTERNLOGD256load", argLength: 4, reg: w31load, asm: "VPTERNLOGD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
{name: "VPTERNLOGD512load", argLength: 4, reg: w31load, asm: "VPTERNLOGD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
@ -2100,7 +2602,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VRNDSCALEPSMasked128load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPSMasked256load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPSMasked512load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VROUNDPD128load", argLength: 2, reg: v11load, asm: "VROUNDPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VROUNDPD256load", argLength: 2, reg: v11load, asm: "VROUNDPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VROUNDPS128load", argLength: 2, reg: v11load, asm: "VROUNDPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VROUNDPS256load", argLength: 2, reg: v11load, asm: "VROUNDPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VSHUFPD128load", argLength: 3, reg: v21load, asm: "VSHUFPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VSHUFPD256load", argLength: 3, reg: v21load, asm: "VSHUFPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VSHUFPD512load", argLength: 3, reg: w21load, asm: "VSHUFPD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VSHUFPS128load", argLength: 3, reg: v21load, asm: "VSHUFPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VSHUFPS256load", argLength: 3, reg: v21load, asm: "VSHUFPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VSHUFPS512load", argLength: 3, reg: w21load, asm: "VSHUFPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VADDPDMasked128Merging", argLength: 4, reg: w3kw, asm: "VADDPD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VADDPDMasked256Merging", argLength: 4, reg: w3kw, asm: "VADDPD", commutative: false, typ: "Vec256", resultInArg0: true},

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -839,12 +839,12 @@ func (o *Operation) hasMaskedMerging(maskType maskShape, outType outShape) bool
len(o.InVariant) == 1 && !strings.Contains(o.Asm, "BLEND") && !strings.Contains(o.Asm, "VMOVDQU")
}
func getVbcstData(s string) (feat1Match, feat2Match string) {
_, err := fmt.Sscanf(s, "feat1=%[^;];feat2=%s", &feat1Match, &feat2Match)
if err != nil {
panic(err)
func getVbcstData(s string) (string, string) {
feat1, feat2, found := strings.Cut(s, ";")
if !found || !strings.HasPrefix(feat1, "feat1=") || !strings.HasPrefix(feat2, "feat2=") {
panic(fmt.Sprintf("unexpected format for vbcst data: %s", s))
}
return
return strings.TrimPrefix(feat1, "feat1="), strings.TrimPrefix(feat2, "feat2=")
}
func (o Operation) String() string {

View file

@ -40,6 +40,21 @@ const (
var operandRemarks int
var skipMemOpsInstrs = map[string]bool{
// The SHA instructions has confusing semantics which we are too complicated
// to support.
"SHA1MSG1": true,
"SHA1MSG2": true,
"SHA1RNDS4": true,
"SHA1NEXTE": true,
"SHA256MSG1": true,
"SHA256MSG2": true,
"SHA256RNDS2": true,
// We don't support 3 input instructions with a memory operand (this appears
// to be the only one).
"VPBLENDVB": true,
}
// TODO: Doc. Returns Values with Def domains.
func loadXED(xedPath string) []*unify.Value {
// TODO: Obviously a bunch more to do here.
@ -96,15 +111,15 @@ func loadXED(xedPath string) []*unify.Value {
return
}
var data map[string][]opData
opcode := inst.Opcode()
mem := checkMem(ops)
if mem == "vbcst" {
if mem == "hasMem" && !skipMemOpsInstrs[opcode] {
// A pure vreg variant might exist, wait for later to see if we can
// merge them
data = memOps
} else {
data = otherOps
}
opcode := inst.Opcode()
if _, ok := data[opcode]; !ok {
s := make([]opData, 1)
s[0] = opData{inst, ops, mem}
@ -121,7 +136,7 @@ func loadXED(xedPath string) []*unify.Value {
// Checking if there is a vbcst variant of this operation exist
// First check the opcode
// Keep this logic in sync with [decodeOperands]
if ms, ok := memOps[opcode]; ok {
if ms, ok := memOps[opcode]; ok && !strings.HasPrefix(opcode, "VMOV") {
feat1, ok1 := decodeCPUFeature(o.inst)
// Then check if there exist such an operation that for all vreg
// shapes they are the same at the same index
@ -155,8 +170,8 @@ func loadXED(xedPath string) []*unify.Value {
} else {
_, ok3 := o.ops[j].(operandVReg)
_, ok4 := m.ops[j].(operandMem)
// The only difference must be the vreg and mem, no other cases.
if !ok3 || !ok4 {
// The only difference must be the vreg and mem, and the operand must be a read operand.
if !ok3 || !ok4 || !o.ops[j].common().action.r {
// A mismatch, skip this memOp
continue outer
}
@ -167,7 +182,7 @@ func loadXED(xedPath string) []*unify.Value {
feat1Match = feat1
feat2Match = feat2
if featMismatchCnt > 1 {
panic("multiple feature mismatch vbcst memops detected, simdgen failed to distinguish")
panic(fmt.Sprintf("multiple feature mismatch vbcst memops detected for %s, simdgen failed to distinguish", opcode))
}
if !featMismatch {
// Mismatch feat is ok but should prioritize matching cases.
@ -455,9 +470,28 @@ func decodeOperand(db *xeddata.Database, operand string) (operand, error) {
vbcst: true,
unknown: false,
}, nil
} else {
baseType, elemBits, ok := decodeType(op)
if !ok {
return nil, fmt.Errorf("failed to decode memory width %q", operand)
}
sizeStr := db.WidthSize(op.Width, xeddata.OpSize64)
bytes, err := strconv.Atoi(sizeStr)
if err != nil {
return nil, fmt.Errorf("failed to decode memory width %q: %s", operand, err)
}
if bytes > 0 {
memBits := bytes * 8
shape := vecShape{elemBits: elemBits, bits: memBits}
return operandMem{
operandCommon: common,
vecShape: shape,
elemBaseType: baseType,
vbcst: false,
unknown: false,
}, nil
}
}
// TODO: parse op.Width better to handle all cases
// Right now this will at least miss VPBROADCAST.
return operandMem{
operandCommon: common,
unknown: true,
@ -697,11 +731,10 @@ func checkMem(ops []operand) string {
} else if memCnt > 1 {
memState = "tooManyMem"
} else {
// We only have vbcst case as of now.
// This shape has an indication that [bits] fields has two possible value:
// 1. The element broadcast width, which is its peer vreg operand's [elemBits] (default val in the parsed XED data)
// 2. The full vector width, which is its peer vreg operand's [bits] (godefs should be aware of this)
memState = "vbcst"
memState = "hasMem"
}
}
return memState

View file

@ -110,3 +110,14 @@ func sftImmVPSRL() archsimd.Uint32x4 {
// amd64:`VPSRLD \$1, .*$`
return x.ShiftAllRight(1)
}
var globalSlice = []uint32{1, 2, 3, 4, 5, 6, 7, 8}
func simdMemoryOperandMerge() archsimd.Uint32x4 {
a := archsimd.BroadcastUint32x4(1)
// amd64:`VPADDD \([A-Z]+\), X\d, X\d`
a = a.Add(archsimd.LoadUint32x4Slice(globalSlice[0:4]))
// amd64:`VPADDD 16\([A-Z]+\), X\d, X\d`
a = a.Add(archsimd.LoadUint32x4Slice(globalSlice[4:8]))
return a
}