[dev.simd] cmd/compile: add memop peephole rules

Change-Id: I442da7964ca8b4b9012ed206ccb92f5e68b0d42b
Reviewed-on: https://go-review.googlesource.com/c/go/+/701695
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Junyang Shao 2025-09-08 14:29:35 +00:00
parent 9a349f8e72
commit 48f366d826
3 changed files with 15891 additions and 1 deletions

View file

@ -1469,3 +1469,497 @@
(VMOVDQU16Masked512 (VPSRAW512const [a] x) mask) => (VPSRAWMasked512const [a] x mask)
(VMOVDQU32Masked512 (VPSRAD512const [a] x) mask) => (VPSRADMasked512const [a] x mask)
(VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512const [a] x mask)
(VPABSD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD128load {sym} [off] ptr mem)
(VPABSD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD256load {sym} [off] ptr mem)
(VPABSD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD512load {sym} [off] ptr mem)
(VPABSQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ128load {sym} [off] ptr mem)
(VPABSQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ256load {sym} [off] ptr mem)
(VPABSQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ512load {sym} [off] ptr mem)
(VPABSDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSDMasked128load {sym} [off] ptr mask mem)
(VPABSDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSDMasked256load {sym} [off] ptr mask mem)
(VPABSDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSDMasked512load {sym} [off] ptr mask mem)
(VPABSQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSQMasked128load {sym} [off] ptr mask mem)
(VPABSQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSQMasked256load {sym} [off] ptr mask mem)
(VPABSQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSQMasked512load {sym} [off] ptr mask mem)
(VADDPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPS128load {sym} [off] x ptr mem)
(VADDPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPS256load {sym} [off] x ptr mem)
(VADDPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPS512load {sym} [off] x ptr mem)
(VADDPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPD128load {sym} [off] x ptr mem)
(VADDPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPD256load {sym} [off] x ptr mem)
(VADDPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPD512load {sym} [off] x ptr mem)
(VPADDD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDD128load {sym} [off] x ptr mem)
(VPADDD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDD256load {sym} [off] x ptr mem)
(VPADDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDD512load {sym} [off] x ptr mem)
(VPADDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDQ128load {sym} [off] x ptr mem)
(VPADDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDQ256load {sym} [off] x ptr mem)
(VPADDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDQ512load {sym} [off] x ptr mem)
(VPDPWSSD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSD128load {sym} [off] x y ptr mem)
(VPDPWSSD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSD256load {sym} [off] x y ptr mem)
(VPDPWSSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSD512load {sym} [off] x y ptr mem)
(VPDPWSSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDMasked128load {sym} [off] x y ptr mask mem)
(VPDPWSSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDMasked256load {sym} [off] x y ptr mask mem)
(VPDPWSSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDMasked512load {sym} [off] x y ptr mask mem)
(VPDPWSSDS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDS128load {sym} [off] x y ptr mem)
(VPDPWSSDS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDS256load {sym} [off] x y ptr mem)
(VPDPWSSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDS512load {sym} [off] x y ptr mem)
(VPDPWSSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDSMasked128load {sym} [off] x y ptr mask mem)
(VPDPWSSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDSMasked256load {sym} [off] x y ptr mask mem)
(VPDPWSSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDSMasked512load {sym} [off] x y ptr mask mem)
(VPDPBUSD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSD128load {sym} [off] x y ptr mem)
(VPDPBUSD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSD256load {sym} [off] x y ptr mem)
(VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSD512load {sym} [off] x y ptr mem)
(VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem)
(VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem)
(VPDPBUSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem)
(VPDPBUSDS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDS128load {sym} [off] x y ptr mem)
(VPDPBUSDS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDS256load {sym} [off] x y ptr mem)
(VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDS512load {sym} [off] x y ptr mem)
(VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem)
(VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem)
(VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem)
(VADDPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPSMasked128load {sym} [off] x ptr mask mem)
(VADDPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPSMasked256load {sym} [off] x ptr mask mem)
(VADDPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPSMasked512load {sym} [off] x ptr mask mem)
(VADDPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPDMasked128load {sym} [off] x ptr mask mem)
(VADDPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPDMasked256load {sym} [off] x ptr mask mem)
(VADDPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPDMasked512load {sym} [off] x ptr mask mem)
(VPADDDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDDMasked128load {sym} [off] x ptr mask mem)
(VPADDDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDDMasked256load {sym} [off] x ptr mask mem)
(VPADDDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDDMasked512load {sym} [off] x ptr mask mem)
(VPADDQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDQMasked128load {sym} [off] x ptr mask mem)
(VPADDQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDQMasked256load {sym} [off] x ptr mask mem)
(VPADDQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDQMasked512load {sym} [off] x ptr mask mem)
(VPANDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDD512load {sym} [off] x ptr mem)
(VPANDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDQ512load {sym} [off] x ptr mem)
(VPANDDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDDMasked128load {sym} [off] x ptr mask mem)
(VPANDDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDDMasked256load {sym} [off] x ptr mask mem)
(VPANDDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDDMasked512load {sym} [off] x ptr mask mem)
(VPANDQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDQMasked128load {sym} [off] x ptr mask mem)
(VPANDQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDQMasked256load {sym} [off] x ptr mask mem)
(VPANDQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDQMasked512load {sym} [off] x ptr mask mem)
(VPANDND512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDND512load {sym} [off] x ptr mem)
(VPANDNQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDNQ512load {sym} [off] x ptr mem)
(VPANDNDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNDMasked128load {sym} [off] x ptr mask mem)
(VPANDNDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNDMasked256load {sym} [off] x ptr mask mem)
(VPANDNDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNDMasked512load {sym} [off] x ptr mask mem)
(VPANDNQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked128load {sym} [off] x ptr mask mem)
(VPANDNQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked256load {sym} [off] x ptr mask mem)
(VPANDNQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked512load {sym} [off] x ptr mask mem)
(VPACKSSDW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW128load {sym} [off] x ptr mem)
(VPACKSSDW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW256load {sym} [off] x ptr mem)
(VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW512load {sym} [off] x ptr mem)
(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem)
(VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked256load {sym} [off] x ptr mask mem)
(VPACKSSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked512load {sym} [off] x ptr mask mem)
(VCVTTPS2DQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQ128load {sym} [off] ptr mem)
(VCVTTPS2DQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQ256load {sym} [off] ptr mem)
(VCVTTPS2DQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQ512load {sym} [off] ptr mem)
(VCVTTPS2DQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQMasked128load {sym} [off] ptr mask mem)
(VCVTTPS2DQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQMasked256load {sym} [off] ptr mask mem)
(VCVTTPS2DQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQMasked512load {sym} [off] ptr mask mem)
(VPACKUSDW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW128load {sym} [off] x ptr mem)
(VPACKUSDW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW256load {sym} [off] x ptr mem)
(VPACKUSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW512load {sym} [off] x ptr mem)
(VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked128load {sym} [off] x ptr mask mem)
(VPACKUSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked256load {sym} [off] x ptr mask mem)
(VPACKUSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked512load {sym} [off] x ptr mask mem)
(VCVTPS2UDQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQ128load {sym} [off] ptr mem)
(VCVTPS2UDQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQ256load {sym} [off] ptr mem)
(VCVTPS2UDQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQ512load {sym} [off] ptr mem)
(VCVTPS2UDQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQMasked128load {sym} [off] ptr mask mem)
(VCVTPS2UDQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQMasked256load {sym} [off] ptr mask mem)
(VCVTPS2UDQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQMasked512load {sym} [off] ptr mask mem)
(VDIVPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPS128load {sym} [off] x ptr mem)
(VDIVPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPS256load {sym} [off] x ptr mem)
(VDIVPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPS512load {sym} [off] x ptr mem)
(VDIVPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPD128load {sym} [off] x ptr mem)
(VDIVPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPD256load {sym} [off] x ptr mem)
(VDIVPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPD512load {sym} [off] x ptr mem)
(VDIVPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPSMasked128load {sym} [off] x ptr mask mem)
(VDIVPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPSMasked256load {sym} [off] x ptr mask mem)
(VDIVPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPSMasked512load {sym} [off] x ptr mask mem)
(VDIVPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked128load {sym} [off] x ptr mask mem)
(VDIVPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked256load {sym} [off] x ptr mask mem)
(VDIVPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked512load {sym} [off] x ptr mask mem)
(VPCMPEQD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD128load {sym} [off] x ptr mem)
(VPCMPEQD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD256load {sym} [off] x ptr mem)
(VPCMPEQD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD512load {sym} [off] x ptr mem)
(VPCMPEQQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ128load {sym} [off] x ptr mem)
(VPCMPEQQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ256load {sym} [off] x ptr mem)
(VPCMPEQQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ512load {sym} [off] x ptr mem)
(VPCMPGTD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTD128load {sym} [off] x ptr mem)
(VPCMPGTD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTD256load {sym} [off] x ptr mem)
(VPCMPGTD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTD512load {sym} [off] x ptr mem)
(VPCMPGTQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTQ128load {sym} [off] x ptr mem)
(VPCMPGTQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTQ256load {sym} [off] x ptr mem)
(VPCMPGTQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTQ512load {sym} [off] x ptr mem)
(VPUNPCKHDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHDQ128load {sym} [off] x ptr mem)
(VPUNPCKHQDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHQDQ128load {sym} [off] x ptr mem)
(VPUNPCKHDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHDQ256load {sym} [off] x ptr mem)
(VPUNPCKHDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHDQ512load {sym} [off] x ptr mem)
(VPUNPCKHQDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHQDQ256load {sym} [off] x ptr mem)
(VPUNPCKHQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHQDQ512load {sym} [off] x ptr mem)
(VPUNPCKLDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ128load {sym} [off] x ptr mem)
(VPUNPCKLQDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ128load {sym} [off] x ptr mem)
(VPUNPCKLDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ256load {sym} [off] x ptr mem)
(VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ512load {sym} [off] x ptr mem)
(VPUNPCKLQDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ256load {sym} [off] x ptr mem)
(VPUNPCKLQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ512load {sym} [off] x ptr mem)
(VMAXPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS128load {sym} [off] x ptr mem)
(VMAXPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS256load {sym} [off] x ptr mem)
(VMAXPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS512load {sym} [off] x ptr mem)
(VMAXPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPD128load {sym} [off] x ptr mem)
(VMAXPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPD256load {sym} [off] x ptr mem)
(VMAXPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPD512load {sym} [off] x ptr mem)
(VPMAXSD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSD128load {sym} [off] x ptr mem)
(VPMAXSD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSD256load {sym} [off] x ptr mem)
(VPMAXSD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSD512load {sym} [off] x ptr mem)
(VPMAXSQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQ128load {sym} [off] x ptr mem)
(VPMAXSQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQ256load {sym} [off] x ptr mem)
(VPMAXSQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQ512load {sym} [off] x ptr mem)
(VPMAXUD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUD128load {sym} [off] x ptr mem)
(VPMAXUD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUD256load {sym} [off] x ptr mem)
(VPMAXUD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUD512load {sym} [off] x ptr mem)
(VPMAXUQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQ128load {sym} [off] x ptr mem)
(VPMAXUQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQ256load {sym} [off] x ptr mem)
(VPMAXUQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQ512load {sym} [off] x ptr mem)
(VMAXPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPSMasked128load {sym} [off] x ptr mask mem)
(VMAXPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPSMasked256load {sym} [off] x ptr mask mem)
(VMAXPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPSMasked512load {sym} [off] x ptr mask mem)
(VMAXPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPDMasked128load {sym} [off] x ptr mask mem)
(VMAXPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPDMasked256load {sym} [off] x ptr mask mem)
(VMAXPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPDMasked512load {sym} [off] x ptr mask mem)
(VPMAXSDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSDMasked128load {sym} [off] x ptr mask mem)
(VPMAXSDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSDMasked256load {sym} [off] x ptr mask mem)
(VPMAXSDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSDMasked512load {sym} [off] x ptr mask mem)
(VPMAXSQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQMasked128load {sym} [off] x ptr mask mem)
(VPMAXSQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQMasked256load {sym} [off] x ptr mask mem)
(VPMAXSQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQMasked512load {sym} [off] x ptr mask mem)
(VPMAXUDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUDMasked128load {sym} [off] x ptr mask mem)
(VPMAXUDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUDMasked256load {sym} [off] x ptr mask mem)
(VPMAXUDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUDMasked512load {sym} [off] x ptr mask mem)
(VPMAXUQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQMasked128load {sym} [off] x ptr mask mem)
(VPMAXUQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQMasked256load {sym} [off] x ptr mask mem)
(VPMAXUQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQMasked512load {sym} [off] x ptr mask mem)
(VMINPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPS128load {sym} [off] x ptr mem)
(VMINPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPS256load {sym} [off] x ptr mem)
(VMINPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPS512load {sym} [off] x ptr mem)
(VMINPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPD128load {sym} [off] x ptr mem)
(VMINPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPD256load {sym} [off] x ptr mem)
(VMINPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPD512load {sym} [off] x ptr mem)
(VPMINSD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSD128load {sym} [off] x ptr mem)
(VPMINSD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSD256load {sym} [off] x ptr mem)
(VPMINSD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSD512load {sym} [off] x ptr mem)
(VPMINSQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSQ128load {sym} [off] x ptr mem)
(VPMINSQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSQ256load {sym} [off] x ptr mem)
(VPMINSQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSQ512load {sym} [off] x ptr mem)
(VPMINUD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUD128load {sym} [off] x ptr mem)
(VPMINUD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUD256load {sym} [off] x ptr mem)
(VPMINUD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUD512load {sym} [off] x ptr mem)
(VPMINUQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUQ128load {sym} [off] x ptr mem)
(VPMINUQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUQ256load {sym} [off] x ptr mem)
(VPMINUQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUQ512load {sym} [off] x ptr mem)
(VMINPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPSMasked128load {sym} [off] x ptr mask mem)
(VMINPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPSMasked256load {sym} [off] x ptr mask mem)
(VMINPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPSMasked512load {sym} [off] x ptr mask mem)
(VMINPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPDMasked128load {sym} [off] x ptr mask mem)
(VMINPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPDMasked256load {sym} [off] x ptr mask mem)
(VMINPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPDMasked512load {sym} [off] x ptr mask mem)
(VPMINSDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSDMasked128load {sym} [off] x ptr mask mem)
(VPMINSDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSDMasked256load {sym} [off] x ptr mask mem)
(VPMINSDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSDMasked512load {sym} [off] x ptr mask mem)
(VPMINSQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSQMasked128load {sym} [off] x ptr mask mem)
(VPMINSQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSQMasked256load {sym} [off] x ptr mask mem)
(VPMINSQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSQMasked512load {sym} [off] x ptr mask mem)
(VPMINUDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUDMasked128load {sym} [off] x ptr mask mem)
(VPMINUDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUDMasked256load {sym} [off] x ptr mask mem)
(VPMINUDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUDMasked512load {sym} [off] x ptr mask mem)
(VPMINUQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUQMasked128load {sym} [off] x ptr mask mem)
(VPMINUQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUQMasked256load {sym} [off] x ptr mask mem)
(VPMINUQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUQMasked512load {sym} [off] x ptr mask mem)
(VMULPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPS128load {sym} [off] x ptr mem)
(VMULPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPS256load {sym} [off] x ptr mem)
(VMULPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPS512load {sym} [off] x ptr mem)
(VMULPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPD128load {sym} [off] x ptr mem)
(VMULPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPD256load {sym} [off] x ptr mem)
(VMULPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPD512load {sym} [off] x ptr mem)
(VPMULLD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLD128load {sym} [off] x ptr mem)
(VPMULLD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLD256load {sym} [off] x ptr mem)
(VPMULLD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLD512load {sym} [off] x ptr mem)
(VPMULLQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLQ128load {sym} [off] x ptr mem)
(VPMULLQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLQ256load {sym} [off] x ptr mem)
(VPMULLQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLQ512load {sym} [off] x ptr mem)
(VFMADD213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PS128load {sym} [off] x y ptr mem)
(VFMADD213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PS256load {sym} [off] x y ptr mem)
(VFMADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PS512load {sym} [off] x y ptr mem)
(VFMADD213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PD128load {sym} [off] x y ptr mem)
(VFMADD213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PD256load {sym} [off] x y ptr mem)
(VFMADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PD512load {sym} [off] x y ptr mem)
(VFMADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PSMasked128load {sym} [off] x y ptr mask mem)
(VFMADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PSMasked256load {sym} [off] x y ptr mask mem)
(VFMADD213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PSMasked512load {sym} [off] x y ptr mask mem)
(VFMADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PDMasked128load {sym} [off] x y ptr mask mem)
(VFMADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PDMasked256load {sym} [off] x y ptr mask mem)
(VFMADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PDMasked512load {sym} [off] x y ptr mask mem)
(VFMADDSUB213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PS128load {sym} [off] x y ptr mem)
(VFMADDSUB213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PS256load {sym} [off] x y ptr mem)
(VFMADDSUB213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PS512load {sym} [off] x y ptr mem)
(VFMADDSUB213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PD128load {sym} [off] x y ptr mem)
(VFMADDSUB213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PD256load {sym} [off] x y ptr mem)
(VFMADDSUB213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PD512load {sym} [off] x y ptr mem)
(VFMADDSUB213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PSMasked128load {sym} [off] x y ptr mask mem)
(VFMADDSUB213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PSMasked256load {sym} [off] x y ptr mask mem)
(VFMADDSUB213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PSMasked512load {sym} [off] x y ptr mask mem)
(VFMADDSUB213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PDMasked128load {sym} [off] x y ptr mask mem)
(VFMADDSUB213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PDMasked256load {sym} [off] x y ptr mask mem)
(VFMADDSUB213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PDMasked512load {sym} [off] x y ptr mask mem)
(VPMULDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULDQ128load {sym} [off] x ptr mem)
(VPMULDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULDQ256load {sym} [off] x ptr mem)
(VPMULUDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULUDQ128load {sym} [off] x ptr mem)
(VPMULUDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULUDQ256load {sym} [off] x ptr mem)
(VMULPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPSMasked128load {sym} [off] x ptr mask mem)
(VMULPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPSMasked256load {sym} [off] x ptr mask mem)
(VMULPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPSMasked512load {sym} [off] x ptr mask mem)
(VMULPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPDMasked128load {sym} [off] x ptr mask mem)
(VMULPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPDMasked256load {sym} [off] x ptr mask mem)
(VMULPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPDMasked512load {sym} [off] x ptr mask mem)
(VPMULLDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLDMasked128load {sym} [off] x ptr mask mem)
(VPMULLDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLDMasked256load {sym} [off] x ptr mask mem)
(VPMULLDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLDMasked512load {sym} [off] x ptr mask mem)
(VPMULLQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLQMasked128load {sym} [off] x ptr mask mem)
(VPMULLQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLQMasked256load {sym} [off] x ptr mask mem)
(VPMULLQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLQMasked512load {sym} [off] x ptr mask mem)
(VFMSUBADD213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PS128load {sym} [off] x y ptr mem)
(VFMSUBADD213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PS256load {sym} [off] x y ptr mem)
(VFMSUBADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PS512load {sym} [off] x y ptr mem)
(VFMSUBADD213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PD128load {sym} [off] x y ptr mem)
(VFMSUBADD213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PD256load {sym} [off] x y ptr mem)
(VFMSUBADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PD512load {sym} [off] x y ptr mem)
(VFMSUBADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PSMasked128load {sym} [off] x y ptr mask mem)
(VFMSUBADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PSMasked256load {sym} [off] x y ptr mask mem)
(VFMSUBADD213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PSMasked512load {sym} [off] x y ptr mask mem)
(VFMSUBADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PDMasked128load {sym} [off] x y ptr mask mem)
(VFMSUBADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PDMasked256load {sym} [off] x y ptr mask mem)
(VFMSUBADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PDMasked512load {sym} [off] x y ptr mask mem)
(VPOPCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTD128load {sym} [off] ptr mem)
(VPOPCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTD256load {sym} [off] ptr mem)
(VPOPCNTD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTD512load {sym} [off] ptr mem)
(VPOPCNTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQ128load {sym} [off] ptr mem)
(VPOPCNTQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQ256load {sym} [off] ptr mem)
(VPOPCNTQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQ512load {sym} [off] ptr mem)
(VPOPCNTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTDMasked128load {sym} [off] ptr mask mem)
(VPOPCNTDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTDMasked256load {sym} [off] ptr mask mem)
(VPOPCNTDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTDMasked512load {sym} [off] ptr mask mem)
(VPOPCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQMasked128load {sym} [off] ptr mask mem)
(VPOPCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQMasked256load {sym} [off] ptr mask mem)
(VPOPCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQMasked512load {sym} [off] ptr mask mem)
(VPORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPORD512load {sym} [off] x ptr mem)
(VPORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPORQ512load {sym} [off] x ptr mem)
(VPORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORDMasked128load {sym} [off] x ptr mask mem)
(VPORDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORDMasked256load {sym} [off] x ptr mask mem)
(VPORDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORDMasked512load {sym} [off] x ptr mask mem)
(VPORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORQMasked128load {sym} [off] x ptr mask mem)
(VPORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORQMasked256load {sym} [off] x ptr mask mem)
(VPORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORQMasked512load {sym} [off] x ptr mask mem)
(VPERMPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPS256load {sym} [off] x ptr mem)
(VPERMD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMD256load {sym} [off] x ptr mem)
(VPERMPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPS512load {sym} [off] x ptr mem)
(VPERMD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMD512load {sym} [off] x ptr mem)
(VPERMPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPD256load {sym} [off] x ptr mem)
(VPERMQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMQ256load {sym} [off] x ptr mem)
(VPERMPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPD512load {sym} [off] x ptr mem)
(VPERMQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMQ512load {sym} [off] x ptr mem)
(VPERMI2PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS128load {sym} [off] x y ptr mem)
(VPERMI2D128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D128load {sym} [off] x y ptr mem)
(VPERMI2PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS256load {sym} [off] x y ptr mem)
(VPERMI2D256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D256load {sym} [off] x y ptr mem)
(VPERMI2PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS512load {sym} [off] x y ptr mem)
(VPERMI2D512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D512load {sym} [off] x y ptr mem)
(VPERMI2PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD128load {sym} [off] x y ptr mem)
(VPERMI2Q128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q128load {sym} [off] x y ptr mem)
(VPERMI2PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD256load {sym} [off] x y ptr mem)
(VPERMI2Q256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q256load {sym} [off] x y ptr mem)
(VPERMI2PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD512load {sym} [off] x y ptr mem)
(VPERMI2Q512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q512load {sym} [off] x y ptr mem)
(VPERMI2PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked128load {sym} [off] x y ptr mask mem)
(VPERMI2DMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked128load {sym} [off] x y ptr mask mem)
(VPERMI2PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked256load {sym} [off] x y ptr mask mem)
(VPERMI2DMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked256load {sym} [off] x y ptr mask mem)
(VPERMI2PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked512load {sym} [off] x y ptr mask mem)
(VPERMI2DMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked512load {sym} [off] x y ptr mask mem)
(VPERMI2PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked128load {sym} [off] x y ptr mask mem)
(VPERMI2QMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked128load {sym} [off] x y ptr mask mem)
(VPERMI2PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked256load {sym} [off] x y ptr mask mem)
(VPERMI2QMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked256load {sym} [off] x y ptr mask mem)
(VPERMI2PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked512load {sym} [off] x y ptr mask mem)
(VPERMI2QMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked512load {sym} [off] x y ptr mask mem)
(VPERMPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPSMasked256load {sym} [off] x ptr mask mem)
(VPERMDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMDMasked256load {sym} [off] x ptr mask mem)
(VPERMPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPSMasked512load {sym} [off] x ptr mask mem)
(VPERMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMDMasked512load {sym} [off] x ptr mask mem)
(VPERMPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPDMasked256load {sym} [off] x ptr mask mem)
(VPERMQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMQMasked256load {sym} [off] x ptr mask mem)
(VPERMPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPDMasked512load {sym} [off] x ptr mask mem)
(VPERMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMQMasked512load {sym} [off] x ptr mask mem)
(VRCP14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCP14PS512load {sym} [off] ptr mem)
(VRCP14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCP14PD128load {sym} [off] ptr mem)
(VRCP14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCP14PD256load {sym} [off] ptr mem)
(VRCP14PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCP14PD512load {sym} [off] ptr mem)
(VRCP14PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PSMasked128load {sym} [off] ptr mask mem)
(VRCP14PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PSMasked256load {sym} [off] ptr mask mem)
(VRCP14PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PSMasked512load {sym} [off] ptr mask mem)
(VRCP14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PDMasked128load {sym} [off] ptr mask mem)
(VRCP14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PDMasked256load {sym} [off] ptr mask mem)
(VRCP14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PDMasked512load {sym} [off] ptr mask mem)
(VRSQRT14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PS512load {sym} [off] ptr mem)
(VRSQRT14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PD128load {sym} [off] ptr mem)
(VRSQRT14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PD256load {sym} [off] ptr mem)
(VRSQRT14PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PD512load {sym} [off] ptr mem)
(VRSQRT14PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PSMasked128load {sym} [off] ptr mask mem)
(VRSQRT14PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PSMasked256load {sym} [off] ptr mask mem)
(VRSQRT14PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PSMasked512load {sym} [off] ptr mask mem)
(VRSQRT14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked128load {sym} [off] ptr mask mem)
(VRSQRT14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked256load {sym} [off] ptr mask mem)
(VRSQRT14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked512load {sym} [off] ptr mask mem)
(VPROLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD128load {sym} [off] x ptr mem)
(VPROLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD256load {sym} [off] x ptr mem)
(VPROLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD512load {sym} [off] x ptr mem)
(VPROLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVQ128load {sym} [off] x ptr mem)
(VPROLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVQ256load {sym} [off] x ptr mem)
(VPROLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVQ512load {sym} [off] x ptr mem)
(VPROLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLVDMasked128load {sym} [off] x ptr mask mem)
(VPROLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLVDMasked256load {sym} [off] x ptr mask mem)
(VPROLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLVDMasked512load {sym} [off] x ptr mask mem)
(VPROLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLVQMasked128load {sym} [off] x ptr mask mem)
(VPROLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLVQMasked256load {sym} [off] x ptr mask mem)
(VPROLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLVQMasked512load {sym} [off] x ptr mask mem)
(VPRORVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORVD128load {sym} [off] x ptr mem)
(VPRORVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORVD256load {sym} [off] x ptr mem)
(VPRORVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORVD512load {sym} [off] x ptr mem)
(VPRORVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORVQ128load {sym} [off] x ptr mem)
(VPRORVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORVQ256load {sym} [off] x ptr mem)
(VPRORVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORVQ512load {sym} [off] x ptr mem)
(VPRORVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVDMasked128load {sym} [off] x ptr mask mem)
(VPRORVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVDMasked256load {sym} [off] x ptr mask mem)
(VPRORVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVDMasked512load {sym} [off] x ptr mask mem)
(VPRORVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked128load {sym} [off] x ptr mask mem)
(VPRORVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked256load {sym} [off] x ptr mask mem)
(VPRORVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked512load {sym} [off] x ptr mask mem)
(VSCALEFPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS128load {sym} [off] x ptr mem)
(VSCALEFPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS256load {sym} [off] x ptr mem)
(VSCALEFPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS512load {sym} [off] x ptr mem)
(VSCALEFPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPD128load {sym} [off] x ptr mem)
(VSCALEFPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPD256load {sym} [off] x ptr mem)
(VSCALEFPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPD512load {sym} [off] x ptr mem)
(VSCALEFPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPSMasked128load {sym} [off] x ptr mask mem)
(VSCALEFPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPSMasked256load {sym} [off] x ptr mask mem)
(VSCALEFPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPSMasked512load {sym} [off] x ptr mask mem)
(VSCALEFPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked128load {sym} [off] x ptr mask mem)
(VSCALEFPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked256load {sym} [off] x ptr mask mem)
(VSCALEFPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked512load {sym} [off] x ptr mask mem)
(VPSLLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVD128load {sym} [off] x ptr mem)
(VPSLLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVD256load {sym} [off] x ptr mem)
(VPSLLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVD512load {sym} [off] x ptr mem)
(VPSLLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQ128load {sym} [off] x ptr mem)
(VPSLLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQ256load {sym} [off] x ptr mem)
(VPSLLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQ512load {sym} [off] x ptr mem)
(VPSHLDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVD128load {sym} [off] x y ptr mem)
(VPSHLDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVD256load {sym} [off] x y ptr mem)
(VPSHLDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVD512load {sym} [off] x y ptr mem)
(VPSHLDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQ128load {sym} [off] x y ptr mem)
(VPSHLDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQ256load {sym} [off] x y ptr mem)
(VPSHLDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQ512load {sym} [off] x y ptr mem)
(VPSHLDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVDMasked128load {sym} [off] x y ptr mask mem)
(VPSHLDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVDMasked256load {sym} [off] x y ptr mask mem)
(VPSHLDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVDMasked512load {sym} [off] x y ptr mask mem)
(VPSHLDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQMasked128load {sym} [off] x y ptr mask mem)
(VPSHLDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQMasked256load {sym} [off] x y ptr mask mem)
(VPSHLDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQMasked512load {sym} [off] x y ptr mask mem)
(VPSLLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVDMasked128load {sym} [off] x ptr mask mem)
(VPSLLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVDMasked256load {sym} [off] x ptr mask mem)
(VPSLLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVDMasked512load {sym} [off] x ptr mask mem)
(VPSLLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQMasked128load {sym} [off] x ptr mask mem)
(VPSLLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQMasked256load {sym} [off] x ptr mask mem)
(VPSLLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQMasked512load {sym} [off] x ptr mask mem)
(VPSRAVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVD128load {sym} [off] x ptr mem)
(VPSRAVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVD256load {sym} [off] x ptr mem)
(VPSRAVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVD512load {sym} [off] x ptr mem)
(VPSRAVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQ128load {sym} [off] x ptr mem)
(VPSRAVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQ256load {sym} [off] x ptr mem)
(VPSRAVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQ512load {sym} [off] x ptr mem)
(VPSRLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVD128load {sym} [off] x ptr mem)
(VPSRLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVD256load {sym} [off] x ptr mem)
(VPSRLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVD512load {sym} [off] x ptr mem)
(VPSRLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQ128load {sym} [off] x ptr mem)
(VPSRLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQ256load {sym} [off] x ptr mem)
(VPSRLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQ512load {sym} [off] x ptr mem)
(VPSHRDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVD128load {sym} [off] x y ptr mem)
(VPSHRDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVD256load {sym} [off] x y ptr mem)
(VPSHRDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVD512load {sym} [off] x y ptr mem)
(VPSHRDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQ128load {sym} [off] x y ptr mem)
(VPSHRDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQ256load {sym} [off] x y ptr mem)
(VPSHRDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQ512load {sym} [off] x y ptr mem)
(VPSHRDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVDMasked128load {sym} [off] x y ptr mask mem)
(VPSHRDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVDMasked256load {sym} [off] x y ptr mask mem)
(VPSHRDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVDMasked512load {sym} [off] x y ptr mask mem)
(VPSHRDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQMasked128load {sym} [off] x y ptr mask mem)
(VPSHRDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQMasked256load {sym} [off] x y ptr mask mem)
(VPSHRDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQMasked512load {sym} [off] x y ptr mask mem)
(VPSRAVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVDMasked128load {sym} [off] x ptr mask mem)
(VPSRAVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVDMasked256load {sym} [off] x ptr mask mem)
(VPSRAVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVDMasked512load {sym} [off] x ptr mask mem)
(VPSRAVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQMasked128load {sym} [off] x ptr mask mem)
(VPSRAVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQMasked256load {sym} [off] x ptr mask mem)
(VPSRAVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQMasked512load {sym} [off] x ptr mask mem)
(VPSRLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVDMasked128load {sym} [off] x ptr mask mem)
(VPSRLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVDMasked256load {sym} [off] x ptr mask mem)
(VPSRLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVDMasked512load {sym} [off] x ptr mask mem)
(VPSRLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQMasked128load {sym} [off] x ptr mask mem)
(VPSRLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQMasked256load {sym} [off] x ptr mask mem)
(VPSRLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQMasked512load {sym} [off] x ptr mask mem)
(VSQRTPS128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPS128load {sym} [off] ptr mem)
(VSQRTPS256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPS256load {sym} [off] ptr mem)
(VSQRTPS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPS512load {sym} [off] ptr mem)
(VSQRTPD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPD128load {sym} [off] ptr mem)
(VSQRTPD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPD256load {sym} [off] ptr mem)
(VSQRTPD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPD512load {sym} [off] ptr mem)
(VSQRTPSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPSMasked128load {sym} [off] ptr mask mem)
(VSQRTPSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPSMasked256load {sym} [off] ptr mask mem)
(VSQRTPSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPSMasked512load {sym} [off] ptr mask mem)
(VSQRTPDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPDMasked128load {sym} [off] ptr mask mem)
(VSQRTPDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPDMasked256load {sym} [off] ptr mask mem)
(VSQRTPDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPDMasked512load {sym} [off] ptr mask mem)
(VSUBPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPS128load {sym} [off] x ptr mem)
(VSUBPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPS256load {sym} [off] x ptr mem)
(VSUBPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPS512load {sym} [off] x ptr mem)
(VSUBPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPD128load {sym} [off] x ptr mem)
(VSUBPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPD256load {sym} [off] x ptr mem)
(VSUBPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPD512load {sym} [off] x ptr mem)
(VPSUBD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBD128load {sym} [off] x ptr mem)
(VPSUBD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBD256load {sym} [off] x ptr mem)
(VPSUBD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBD512load {sym} [off] x ptr mem)
(VPSUBQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBQ128load {sym} [off] x ptr mem)
(VPSUBQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBQ256load {sym} [off] x ptr mem)
(VPSUBQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBQ512load {sym} [off] x ptr mem)
(VSUBPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPSMasked128load {sym} [off] x ptr mask mem)
(VSUBPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPSMasked256load {sym} [off] x ptr mask mem)
(VSUBPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPSMasked512load {sym} [off] x ptr mask mem)
(VSUBPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPDMasked128load {sym} [off] x ptr mask mem)
(VSUBPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPDMasked256load {sym} [off] x ptr mask mem)
(VSUBPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPDMasked512load {sym} [off] x ptr mask mem)
(VPSUBDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBDMasked128load {sym} [off] x ptr mask mem)
(VPSUBDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBDMasked256load {sym} [off] x ptr mask mem)
(VPSUBDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBDMasked512load {sym} [off] x ptr mask mem)
(VPSUBQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBQMasked128load {sym} [off] x ptr mask mem)
(VPSUBQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBQMasked256load {sym} [off] x ptr mask mem)
(VPSUBQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBQMasked512load {sym} [off] x ptr mask mem)
(VPXORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPXORD512load {sym} [off] x ptr mem)
(VPXORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPXORQ512load {sym} [off] x ptr mem)
(VPXORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORDMasked128load {sym} [off] x ptr mask mem)
(VPXORDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORDMasked256load {sym} [off] x ptr mask mem)
(VPXORDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORDMasked512load {sym} [off] x ptr mask mem)
(VPXORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORQMasked128load {sym} [off] x ptr mask mem)
(VPXORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORQMasked256load {sym} [off] x ptr mask mem)
(VPXORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORQMasked512load {sym} [off] x ptr mask mem)
(VPBLENDMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMDMasked512load {sym} [off] x ptr mask mem)
(VPBLENDMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMQMasked512load {sym} [off] x ptr mask mem)

File diff suppressed because it is too large Load diff

View file

@ -22,6 +22,9 @@ type tplRuleData struct {
MaskInConvert string // e.g. "VPMOVVec32x8ToM"
MaskOutConvert string // e.g. "VPMOVMToVec32x8"
ElementSize int // e.g. 32
Size int // e.g. 128
ArgsLoadAddr string // [Args] with its last vreg arg being a concrete "(VMOVDQUload* ptr mem)", and might contain mask.
ArgsAddr string // [Args] with its last vreg arg being replaced by "ptr", and might contain mask, and with a "mem" at the end.
}
var (
@ -38,6 +41,8 @@ var (
{{end}}
{{define "masksftimm"}}({{.Asm}} x (MOVQconst [c]) mask) => ({{.Asm}}const [uint8(c)] x mask)
{{end}}
{{define "vregMem"}}({{.Asm}} {{.ArgsLoadAddr}}) && canMergeLoad(v, l) && clobber(l) => ({{.Asm}}load {{.ArgsAddr}})
{{end}}
`))
)
@ -85,6 +90,7 @@ var tmplOrder = map[string]int{
"maskOut": 3,
"maskIn": 4,
"pureVreg": 5,
"vregMem": 6,
}
func compareTplRuleData(x, y tplRuleData) int {
@ -118,7 +124,9 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
buffer.WriteString(generatedHeader + "\n")
var allData []tplRuleData
var optData []tplRuleData // for peephole optimizations
var optData []tplRuleData // for mask peephole optimizations, and other misc
var memOptData []tplRuleData // for memory peephole optimizations
memOpSeen := make(map[string]bool)
for _, opr := range ops {
opInShape, opOutShape, maskType, immType, gOp := opr.shape()
@ -228,6 +236,39 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
panic("simdgen sees unknwon special lower " + *gOp.SpecialLower + ", maybe implement it?")
}
}
if gOp.MemFeatures != nil && *gOp.MemFeatures == "vbcst" && immType == NoImm {
// sanity check
selected := true
for _, a := range gOp.In {
if a.TreatLikeAScalarOfSize != nil {
selected = false
break
}
}
if _, ok := memOpSeen[data.Asm]; ok {
selected = false
}
if selected {
memOpSeen[data.Asm] = true
lastVreg := gOp.In[vregInCnt-1]
// sanity check
if lastVreg.Class != "vreg" {
panic(fmt.Errorf("simdgen expects vbcst replaced operand to be a vreg, but %v found", lastVreg))
}
memOpData := data
// Remove the last vreg from the arg and change it to a load.
memOpData.ArgsLoadAddr = data.Args[:len(data.Args)-1] + fmt.Sprintf("l:(VMOVDQUload%d {sym} [off] ptr mem)", *lastVreg.Bits)
// Remove the last vreg from the arg and change it to "ptr".
memOpData.ArgsAddr = "{sym} [off] " + data.Args[:len(data.Args)-1] + "ptr"
if maskType == OneMask {
memOpData.ArgsAddr += " mask"
memOpData.ArgsLoadAddr += " mask"
}
memOpData.ArgsAddr += " mem"
memOpData.tplName = "vregMem"
memOptData = append(memOptData, memOpData)
}
}
if tplName == "pureVreg" && data.Args == data.ArgsOut {
data.Args = "..."
@ -262,5 +303,11 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
}
}
for _, data := range memOptData {
if err := ruleTemplates.ExecuteTemplate(buffer, data.tplName, data); err != nil {
panic(fmt.Errorf("failed to execute template %s for %s: %w", data.tplName, data.Asm, err))
}
}
return buffer
}