[dev.simd] cmd/compile, simd/_gen/simdgen: add const load mops

This CL adds the load + const imm8 variants for many instructions.

Change-Id: I46116906077e33eabccc111be6d16019002f3474
Reviewed-on: https://go-review.googlesource.com/c/go/+/703395
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Junyang Shao 2025-09-12 16:43:30 +00:00
parent 1e5631d4e0
commit 3ec0b25ab7
9 changed files with 3639 additions and 33 deletions

View file

@ -1365,6 +1365,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VCVTPS2UDQMasked128load, ssa.OpAMD64VCVTPS2UDQMasked128load,
ssa.OpAMD64VCVTPS2UDQMasked256load, ssa.OpAMD64VCVTPS2UDQMasked256load,
ssa.OpAMD64VCVTPS2UDQMasked512load, ssa.OpAMD64VCVTPS2UDQMasked512load,
ssa.OpAMD64VPLZCNTDMasked128load,
ssa.OpAMD64VPLZCNTDMasked256load,
ssa.OpAMD64VPLZCNTDMasked512load,
ssa.OpAMD64VPLZCNTQMasked128load,
ssa.OpAMD64VPLZCNTQMasked256load,
ssa.OpAMD64VPLZCNTQMasked512load,
ssa.OpAMD64VPOPCNTDMasked128load, ssa.OpAMD64VPOPCNTDMasked128load,
ssa.OpAMD64VPOPCNTDMasked256load, ssa.OpAMD64VPOPCNTDMasked256load,
ssa.OpAMD64VPOPCNTDMasked512load, ssa.OpAMD64VPOPCNTDMasked512load,
@ -1839,6 +1845,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VCVTPS2UDQ128load, ssa.OpAMD64VCVTPS2UDQ128load,
ssa.OpAMD64VCVTPS2UDQ256load, ssa.OpAMD64VCVTPS2UDQ256load,
ssa.OpAMD64VCVTPS2UDQ512load, ssa.OpAMD64VCVTPS2UDQ512load,
ssa.OpAMD64VPLZCNTD128load,
ssa.OpAMD64VPLZCNTD256load,
ssa.OpAMD64VPLZCNTD512load,
ssa.OpAMD64VPLZCNTQ128load,
ssa.OpAMD64VPLZCNTQ256load,
ssa.OpAMD64VPLZCNTQ512load,
ssa.OpAMD64VPOPCNTD128load, ssa.OpAMD64VPOPCNTD128load,
ssa.OpAMD64VPOPCNTD256load, ssa.OpAMD64VPOPCNTD256load,
ssa.OpAMD64VPOPCNTD512load, ssa.OpAMD64VPOPCNTD512load,
@ -1861,6 +1873,172 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VSQRTPD512load: ssa.OpAMD64VSQRTPD512load:
p = simdV11load(s, v) p = simdV11load(s, v)
case ssa.OpAMD64VRNDSCALEPS128load,
ssa.OpAMD64VRNDSCALEPS256load,
ssa.OpAMD64VRNDSCALEPS512load,
ssa.OpAMD64VRNDSCALEPD128load,
ssa.OpAMD64VRNDSCALEPD256load,
ssa.OpAMD64VRNDSCALEPD512load,
ssa.OpAMD64VREDUCEPS128load,
ssa.OpAMD64VREDUCEPS256load,
ssa.OpAMD64VREDUCEPS512load,
ssa.OpAMD64VREDUCEPD128load,
ssa.OpAMD64VREDUCEPD256load,
ssa.OpAMD64VREDUCEPD512load,
ssa.OpAMD64VPSHUFD128load,
ssa.OpAMD64VPSHUFD256load,
ssa.OpAMD64VPSHUFD512load,
ssa.OpAMD64VPROLD128load,
ssa.OpAMD64VPROLD256load,
ssa.OpAMD64VPROLD512load,
ssa.OpAMD64VPROLQ128load,
ssa.OpAMD64VPROLQ256load,
ssa.OpAMD64VPROLQ512load,
ssa.OpAMD64VPRORD128load,
ssa.OpAMD64VPRORD256load,
ssa.OpAMD64VPRORD512load,
ssa.OpAMD64VPRORQ128load,
ssa.OpAMD64VPRORQ256load,
ssa.OpAMD64VPRORQ512load,
ssa.OpAMD64VPSLLD128constload,
ssa.OpAMD64VPSLLD256constload,
ssa.OpAMD64VPSLLD512constload,
ssa.OpAMD64VPSLLQ128constload,
ssa.OpAMD64VPSLLQ256constload,
ssa.OpAMD64VPSLLQ512constload,
ssa.OpAMD64VPSRLD128constload,
ssa.OpAMD64VPSRLD256constload,
ssa.OpAMD64VPSRLD512constload,
ssa.OpAMD64VPSRLQ128constload,
ssa.OpAMD64VPSRLQ256constload,
ssa.OpAMD64VPSRLQ512constload,
ssa.OpAMD64VPSRAD128constload,
ssa.OpAMD64VPSRAD256constload,
ssa.OpAMD64VPSRAD512constload,
ssa.OpAMD64VPSRAQ128constload,
ssa.OpAMD64VPSRAQ256constload,
ssa.OpAMD64VPSRAQ512constload:
p = simdV11loadImm8(s, v)
case ssa.OpAMD64VRNDSCALEPSMasked128load,
ssa.OpAMD64VRNDSCALEPSMasked256load,
ssa.OpAMD64VRNDSCALEPSMasked512load,
ssa.OpAMD64VRNDSCALEPDMasked128load,
ssa.OpAMD64VRNDSCALEPDMasked256load,
ssa.OpAMD64VRNDSCALEPDMasked512load,
ssa.OpAMD64VREDUCEPSMasked128load,
ssa.OpAMD64VREDUCEPSMasked256load,
ssa.OpAMD64VREDUCEPSMasked512load,
ssa.OpAMD64VREDUCEPDMasked128load,
ssa.OpAMD64VREDUCEPDMasked256load,
ssa.OpAMD64VREDUCEPDMasked512load,
ssa.OpAMD64VPSHUFDMasked256load,
ssa.OpAMD64VPSHUFDMasked512load,
ssa.OpAMD64VPSHUFDMasked128load,
ssa.OpAMD64VPROLDMasked128load,
ssa.OpAMD64VPROLDMasked256load,
ssa.OpAMD64VPROLDMasked512load,
ssa.OpAMD64VPROLQMasked128load,
ssa.OpAMD64VPROLQMasked256load,
ssa.OpAMD64VPROLQMasked512load,
ssa.OpAMD64VPRORDMasked128load,
ssa.OpAMD64VPRORDMasked256load,
ssa.OpAMD64VPRORDMasked512load,
ssa.OpAMD64VPRORQMasked128load,
ssa.OpAMD64VPRORQMasked256load,
ssa.OpAMD64VPRORQMasked512load,
ssa.OpAMD64VPSLLDMasked128constload,
ssa.OpAMD64VPSLLDMasked256constload,
ssa.OpAMD64VPSLLDMasked512constload,
ssa.OpAMD64VPSLLQMasked128constload,
ssa.OpAMD64VPSLLQMasked256constload,
ssa.OpAMD64VPSLLQMasked512constload,
ssa.OpAMD64VPSRLDMasked128constload,
ssa.OpAMD64VPSRLDMasked256constload,
ssa.OpAMD64VPSRLDMasked512constload,
ssa.OpAMD64VPSRLQMasked128constload,
ssa.OpAMD64VPSRLQMasked256constload,
ssa.OpAMD64VPSRLQMasked512constload,
ssa.OpAMD64VPSRADMasked128constload,
ssa.OpAMD64VPSRADMasked256constload,
ssa.OpAMD64VPSRADMasked512constload,
ssa.OpAMD64VPSRAQMasked128constload,
ssa.OpAMD64VPSRAQMasked256constload,
ssa.OpAMD64VPSRAQMasked512constload:
p = simdVkvloadImm8(s, v)
case ssa.OpAMD64VCMPPS128load,
ssa.OpAMD64VCMPPS256load,
ssa.OpAMD64VCMPPD128load,
ssa.OpAMD64VCMPPD256load,
ssa.OpAMD64VGF2P8AFFINEQB128load,
ssa.OpAMD64VGF2P8AFFINEQB256load,
ssa.OpAMD64VGF2P8AFFINEQB512load,
ssa.OpAMD64VGF2P8AFFINEINVQB128load,
ssa.OpAMD64VGF2P8AFFINEINVQB256load,
ssa.OpAMD64VGF2P8AFFINEINVQB512load,
ssa.OpAMD64VPSHLDD128load,
ssa.OpAMD64VPSHLDD256load,
ssa.OpAMD64VPSHLDD512load,
ssa.OpAMD64VPSHLDQ128load,
ssa.OpAMD64VPSHLDQ256load,
ssa.OpAMD64VPSHLDQ512load,
ssa.OpAMD64VPSHRDD128load,
ssa.OpAMD64VPSHRDD256load,
ssa.OpAMD64VPSHRDD512load,
ssa.OpAMD64VPSHRDQ128load,
ssa.OpAMD64VPSHRDQ256load,
ssa.OpAMD64VPSHRDQ512load:
p = simdV21loadImm8(s, v)
case ssa.OpAMD64VCMPPS512load,
ssa.OpAMD64VCMPPD512load,
ssa.OpAMD64VPCMPUD512load,
ssa.OpAMD64VPCMPUQ512load,
ssa.OpAMD64VPCMPD512load,
ssa.OpAMD64VPCMPQ512load:
p = simdV2kloadImm8(s, v)
case ssa.OpAMD64VCMPPSMasked128load,
ssa.OpAMD64VCMPPSMasked256load,
ssa.OpAMD64VCMPPSMasked512load,
ssa.OpAMD64VCMPPDMasked128load,
ssa.OpAMD64VCMPPDMasked256load,
ssa.OpAMD64VCMPPDMasked512load,
ssa.OpAMD64VPCMPDMasked128load,
ssa.OpAMD64VPCMPDMasked256load,
ssa.OpAMD64VPCMPDMasked512load,
ssa.OpAMD64VPCMPQMasked128load,
ssa.OpAMD64VPCMPQMasked256load,
ssa.OpAMD64VPCMPQMasked512load,
ssa.OpAMD64VPCMPUDMasked128load,
ssa.OpAMD64VPCMPUDMasked256load,
ssa.OpAMD64VPCMPUDMasked512load,
ssa.OpAMD64VPCMPUQMasked128load,
ssa.OpAMD64VPCMPUQMasked256load,
ssa.OpAMD64VPCMPUQMasked512load:
p = simdV2kkloadImm8(s, v)
case ssa.OpAMD64VGF2P8AFFINEINVQBMasked128load,
ssa.OpAMD64VGF2P8AFFINEINVQBMasked256load,
ssa.OpAMD64VGF2P8AFFINEINVQBMasked512load,
ssa.OpAMD64VGF2P8AFFINEQBMasked128load,
ssa.OpAMD64VGF2P8AFFINEQBMasked256load,
ssa.OpAMD64VGF2P8AFFINEQBMasked512load,
ssa.OpAMD64VPSHLDDMasked128load,
ssa.OpAMD64VPSHLDDMasked256load,
ssa.OpAMD64VPSHLDDMasked512load,
ssa.OpAMD64VPSHLDQMasked128load,
ssa.OpAMD64VPSHLDQMasked256load,
ssa.OpAMD64VPSHLDQMasked512load,
ssa.OpAMD64VPSHRDDMasked128load,
ssa.OpAMD64VPSHRDDMasked256load,
ssa.OpAMD64VPSHRDDMasked512load,
ssa.OpAMD64VPSHRDQMasked128load,
ssa.OpAMD64VPSHRDQMasked256load,
ssa.OpAMD64VPSHRDQMasked512load:
p = simdV2kvloadImm8(s, v)
default: default:
// Unknown reg shape // Unknown reg shape
return false return false

View file

@ -2211,6 +2211,97 @@ func simdV11load(s *ssagen.State, v *ssa.Value) *obj.Prog {
return p return p
} }
// simdV11loadImm8 emits v's instruction with a constant first operand, one
// memory source operand and a SIMD register destination. The constant is the
// value half of v's ValAndOff aux; the memory operand is based at v.Args[0]
// and is completed by ssagen.AddAux2 with the offset half of the same aux.
// Example instruction: VPSHUFD $7, (BX), X11
func simdV11loadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	valOff := v.AuxValAndOff()
	prog := s.Prog(v.Op.Asm())

	// First operand: the constant.
	imm := &prog.From
	imm.Type, imm.Offset = obj.TYPE_CONST, valOff.Val64()

	// Second operand: the memory source.
	var mem obj.Addr
	mem.Type = obj.TYPE_MEM
	mem.Reg = v.Args[0].Reg()
	ssagen.AddAux2(&mem, v, valOff.Off64())
	prog.AddRestSource(mem)

	// Destination: the SIMD register chosen for v.
	dst := &prog.To
	dst.Type, dst.Reg = obj.TYPE_REG, simdReg(v)
	return prog
}
// simdVkvloadImm8 emits v's instruction with a constant first operand, a
// memory source operand, a mask register source and a SIMD register
// destination. The constant is the value half of v's ValAndOff aux; the memory
// operand is based at v.Args[0] and is completed by ssagen.AddAux2 with the
// offset half of the same aux; the mask comes from v.Args[1].
// Example instruction: VPRORD $81, -15(R14), K7, Y1
func simdVkvloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	valOff := v.AuxValAndOff()
	prog := s.Prog(v.Op.Asm())

	// First operand: the constant.
	imm := &prog.From
	imm.Type, imm.Offset = obj.TYPE_CONST, valOff.Val64()

	// Second operand: the memory source.
	var mem obj.Addr
	mem.Type = obj.TYPE_MEM
	mem.Reg = v.Args[0].Reg()
	ssagen.AddAux2(&mem, v, valOff.Off64())
	prog.AddRestSource(mem)

	// Third operand: the mask register.
	prog.AddRestSourceReg(maskReg(v.Args[1]))

	// Destination: the SIMD register chosen for v.
	dst := &prog.To
	dst.Type, dst.Reg = obj.TYPE_REG, simdReg(v)
	return prog
}
// simdV21loadImm8 emits v's instruction with a constant first operand, a
// memory source operand, a SIMD register source and a SIMD register
// destination. The constant is the value half of v's ValAndOff aux; the memory
// operand is based at v.Args[1] and is completed by ssagen.AddAux2 with the
// offset half of the same aux; the register source is simdReg(v.Args[0]).
// Example instruction: VPSHLDD $82, 7(SI), Y21, Y3
func simdV21loadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	valOff := v.AuxValAndOff()
	prog := s.Prog(v.Op.Asm())

	// First operand: the constant.
	imm := &prog.From
	imm.Type, imm.Offset = obj.TYPE_CONST, valOff.Val64()

	// Second operand: the memory source (note: Args[1], not Args[0]).
	var mem obj.Addr
	mem.Type = obj.TYPE_MEM
	mem.Reg = v.Args[1].Reg()
	ssagen.AddAux2(&mem, v, valOff.Off64())
	prog.AddRestSource(mem)

	// Third operand: the SIMD register source.
	prog.AddRestSourceReg(simdReg(v.Args[0]))

	// Destination: the SIMD register chosen for v.
	dst := &prog.To
	dst.Type, dst.Reg = obj.TYPE_REG, simdReg(v)
	return prog
}
// simdV2kloadImm8 emits v's instruction with a constant first operand, a
// memory source operand, a SIMD register source and a mask register
// destination. The constant is the value half of v's ValAndOff aux; the memory
// operand is based at v.Args[1] and is completed by ssagen.AddAux2 with the
// offset half of the same aux; the register source is simdReg(v.Args[0]).
// Example instruction: VCMPPS $81, -7(DI), Y16, K3
func simdV2kloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	valOff := v.AuxValAndOff()
	prog := s.Prog(v.Op.Asm())

	// First operand: the constant.
	imm := &prog.From
	imm.Type, imm.Offset = obj.TYPE_CONST, valOff.Val64()

	// Second operand: the memory source (note: Args[1], not Args[0]).
	var mem obj.Addr
	mem.Type = obj.TYPE_MEM
	mem.Reg = v.Args[1].Reg()
	ssagen.AddAux2(&mem, v, valOff.Off64())
	prog.AddRestSource(mem)

	// Third operand: the SIMD register source.
	prog.AddRestSourceReg(simdReg(v.Args[0]))

	// Destination: the mask register chosen for v.
	dst := &prog.To
	dst.Type, dst.Reg = obj.TYPE_REG, maskReg(v)
	return prog
}
// simdV2kkloadImm8 emits v's instruction with a constant first operand, a
// memory source operand, a SIMD register source, a mask register source and a
// mask register destination. The constant is the value half of v's ValAndOff
// aux; the memory operand is based at v.Args[1] and is completed by
// ssagen.AddAux2 with the offset half of the same aux; the register source is
// simdReg(v.Args[0]) and the input mask is maskReg(v.Args[2]).
// Example instruction: VCMPPS $81, -7(DI), Y16, K1, K3
func simdV2kkloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	valOff := v.AuxValAndOff()
	prog := s.Prog(v.Op.Asm())

	// First operand: the constant.
	imm := &prog.From
	imm.Type, imm.Offset = obj.TYPE_CONST, valOff.Val64()

	// Second operand: the memory source (note: Args[1], not Args[0]).
	var mem obj.Addr
	mem.Type = obj.TYPE_MEM
	mem.Reg = v.Args[1].Reg()
	ssagen.AddAux2(&mem, v, valOff.Off64())
	prog.AddRestSource(mem)

	// Remaining sources, in order: SIMD register, then input mask.
	prog.AddRestSourceReg(simdReg(v.Args[0]))
	prog.AddRestSourceReg(maskReg(v.Args[2]))

	// Destination: the mask register chosen for v.
	dst := &prog.To
	dst.Type, dst.Reg = obj.TYPE_REG, maskReg(v)
	return prog
}
// simdV2kvloadImm8 emits v's instruction with a constant first operand, a
// memory source operand, a SIMD register source, a mask register source and a
// SIMD register destination. The constant is the value half of v's ValAndOff
// aux; the memory operand is based at v.Args[1] and is completed by
// ssagen.AddAux2 with the offset half of the same aux; the register source is
// simdReg(v.Args[0]) and the mask is maskReg(v.Args[2]).
// Example instruction: VGF2P8AFFINEINVQB $64, -17(BP), X31, K3, X26
func simdV2kvloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
	valOff := v.AuxValAndOff()
	prog := s.Prog(v.Op.Asm())

	// First operand: the constant.
	imm := &prog.From
	imm.Type, imm.Offset = obj.TYPE_CONST, valOff.Val64()

	// Second operand: the memory source (note: Args[1], not Args[0]).
	var mem obj.Addr
	mem.Type = obj.TYPE_MEM
	mem.Reg = v.Args[1].Reg()
	ssagen.AddAux2(&mem, v, valOff.Off64())
	prog.AddRestSource(mem)

	// Remaining sources, in order: SIMD register, then mask.
	prog.AddRestSourceReg(simdReg(v.Args[0]))
	prog.AddRestSourceReg(maskReg(v.Args[2]))

	// Destination: the SIMD register chosen for v.
	dst := &prog.To
	dst.Type, dst.Reg = obj.TYPE_REG, simdReg(v)
	return prog
}
var blockJump = [...]struct { var blockJump = [...]struct {
asm, invasm obj.As asm, invasm obj.As
}{ }{

View file

@ -256,6 +256,7 @@ func init() {
w2kwload = regInfo{inputs: []regMask{wz, gpspsb, mask, 0}, outputs: wonly} w2kwload = regInfo{inputs: []regMask{wz, gpspsb, mask, 0}, outputs: wonly}
w11load = regInfo{inputs: []regMask{gpspsb, 0}, outputs: wonly} w11load = regInfo{inputs: []regMask{gpspsb, 0}, outputs: wonly}
w3kwload = regInfo{inputs: []regMask{w, wz, gpspsb, mask, 0}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15 w3kwload = regInfo{inputs: []regMask{w, wz, gpspsb, mask, 0}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
w2kkload = regInfo{inputs: []regMask{wz, gpspsb, mask, 0}, outputs: maskonly}
kload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly} kload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly}
kstore = regInfo{inputs: []regMask{gpspsb, mask, 0}} kstore = regInfo{inputs: []regMask{gpspsb, mask, 0}}
@ -1459,7 +1460,7 @@ func init() {
genSIMDfile: "../../amd64/simdssa.go", genSIMDfile: "../../amd64/simdssa.go",
ops: append(AMD64ops, simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, ops: append(AMD64ops, simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv,
w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw, wkwload, v21load, v31load, v11load, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw, wkwload, v21load, v31load, v11load,
w21load, w31load, w2kload, w2kwload, w11load, w3kwload)...), // AMD64ops, w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload)...), // AMD64ops,
blocks: AMD64blocks, blocks: AMD64blocks,
regnames: regNamesAMD64, regnames: regNamesAMD64,
ParamIntRegNames: "AX BX CX DI SI R8 R9 R10 R11", ParamIntRegNames: "AX BX CX DI SI R8 R9 R10 R11",

View file

@ -1605,6 +1605,18 @@
(VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ512load {sym} [off] x ptr mem) (VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ512load {sym} [off] x ptr mem)
(VPUNPCKLQDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ256load {sym} [off] x ptr mem) (VPUNPCKLQDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ256load {sym} [off] x ptr mem)
(VPUNPCKLQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ512load {sym} [off] x ptr mem) (VPUNPCKLQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ512load {sym} [off] x ptr mem)
(VPLZCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTD128load {sym} [off] ptr mem)
(VPLZCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTD256load {sym} [off] ptr mem)
(VPLZCNTD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTD512load {sym} [off] ptr mem)
(VPLZCNTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQ128load {sym} [off] ptr mem)
(VPLZCNTQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQ256load {sym} [off] ptr mem)
(VPLZCNTQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQ512load {sym} [off] ptr mem)
(VPLZCNTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTDMasked128load {sym} [off] ptr mask mem)
(VPLZCNTDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTDMasked256load {sym} [off] ptr mask mem)
(VPLZCNTDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTDMasked512load {sym} [off] ptr mask mem)
(VPLZCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQMasked128load {sym} [off] ptr mask mem)
(VPLZCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQMasked256load {sym} [off] ptr mask mem)
(VPLZCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQMasked512load {sym} [off] ptr mask mem)
(VMAXPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS128load {sym} [off] x ptr mem) (VMAXPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS128load {sym} [off] x ptr mem)
(VMAXPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS256load {sym} [off] x ptr mem) (VMAXPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS256load {sym} [off] x ptr mem)
(VMAXPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS512load {sym} [off] x ptr mem) (VMAXPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS512load {sym} [off] x ptr mem)

View file

@ -3,7 +3,7 @@
package main package main
func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw, func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw,
wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload regInfo) []opData { wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload regInfo) []opData {
return []opData{ return []opData{
{name: "VADDPD128", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VADDPD128", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VADDPD256", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VADDPD256", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false},
@ -1446,6 +1446,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPUNPCKLDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VPUNPCKLDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLQDQ256load", argLength: 3, reg: v21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VPUNPCKLQDQ256load", argLength: 3, reg: v21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPUNPCKLQDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VPUNPCKLQDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTD128load", argLength: 2, reg: w11load, asm: "VPLZCNTD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTD256load", argLength: 2, reg: w11load, asm: "VPLZCNTD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTD512load", argLength: 2, reg: w11load, asm: "VPLZCNTD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTQ128load", argLength: 2, reg: w11load, asm: "VPLZCNTQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTQ256load", argLength: 2, reg: w11load, asm: "VPLZCNTQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTQ512load", argLength: 2, reg: w11load, asm: "VPLZCNTQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTDMasked128load", argLength: 3, reg: wkwload, asm: "VPLZCNTD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTDMasked256load", argLength: 3, reg: wkwload, asm: "VPLZCNTD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTDMasked512load", argLength: 3, reg: wkwload, asm: "VPLZCNTD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTQMasked128load", argLength: 3, reg: wkwload, asm: "VPLZCNTQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTQMasked256load", argLength: 3, reg: wkwload, asm: "VPLZCNTQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPLZCNTQMasked512load", argLength: 3, reg: wkwload, asm: "VPLZCNTQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPS128load", argLength: 3, reg: v21load, asm: "VMAXPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VMAXPS128load", argLength: 3, reg: v21load, asm: "VMAXPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPS256load", argLength: 3, reg: v21load, asm: "VMAXPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VMAXPS256load", argLength: 3, reg: v21load, asm: "VMAXPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VMAXPS512load", argLength: 3, reg: w21load, asm: "VMAXPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VMAXPS512load", argLength: 3, reg: w21load, asm: "VMAXPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@ -1804,5 +1816,159 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPXORQMasked512load", argLength: 4, reg: w2kwload, asm: "VPXORQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VPXORQMasked512load", argLength: 4, reg: w2kwload, asm: "VPXORQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBLENDMDMasked512load", argLength: 4, reg: w2kwload, asm: "VPBLENDMD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VPBLENDMDMasked512load", argLength: 4, reg: w2kwload, asm: "VPBLENDMD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPBLENDMQMasked512load", argLength: 4, reg: w2kwload, asm: "VPBLENDMQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VPBLENDMQMasked512load", argLength: 4, reg: w2kwload, asm: "VPBLENDMQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPS128load", argLength: 2, reg: w11load, asm: "VRNDSCALEPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPS256load", argLength: 2, reg: w11load, asm: "VRNDSCALEPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPS512load", argLength: 2, reg: w11load, asm: "VRNDSCALEPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPD128load", argLength: 2, reg: w11load, asm: "VRNDSCALEPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPD256load", argLength: 2, reg: w11load, asm: "VRNDSCALEPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPD512load", argLength: 2, reg: w11load, asm: "VRNDSCALEPD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPSMasked128load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPSMasked256load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPSMasked512load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPDMasked128load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPDMasked256load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VRNDSCALEPDMasked512load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VREDUCEPS128load", argLength: 2, reg: w11load, asm: "VREDUCEPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VREDUCEPS256load", argLength: 2, reg: w11load, asm: "VREDUCEPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VREDUCEPS512load", argLength: 2, reg: w11load, asm: "VREDUCEPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VREDUCEPD128load", argLength: 2, reg: w11load, asm: "VREDUCEPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VREDUCEPD256load", argLength: 2, reg: w11load, asm: "VREDUCEPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VREDUCEPD512load", argLength: 2, reg: w11load, asm: "VREDUCEPD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VREDUCEPSMasked128load", argLength: 3, reg: wkwload, asm: "VREDUCEPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VREDUCEPSMasked256load", argLength: 3, reg: wkwload, asm: "VREDUCEPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VREDUCEPSMasked512load", argLength: 3, reg: wkwload, asm: "VREDUCEPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VREDUCEPDMasked128load", argLength: 3, reg: wkwload, asm: "VREDUCEPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VREDUCEPDMasked256load", argLength: 3, reg: wkwload, asm: "VREDUCEPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VREDUCEPDMasked512load", argLength: 3, reg: wkwload, asm: "VREDUCEPD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPS128load", argLength: 3, reg: v21load, asm: "VCMPPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPS256load", argLength: 3, reg: v21load, asm: "VCMPPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPS512load", argLength: 3, reg: w2kload, asm: "VCMPPS", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPD128load", argLength: 3, reg: v21load, asm: "VCMPPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPD256load", argLength: 3, reg: v21load, asm: "VCMPPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPD512load", argLength: 3, reg: w2kload, asm: "VCMPPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPSMasked128load", argLength: 4, reg: w2kkload, asm: "VCMPPS", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPSMasked256load", argLength: 4, reg: w2kkload, asm: "VCMPPS", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPSMasked512load", argLength: 4, reg: w2kkload, asm: "VCMPPS", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPDMasked128load", argLength: 4, reg: w2kkload, asm: "VCMPPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPDMasked256load", argLength: 4, reg: w2kkload, asm: "VCMPPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VCMPPDMasked512load", argLength: 4, reg: w2kkload, asm: "VCMPPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPDMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPDMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPDMasked512load", argLength: 4, reg: w2kkload, asm: "VPCMPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPQMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPQMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPQMasked512load", argLength: 4, reg: w2kkload, asm: "VPCMPQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUDMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPUD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUDMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPUD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUDMasked512load", argLength: 4, reg: w2kkload, asm: "VPCMPUD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUQMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPUQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUQMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPUQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUQMasked512load", argLength: 4, reg: w2kkload, asm: "VPCMPUQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEQB128load", argLength: 3, reg: w21load, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEQB256load", argLength: 3, reg: w21load, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEQB512load", argLength: 3, reg: w21load, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEINVQB128load", argLength: 3, reg: w21load, asm: "VGF2P8AFFINEINVQB", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEINVQB256load", argLength: 3, reg: w21load, asm: "VGF2P8AFFINEINVQB", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEINVQB512load", argLength: 3, reg: w21load, asm: "VGF2P8AFFINEINVQB", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEINVQBMasked128load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEINVQB", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEINVQBMasked256load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEINVQB", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEINVQBMasked512load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEINVQB", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEQBMasked128load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEQBMasked256load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VGF2P8AFFINEQBMasked512load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUD512load", argLength: 3, reg: w2kload, asm: "VPCMPUD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPUQ512load", argLength: 3, reg: w2kload, asm: "VPCMPUQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPD512load", argLength: 3, reg: w2kload, asm: "VPCMPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPQ512load", argLength: 3, reg: w2kload, asm: "VPCMPQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFD128load", argLength: 2, reg: v11load, asm: "VPSHUFD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFD256load", argLength: 2, reg: v11load, asm: "VPSHUFD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFD512load", argLength: 2, reg: w11load, asm: "VPSHUFD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFDMasked256load", argLength: 3, reg: wkwload, asm: "VPSHUFD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFDMasked512load", argLength: 3, reg: wkwload, asm: "VPSHUFD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHUFDMasked128load", argLength: 3, reg: wkwload, asm: "VPSHUFD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLD128load", argLength: 2, reg: w11load, asm: "VPROLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLD256load", argLength: 2, reg: w11load, asm: "VPROLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLD512load", argLength: 2, reg: w11load, asm: "VPROLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLQ128load", argLength: 2, reg: w11load, asm: "VPROLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLQ256load", argLength: 2, reg: w11load, asm: "VPROLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLQ512load", argLength: 2, reg: w11load, asm: "VPROLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLDMasked128load", argLength: 3, reg: wkwload, asm: "VPROLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLDMasked256load", argLength: 3, reg: wkwload, asm: "VPROLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLDMasked512load", argLength: 3, reg: wkwload, asm: "VPROLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLQMasked128load", argLength: 3, reg: wkwload, asm: "VPROLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLQMasked256load", argLength: 3, reg: wkwload, asm: "VPROLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPROLQMasked512load", argLength: 3, reg: wkwload, asm: "VPROLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORD128load", argLength: 2, reg: w11load, asm: "VPRORD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORD256load", argLength: 2, reg: w11load, asm: "VPRORD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORD512load", argLength: 2, reg: w11load, asm: "VPRORD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORQ128load", argLength: 2, reg: w11load, asm: "VPRORQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORQ256load", argLength: 2, reg: w11load, asm: "VPRORQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORQ512load", argLength: 2, reg: w11load, asm: "VPRORQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORDMasked128load", argLength: 3, reg: wkwload, asm: "VPRORD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORDMasked256load", argLength: 3, reg: wkwload, asm: "VPRORD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORDMasked512load", argLength: 3, reg: wkwload, asm: "VPRORD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORQMasked128load", argLength: 3, reg: wkwload, asm: "VPRORQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORQMasked256load", argLength: 3, reg: wkwload, asm: "VPRORQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPRORQMasked512load", argLength: 3, reg: wkwload, asm: "VPRORQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDD128load", argLength: 3, reg: w21load, asm: "VPSHLDD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDD256load", argLength: 3, reg: w21load, asm: "VPSHLDD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDD512load", argLength: 3, reg: w21load, asm: "VPSHLDD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDQ128load", argLength: 3, reg: w21load, asm: "VPSHLDQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDQ256load", argLength: 3, reg: w21load, asm: "VPSHLDQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDQ512load", argLength: 3, reg: w21load, asm: "VPSHLDQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDDMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHLDD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDDMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHLDD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDDMasked512load", argLength: 4, reg: w2kwload, asm: "VPSHLDD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHLDQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHLDQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHLDQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSHLDQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDD128load", argLength: 3, reg: w21load, asm: "VPSHRDD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDD256load", argLength: 3, reg: w21load, asm: "VPSHRDD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDD512load", argLength: 3, reg: w21load, asm: "VPSHRDD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDQ128load", argLength: 3, reg: w21load, asm: "VPSHRDQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDQ256load", argLength: 3, reg: w21load, asm: "VPSHRDQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDQ512load", argLength: 3, reg: w21load, asm: "VPSHRDQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDDMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHRDD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDDMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHRDD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDDMasked512load", argLength: 4, reg: w2kwload, asm: "VPSHRDD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHRDQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHRDQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSHRDQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSHRDQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLD128constload", argLength: 2, reg: v11load, asm: "VPSLLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLD256constload", argLength: 2, reg: v11load, asm: "VPSLLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLD512constload", argLength: 2, reg: w11load, asm: "VPSLLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLQ128constload", argLength: 2, reg: v11load, asm: "VPSLLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLQ256constload", argLength: 2, reg: v11load, asm: "VPSLLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLQ512constload", argLength: 2, reg: w11load, asm: "VPSLLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLDMasked128constload", argLength: 3, reg: wkwload, asm: "VPSLLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLDMasked256constload", argLength: 3, reg: wkwload, asm: "VPSLLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLDMasked512constload", argLength: 3, reg: wkwload, asm: "VPSLLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLQMasked128constload", argLength: 3, reg: wkwload, asm: "VPSLLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLQMasked256constload", argLength: 3, reg: wkwload, asm: "VPSLLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSLLQMasked512constload", argLength: 3, reg: wkwload, asm: "VPSLLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLD128constload", argLength: 2, reg: v11load, asm: "VPSRLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLD256constload", argLength: 2, reg: v11load, asm: "VPSRLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLD512constload", argLength: 2, reg: w11load, asm: "VPSRLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLQ128constload", argLength: 2, reg: v11load, asm: "VPSRLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLQ256constload", argLength: 2, reg: v11load, asm: "VPSRLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLQ512constload", argLength: 2, reg: w11load, asm: "VPSRLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAD128constload", argLength: 2, reg: v11load, asm: "VPSRAD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAD256constload", argLength: 2, reg: v11load, asm: "VPSRAD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAD512constload", argLength: 2, reg: w11load, asm: "VPSRAD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAQ128constload", argLength: 2, reg: w11load, asm: "VPSRAQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAQ256constload", argLength: 2, reg: w11load, asm: "VPSRAQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAQ512constload", argLength: 2, reg: w11load, asm: "VPSRAQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLDMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLDMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLDMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLQMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLQMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRLQMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRADMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRAD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRADMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRAD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRADMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRAD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAQMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAQMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
{name: "VPSRAQMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
} }
} }

File diff suppressed because it is too large Load diff

View file

@ -1027,6 +1027,30 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64VPINSRD128(v) return rewriteValueAMD64_OpAMD64VPINSRD128(v)
case OpAMD64VPINSRQ128: case OpAMD64VPINSRQ128:
return rewriteValueAMD64_OpAMD64VPINSRQ128(v) return rewriteValueAMD64_OpAMD64VPINSRQ128(v)
case OpAMD64VPLZCNTD128:
return rewriteValueAMD64_OpAMD64VPLZCNTD128(v)
case OpAMD64VPLZCNTD256:
return rewriteValueAMD64_OpAMD64VPLZCNTD256(v)
case OpAMD64VPLZCNTD512:
return rewriteValueAMD64_OpAMD64VPLZCNTD512(v)
case OpAMD64VPLZCNTDMasked128:
return rewriteValueAMD64_OpAMD64VPLZCNTDMasked128(v)
case OpAMD64VPLZCNTDMasked256:
return rewriteValueAMD64_OpAMD64VPLZCNTDMasked256(v)
case OpAMD64VPLZCNTDMasked512:
return rewriteValueAMD64_OpAMD64VPLZCNTDMasked512(v)
case OpAMD64VPLZCNTQ128:
return rewriteValueAMD64_OpAMD64VPLZCNTQ128(v)
case OpAMD64VPLZCNTQ256:
return rewriteValueAMD64_OpAMD64VPLZCNTQ256(v)
case OpAMD64VPLZCNTQ512:
return rewriteValueAMD64_OpAMD64VPLZCNTQ512(v)
case OpAMD64VPLZCNTQMasked128:
return rewriteValueAMD64_OpAMD64VPLZCNTQMasked128(v)
case OpAMD64VPLZCNTQMasked256:
return rewriteValueAMD64_OpAMD64VPLZCNTQMasked256(v)
case OpAMD64VPLZCNTQMasked512:
return rewriteValueAMD64_OpAMD64VPLZCNTQMasked512(v)
case OpAMD64VPMAXSD128: case OpAMD64VPMAXSD128:
return rewriteValueAMD64_OpAMD64VPMAXSD128(v) return rewriteValueAMD64_OpAMD64VPMAXSD128(v)
case OpAMD64VPMAXSD256: case OpAMD64VPMAXSD256:
@ -37718,6 +37742,318 @@ func rewriteValueAMD64_OpAMD64VPINSRQ128(v *Value) bool {
} }
return false return false
} }
// rewriteValueAMD64_OpAMD64VPLZCNTD128 tries to replace v with a
// VPLZCNTD128load that takes the pointer and memory of the VMOVDQUload128
// feeding it, carrying over that load's symbol and offset.
//
// Rule applied:
//
//	(VPLZCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem))
//	  && canMergeLoad(v, l) && clobber(l)
//	  => (VPLZCNTD128load {sym} [off] ptr mem)
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VPLZCNTD128(v *Value) bool {
	load := v.Args[0]
	if load.Op != OpAMD64VMOVDQUload128 {
		return false
	}
	// Read everything needed from the load before the merge condition
	// hands it to clobber.
	off := auxIntToInt32(load.AuxInt)
	sym := auxToSym(load.Aux)
	mem, ptr := load.Args[1], load.Args[0]
	if !canMergeLoad(v, load) || !clobber(load) {
		return false
	}
	v.reset(OpAMD64VPLZCNTD128load)
	v.AuxInt = int32ToAuxInt(off)
	v.Aux = symToAux(sym)
	v.AddArg2(ptr, mem)
	return true
}
// rewriteValueAMD64_OpAMD64VPLZCNTD256 tries to replace v with a
// VPLZCNTD256load that takes the pointer and memory of the VMOVDQUload256
// feeding it, carrying over that load's symbol and offset.
//
// Rule applied:
//
//	(VPLZCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem))
//	  && canMergeLoad(v, l) && clobber(l)
//	  => (VPLZCNTD256load {sym} [off] ptr mem)
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VPLZCNTD256(v *Value) bool {
	load := v.Args[0]
	if load.Op != OpAMD64VMOVDQUload256 {
		return false
	}
	// Read everything needed from the load before the merge condition
	// hands it to clobber.
	off := auxIntToInt32(load.AuxInt)
	sym := auxToSym(load.Aux)
	mem, ptr := load.Args[1], load.Args[0]
	if !canMergeLoad(v, load) || !clobber(load) {
		return false
	}
	v.reset(OpAMD64VPLZCNTD256load)
	v.AuxInt = int32ToAuxInt(off)
	v.Aux = symToAux(sym)
	v.AddArg2(ptr, mem)
	return true
}
// rewriteValueAMD64_OpAMD64VPLZCNTD512 tries to replace v with a
// VPLZCNTD512load that takes the pointer and memory of the VMOVDQUload512
// feeding it, carrying over that load's symbol and offset.
//
// Rule applied:
//
//	(VPLZCNTD512 l:(VMOVDQUload512 {sym} [off] ptr mem))
//	  && canMergeLoad(v, l) && clobber(l)
//	  => (VPLZCNTD512load {sym} [off] ptr mem)
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VPLZCNTD512(v *Value) bool {
	load := v.Args[0]
	if load.Op != OpAMD64VMOVDQUload512 {
		return false
	}
	// Read everything needed from the load before the merge condition
	// hands it to clobber.
	off := auxIntToInt32(load.AuxInt)
	sym := auxToSym(load.Aux)
	mem, ptr := load.Args[1], load.Args[0]
	if !canMergeLoad(v, load) || !clobber(load) {
		return false
	}
	v.reset(OpAMD64VPLZCNTD512load)
	v.AuxInt = int32ToAuxInt(off)
	v.Aux = symToAux(sym)
	v.AddArg2(ptr, mem)
	return true
}
// rewriteValueAMD64_OpAMD64VPLZCNTDMasked128 tries to replace v with a
// VPLZCNTDMasked128load that takes the pointer and memory of the
// VMOVDQUload128 feeding it, keeping the mask operand and carrying over the
// load's symbol and offset.
//
// Rule applied:
//
//	(VPLZCNTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
//	  && canMergeLoad(v, l) && clobber(l)
//	  => (VPLZCNTDMasked128load {sym} [off] ptr mask mem)
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VPLZCNTDMasked128(v *Value) bool {
	mask, load := v.Args[1], v.Args[0]
	if load.Op != OpAMD64VMOVDQUload128 {
		return false
	}
	// Read everything needed from the load before the merge condition
	// hands it to clobber.
	off := auxIntToInt32(load.AuxInt)
	sym := auxToSym(load.Aux)
	mem, ptr := load.Args[1], load.Args[0]
	if !canMergeLoad(v, load) || !clobber(load) {
		return false
	}
	v.reset(OpAMD64VPLZCNTDMasked128load)
	v.AuxInt = int32ToAuxInt(off)
	v.Aux = symToAux(sym)
	// The new op's argument order is pointer, mask, memory.
	v.AddArg3(ptr, mask, mem)
	return true
}
// rewriteValueAMD64_OpAMD64VPLZCNTDMasked256 tries to replace v with a
// VPLZCNTDMasked256load that takes the pointer and memory of the
// VMOVDQUload256 feeding it, keeping the mask operand and carrying over the
// load's symbol and offset.
//
// Rule applied:
//
//	(VPLZCNTDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
//	  && canMergeLoad(v, l) && clobber(l)
//	  => (VPLZCNTDMasked256load {sym} [off] ptr mask mem)
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VPLZCNTDMasked256(v *Value) bool {
	mask, load := v.Args[1], v.Args[0]
	if load.Op != OpAMD64VMOVDQUload256 {
		return false
	}
	// Read everything needed from the load before the merge condition
	// hands it to clobber.
	off := auxIntToInt32(load.AuxInt)
	sym := auxToSym(load.Aux)
	mem, ptr := load.Args[1], load.Args[0]
	if !canMergeLoad(v, load) || !clobber(load) {
		return false
	}
	v.reset(OpAMD64VPLZCNTDMasked256load)
	v.AuxInt = int32ToAuxInt(off)
	v.Aux = symToAux(sym)
	// The new op's argument order is pointer, mask, memory.
	v.AddArg3(ptr, mask, mem)
	return true
}
// rewriteValueAMD64_OpAMD64VPLZCNTDMasked512 merges the VMOVDQUload512 feeding
// v's first argument into v itself, keeping the mask operand.
//
// Rule:
//
//	(VPLZCNTDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
//	  && canMergeLoad(v, l) && clobber(l)
//	  => (VPLZCNTDMasked512load {sym} [off] ptr mask mem)
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VPLZCNTDMasked512(v *Value) bool {
	kmask := v.Args[1]
	load := v.Args[0]
	if load.Op != OpAMD64VMOVDQUload512 {
		return false
	}
	// Pull everything needed out of the load before the merge condition is
	// evaluated, keeping the same evaluation order as the rule.
	offset := auxIntToInt32(load.AuxInt)
	symbol := auxToSym(load.Aux)
	memory, addr := load.Args[1], load.Args[0]
	if !canMergeLoad(v, load) || !clobber(load) {
		return false
	}
	v.reset(OpAMD64VPLZCNTDMasked512load)
	v.AuxInt = int32ToAuxInt(offset)
	v.Aux = symToAux(symbol)
	v.AddArg3(addr, kmask, memory)
	return true
}
// rewriteValueAMD64_OpAMD64VPLZCNTQ128 merges the VMOVDQUload128 feeding v
// into v itself.
//
// Rule:
//
//	(VPLZCNTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem))
//	  && canMergeLoad(v, l) && clobber(l)
//	  => (VPLZCNTQ128load {sym} [off] ptr mem)
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VPLZCNTQ128(v *Value) bool {
	load := v.Args[0]
	if load.Op != OpAMD64VMOVDQUload128 {
		return false
	}
	// Pull everything needed out of the load before the merge condition is
	// evaluated, keeping the same evaluation order as the rule.
	offset := auxIntToInt32(load.AuxInt)
	symbol := auxToSym(load.Aux)
	memory, addr := load.Args[1], load.Args[0]
	if !canMergeLoad(v, load) || !clobber(load) {
		return false
	}
	v.reset(OpAMD64VPLZCNTQ128load)
	v.AuxInt = int32ToAuxInt(offset)
	v.Aux = symToAux(symbol)
	v.AddArg2(addr, memory)
	return true
}
// rewriteValueAMD64_OpAMD64VPLZCNTQ256 merges the VMOVDQUload256 feeding v
// into v itself.
//
// Rule:
//
//	(VPLZCNTQ256 l:(VMOVDQUload256 {sym} [off] ptr mem))
//	  && canMergeLoad(v, l) && clobber(l)
//	  => (VPLZCNTQ256load {sym} [off] ptr mem)
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VPLZCNTQ256(v *Value) bool {
	load := v.Args[0]
	if load.Op != OpAMD64VMOVDQUload256 {
		return false
	}
	// Pull everything needed out of the load before the merge condition is
	// evaluated, keeping the same evaluation order as the rule.
	offset := auxIntToInt32(load.AuxInt)
	symbol := auxToSym(load.Aux)
	memory, addr := load.Args[1], load.Args[0]
	if !canMergeLoad(v, load) || !clobber(load) {
		return false
	}
	v.reset(OpAMD64VPLZCNTQ256load)
	v.AuxInt = int32ToAuxInt(offset)
	v.Aux = symToAux(symbol)
	v.AddArg2(addr, memory)
	return true
}
// rewriteValueAMD64_OpAMD64VPLZCNTQ512 merges the VMOVDQUload512 feeding v
// into v itself.
//
// Rule:
//
//	(VPLZCNTQ512 l:(VMOVDQUload512 {sym} [off] ptr mem))
//	  && canMergeLoad(v, l) && clobber(l)
//	  => (VPLZCNTQ512load {sym} [off] ptr mem)
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VPLZCNTQ512(v *Value) bool {
	load := v.Args[0]
	if load.Op != OpAMD64VMOVDQUload512 {
		return false
	}
	// Pull everything needed out of the load before the merge condition is
	// evaluated, keeping the same evaluation order as the rule.
	offset := auxIntToInt32(load.AuxInt)
	symbol := auxToSym(load.Aux)
	memory, addr := load.Args[1], load.Args[0]
	if !canMergeLoad(v, load) || !clobber(load) {
		return false
	}
	v.reset(OpAMD64VPLZCNTQ512load)
	v.AuxInt = int32ToAuxInt(offset)
	v.Aux = symToAux(symbol)
	v.AddArg2(addr, memory)
	return true
}
// rewriteValueAMD64_OpAMD64VPLZCNTQMasked128 merges the VMOVDQUload128 feeding
// v's first argument into v itself, keeping the mask operand.
//
// Rule:
//
//	(VPLZCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
//	  && canMergeLoad(v, l) && clobber(l)
//	  => (VPLZCNTQMasked128load {sym} [off] ptr mask mem)
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VPLZCNTQMasked128(v *Value) bool {
	kmask := v.Args[1]
	load := v.Args[0]
	if load.Op != OpAMD64VMOVDQUload128 {
		return false
	}
	// Pull everything needed out of the load before the merge condition is
	// evaluated, keeping the same evaluation order as the rule.
	offset := auxIntToInt32(load.AuxInt)
	symbol := auxToSym(load.Aux)
	memory, addr := load.Args[1], load.Args[0]
	if !canMergeLoad(v, load) || !clobber(load) {
		return false
	}
	v.reset(OpAMD64VPLZCNTQMasked128load)
	v.AuxInt = int32ToAuxInt(offset)
	v.Aux = symToAux(symbol)
	v.AddArg3(addr, kmask, memory)
	return true
}
// rewriteValueAMD64_OpAMD64VPLZCNTQMasked256 merges the VMOVDQUload256 feeding
// v's first argument into v itself, keeping the mask operand.
//
// Rule:
//
//	(VPLZCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
//	  && canMergeLoad(v, l) && clobber(l)
//	  => (VPLZCNTQMasked256load {sym} [off] ptr mask mem)
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VPLZCNTQMasked256(v *Value) bool {
	kmask := v.Args[1]
	load := v.Args[0]
	if load.Op != OpAMD64VMOVDQUload256 {
		return false
	}
	// Pull everything needed out of the load before the merge condition is
	// evaluated, keeping the same evaluation order as the rule.
	offset := auxIntToInt32(load.AuxInt)
	symbol := auxToSym(load.Aux)
	memory, addr := load.Args[1], load.Args[0]
	if !canMergeLoad(v, load) || !clobber(load) {
		return false
	}
	v.reset(OpAMD64VPLZCNTQMasked256load)
	v.AuxInt = int32ToAuxInt(offset)
	v.Aux = symToAux(symbol)
	v.AddArg3(addr, kmask, memory)
	return true
}
// rewriteValueAMD64_OpAMD64VPLZCNTQMasked512 merges the VMOVDQUload512 feeding
// v's first argument into v itself, keeping the mask operand.
//
// Rule:
//
//	(VPLZCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
//	  && canMergeLoad(v, l) && clobber(l)
//	  => (VPLZCNTQMasked512load {sym} [off] ptr mask mem)
//
// It reports whether v was rewritten.
func rewriteValueAMD64_OpAMD64VPLZCNTQMasked512(v *Value) bool {
	kmask := v.Args[1]
	load := v.Args[0]
	if load.Op != OpAMD64VMOVDQUload512 {
		return false
	}
	// Pull everything needed out of the load before the merge condition is
	// evaluated, keeping the same evaluation order as the rule.
	offset := auxIntToInt32(load.AuxInt)
	symbol := auxToSym(load.Aux)
	memory, addr := load.Args[1], load.Args[0]
	if !canMergeLoad(v, load) || !clobber(load) {
		return false
	}
	v.reset(OpAMD64VPLZCNTQMasked512load)
	v.AuxInt = int32ToAuxInt(offset)
	v.Aux = symToAux(symbol)
	v.AddArg3(addr, kmask, memory)
	return true
}
func rewriteValueAMD64_OpAMD64VPMAXSD128(v *Value) bool { func rewriteValueAMD64_OpAMD64VPMAXSD128(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]

View file

@ -16,7 +16,7 @@ const simdMachineOpsTmpl = `
package main package main
func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw, func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw,
wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload regInfo) []opData { wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload regInfo) []opData {
return []opData{ return []opData{
{{- range .OpsData }} {{- range .OpsData }}
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}},
@ -24,8 +24,11 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{{- range .OpsDataImm }} {{- range .OpsDataImm }}
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "UInt8", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}}, {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "UInt8", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}},
{{- end }} {{- end }}
{{- range .OpsDataload}} {{- range .OpsDataLoad}}
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", aux: "SymOff", symEffect: "Read", resultInArg0: {{.ResultInArg0}}}, {name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", aux: "SymOff", symEffect: "Read", resultInArg0: {{.ResultInArg0}}},
{{- end}}
{{- range .OpsDataImmLoad}}
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", aux: "SymValAndOff", symEffect: "Read", resultInArg0: {{.ResultInArg0}}},
{{- end}} {{- end}}
} }
} }
@ -50,17 +53,19 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
type machineOpsData struct { type machineOpsData struct {
OpsData []opData OpsData []opData
OpsDataImm []opData OpsDataImm []opData
OpsDataload []opData OpsDataLoad []opData
OpsDataImmLoad []opData
} }
regInfoSet := map[string]bool{ regInfoSet := map[string]bool{
"v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true, "v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true,
"w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true, "w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true,
"wkwload": true, "v21load": true, "v31load": true, "v11load": true, "w21load": true, "w31load": true, "w2kload": true, "w2kwload": true, "w11load": true, "wkwload": true, "v21load": true, "v31load": true, "v11load": true, "w21load": true, "w31load": true, "w2kload": true, "w2kwload": true, "w11load": true,
"w3kwload": true} "w3kwload": true, "w2kkload": true}
opsData := make([]opData, 0) opsData := make([]opData, 0)
opsDataImm := make([]opData, 0) opsDataImm := make([]opData, 0)
opsDataload := make([]opData, 0) opsDataLoad := make([]opData, 0)
opsDataImmLoad := make([]opData, 0)
// Determine the "best" version of an instruction to use // Determine the "best" version of an instruction to use
best := make(map[string]Operation) best := make(map[string]Operation)
@ -141,12 +146,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
if shapeOut == OneVregOutAtIn { if shapeOut == OneVregOutAtIn {
resultInArg0 = true resultInArg0 = true
} }
if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { var memOpData *opData
opsDataImm = append(opsDataImm, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
// TODO: right now we put the uint8 immediates in [Aux] field, but for load this field needs to be occupied by SymOff.
// we should handle uint8 aux in [AuxInt]. Before that we will skip memory ops with imm.
} else {
opsData = append(opsData, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
if op.MemFeatures != nil && *op.MemFeatures == "vbcst" { if op.MemFeatures != nil && *op.MemFeatures == "vbcst" {
// Right now we only have vbcst case // Right now we only have vbcst case
// Make a full vec memory variant. // Make a full vec memory variant.
@ -160,9 +160,19 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
log.Printf("Seen error: %e", err) log.Printf("Seen error: %e", err)
} }
} else { } else {
opsDataload = append(opsDataload, opData{asm + "load", gOp.Asm, len(gOp.In) + 1, regInfo, false, outType, resultInArg0}) memOpData = &opData{asm + "load", gOp.Asm, len(gOp.In) + 1, regInfo, false, outType, resultInArg0}
} }
} }
if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn {
opsDataImm = append(opsDataImm, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
if memOpData != nil {
opsDataImmLoad = append(opsDataImmLoad, *memOpData)
}
} else {
opsData = append(opsData, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
if memOpData != nil {
opsDataLoad = append(opsDataLoad, *memOpData)
}
} }
} }
if len(regInfoErrs) != 0 { if len(regInfoErrs) != 0 {
@ -177,10 +187,13 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
sort.Slice(opsDataImm, func(i, j int) bool { sort.Slice(opsDataImm, func(i, j int) bool {
return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0 return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0
}) })
sort.Slice(opsDataload, func(i, j int) bool { sort.Slice(opsDataLoad, func(i, j int) bool {
return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0 return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0
}) })
err := t.Execute(buffer, machineOpsData{opsData, opsDataImm, opsDataload}) sort.Slice(opsDataImmLoad, func(i, j int) bool {
return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0
})
err := t.Execute(buffer, machineOpsData{opsData, opsDataImm, opsDataLoad, opsDataImmLoad})
if err != nil { if err != nil {
panic(fmt.Errorf("failed to execute template: %w", err)) panic(fmt.Errorf("failed to execute template: %w", err))
} }

View file

@ -88,6 +88,12 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
"v2kvload", "v2kvload",
"v2kload", "v2kload",
"v11load", "v11load",
"v11loadImm8",
"vkvloadImm8",
"v21loadImm8",
"v2kloadImm8",
"v2kkloadImm8",
"v2kvloadImm8",
} }
regInfoSet := map[string][]string{} regInfoSet := map[string][]string{}
for _, key := range regInfoKeys { for _, key := range regInfoKeys {
@ -108,11 +114,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
regShape += "ResultInArg0" regShape += "ResultInArg0"
} }
if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn { if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn {
if mem == NoMem || mem == InvalidMem {
regShape += "Imm8" regShape += "Imm8"
} else {
return fmt.Errorf("simdgen cannot handle mem op with imm8 as of now")
}
} }
regShape, err = rewriteVecAsScalarRegInfo(op, regShape) regShape, err = rewriteVecAsScalarRegInfo(op, regShape)
if err != nil { if err != nil {