mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile, simd/_gen/simdgen: add const load mops
This CL adds the load + const imm8 variants for many instructions. Change-Id: I46116906077e33eabccc111be6d16019002f3474 Reviewed-on: https://go-review.googlesource.com/c/go/+/703395 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
1e5631d4e0
commit
3ec0b25ab7
9 changed files with 3639 additions and 33 deletions
|
|
@ -1365,6 +1365,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VCVTPS2UDQMasked128load,
|
ssa.OpAMD64VCVTPS2UDQMasked128load,
|
||||||
ssa.OpAMD64VCVTPS2UDQMasked256load,
|
ssa.OpAMD64VCVTPS2UDQMasked256load,
|
||||||
ssa.OpAMD64VCVTPS2UDQMasked512load,
|
ssa.OpAMD64VCVTPS2UDQMasked512load,
|
||||||
|
ssa.OpAMD64VPLZCNTDMasked128load,
|
||||||
|
ssa.OpAMD64VPLZCNTDMasked256load,
|
||||||
|
ssa.OpAMD64VPLZCNTDMasked512load,
|
||||||
|
ssa.OpAMD64VPLZCNTQMasked128load,
|
||||||
|
ssa.OpAMD64VPLZCNTQMasked256load,
|
||||||
|
ssa.OpAMD64VPLZCNTQMasked512load,
|
||||||
ssa.OpAMD64VPOPCNTDMasked128load,
|
ssa.OpAMD64VPOPCNTDMasked128load,
|
||||||
ssa.OpAMD64VPOPCNTDMasked256load,
|
ssa.OpAMD64VPOPCNTDMasked256load,
|
||||||
ssa.OpAMD64VPOPCNTDMasked512load,
|
ssa.OpAMD64VPOPCNTDMasked512load,
|
||||||
|
|
@ -1839,6 +1845,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VCVTPS2UDQ128load,
|
ssa.OpAMD64VCVTPS2UDQ128load,
|
||||||
ssa.OpAMD64VCVTPS2UDQ256load,
|
ssa.OpAMD64VCVTPS2UDQ256load,
|
||||||
ssa.OpAMD64VCVTPS2UDQ512load,
|
ssa.OpAMD64VCVTPS2UDQ512load,
|
||||||
|
ssa.OpAMD64VPLZCNTD128load,
|
||||||
|
ssa.OpAMD64VPLZCNTD256load,
|
||||||
|
ssa.OpAMD64VPLZCNTD512load,
|
||||||
|
ssa.OpAMD64VPLZCNTQ128load,
|
||||||
|
ssa.OpAMD64VPLZCNTQ256load,
|
||||||
|
ssa.OpAMD64VPLZCNTQ512load,
|
||||||
ssa.OpAMD64VPOPCNTD128load,
|
ssa.OpAMD64VPOPCNTD128load,
|
||||||
ssa.OpAMD64VPOPCNTD256load,
|
ssa.OpAMD64VPOPCNTD256load,
|
||||||
ssa.OpAMD64VPOPCNTD512load,
|
ssa.OpAMD64VPOPCNTD512load,
|
||||||
|
|
@ -1861,6 +1873,172 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
|
||||||
ssa.OpAMD64VSQRTPD512load:
|
ssa.OpAMD64VSQRTPD512load:
|
||||||
p = simdV11load(s, v)
|
p = simdV11load(s, v)
|
||||||
|
|
||||||
|
case ssa.OpAMD64VRNDSCALEPS128load,
|
||||||
|
ssa.OpAMD64VRNDSCALEPS256load,
|
||||||
|
ssa.OpAMD64VRNDSCALEPS512load,
|
||||||
|
ssa.OpAMD64VRNDSCALEPD128load,
|
||||||
|
ssa.OpAMD64VRNDSCALEPD256load,
|
||||||
|
ssa.OpAMD64VRNDSCALEPD512load,
|
||||||
|
ssa.OpAMD64VREDUCEPS128load,
|
||||||
|
ssa.OpAMD64VREDUCEPS256load,
|
||||||
|
ssa.OpAMD64VREDUCEPS512load,
|
||||||
|
ssa.OpAMD64VREDUCEPD128load,
|
||||||
|
ssa.OpAMD64VREDUCEPD256load,
|
||||||
|
ssa.OpAMD64VREDUCEPD512load,
|
||||||
|
ssa.OpAMD64VPSHUFD128load,
|
||||||
|
ssa.OpAMD64VPSHUFD256load,
|
||||||
|
ssa.OpAMD64VPSHUFD512load,
|
||||||
|
ssa.OpAMD64VPROLD128load,
|
||||||
|
ssa.OpAMD64VPROLD256load,
|
||||||
|
ssa.OpAMD64VPROLD512load,
|
||||||
|
ssa.OpAMD64VPROLQ128load,
|
||||||
|
ssa.OpAMD64VPROLQ256load,
|
||||||
|
ssa.OpAMD64VPROLQ512load,
|
||||||
|
ssa.OpAMD64VPRORD128load,
|
||||||
|
ssa.OpAMD64VPRORD256load,
|
||||||
|
ssa.OpAMD64VPRORD512load,
|
||||||
|
ssa.OpAMD64VPRORQ128load,
|
||||||
|
ssa.OpAMD64VPRORQ256load,
|
||||||
|
ssa.OpAMD64VPRORQ512load,
|
||||||
|
ssa.OpAMD64VPSLLD128constload,
|
||||||
|
ssa.OpAMD64VPSLLD256constload,
|
||||||
|
ssa.OpAMD64VPSLLD512constload,
|
||||||
|
ssa.OpAMD64VPSLLQ128constload,
|
||||||
|
ssa.OpAMD64VPSLLQ256constload,
|
||||||
|
ssa.OpAMD64VPSLLQ512constload,
|
||||||
|
ssa.OpAMD64VPSRLD128constload,
|
||||||
|
ssa.OpAMD64VPSRLD256constload,
|
||||||
|
ssa.OpAMD64VPSRLD512constload,
|
||||||
|
ssa.OpAMD64VPSRLQ128constload,
|
||||||
|
ssa.OpAMD64VPSRLQ256constload,
|
||||||
|
ssa.OpAMD64VPSRLQ512constload,
|
||||||
|
ssa.OpAMD64VPSRAD128constload,
|
||||||
|
ssa.OpAMD64VPSRAD256constload,
|
||||||
|
ssa.OpAMD64VPSRAD512constload,
|
||||||
|
ssa.OpAMD64VPSRAQ128constload,
|
||||||
|
ssa.OpAMD64VPSRAQ256constload,
|
||||||
|
ssa.OpAMD64VPSRAQ512constload:
|
||||||
|
p = simdV11loadImm8(s, v)
|
||||||
|
|
||||||
|
case ssa.OpAMD64VRNDSCALEPSMasked128load,
|
||||||
|
ssa.OpAMD64VRNDSCALEPSMasked256load,
|
||||||
|
ssa.OpAMD64VRNDSCALEPSMasked512load,
|
||||||
|
ssa.OpAMD64VRNDSCALEPDMasked128load,
|
||||||
|
ssa.OpAMD64VRNDSCALEPDMasked256load,
|
||||||
|
ssa.OpAMD64VRNDSCALEPDMasked512load,
|
||||||
|
ssa.OpAMD64VREDUCEPSMasked128load,
|
||||||
|
ssa.OpAMD64VREDUCEPSMasked256load,
|
||||||
|
ssa.OpAMD64VREDUCEPSMasked512load,
|
||||||
|
ssa.OpAMD64VREDUCEPDMasked128load,
|
||||||
|
ssa.OpAMD64VREDUCEPDMasked256load,
|
||||||
|
ssa.OpAMD64VREDUCEPDMasked512load,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked256load,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked512load,
|
||||||
|
ssa.OpAMD64VPSHUFDMasked128load,
|
||||||
|
ssa.OpAMD64VPROLDMasked128load,
|
||||||
|
ssa.OpAMD64VPROLDMasked256load,
|
||||||
|
ssa.OpAMD64VPROLDMasked512load,
|
||||||
|
ssa.OpAMD64VPROLQMasked128load,
|
||||||
|
ssa.OpAMD64VPROLQMasked256load,
|
||||||
|
ssa.OpAMD64VPROLQMasked512load,
|
||||||
|
ssa.OpAMD64VPRORDMasked128load,
|
||||||
|
ssa.OpAMD64VPRORDMasked256load,
|
||||||
|
ssa.OpAMD64VPRORDMasked512load,
|
||||||
|
ssa.OpAMD64VPRORQMasked128load,
|
||||||
|
ssa.OpAMD64VPRORQMasked256load,
|
||||||
|
ssa.OpAMD64VPRORQMasked512load,
|
||||||
|
ssa.OpAMD64VPSLLDMasked128constload,
|
||||||
|
ssa.OpAMD64VPSLLDMasked256constload,
|
||||||
|
ssa.OpAMD64VPSLLDMasked512constload,
|
||||||
|
ssa.OpAMD64VPSLLQMasked128constload,
|
||||||
|
ssa.OpAMD64VPSLLQMasked256constload,
|
||||||
|
ssa.OpAMD64VPSLLQMasked512constload,
|
||||||
|
ssa.OpAMD64VPSRLDMasked128constload,
|
||||||
|
ssa.OpAMD64VPSRLDMasked256constload,
|
||||||
|
ssa.OpAMD64VPSRLDMasked512constload,
|
||||||
|
ssa.OpAMD64VPSRLQMasked128constload,
|
||||||
|
ssa.OpAMD64VPSRLQMasked256constload,
|
||||||
|
ssa.OpAMD64VPSRLQMasked512constload,
|
||||||
|
ssa.OpAMD64VPSRADMasked128constload,
|
||||||
|
ssa.OpAMD64VPSRADMasked256constload,
|
||||||
|
ssa.OpAMD64VPSRADMasked512constload,
|
||||||
|
ssa.OpAMD64VPSRAQMasked128constload,
|
||||||
|
ssa.OpAMD64VPSRAQMasked256constload,
|
||||||
|
ssa.OpAMD64VPSRAQMasked512constload:
|
||||||
|
p = simdVkvloadImm8(s, v)
|
||||||
|
|
||||||
|
case ssa.OpAMD64VCMPPS128load,
|
||||||
|
ssa.OpAMD64VCMPPS256load,
|
||||||
|
ssa.OpAMD64VCMPPD128load,
|
||||||
|
ssa.OpAMD64VCMPPD256load,
|
||||||
|
ssa.OpAMD64VGF2P8AFFINEQB128load,
|
||||||
|
ssa.OpAMD64VGF2P8AFFINEQB256load,
|
||||||
|
ssa.OpAMD64VGF2P8AFFINEQB512load,
|
||||||
|
ssa.OpAMD64VGF2P8AFFINEINVQB128load,
|
||||||
|
ssa.OpAMD64VGF2P8AFFINEINVQB256load,
|
||||||
|
ssa.OpAMD64VGF2P8AFFINEINVQB512load,
|
||||||
|
ssa.OpAMD64VPSHLDD128load,
|
||||||
|
ssa.OpAMD64VPSHLDD256load,
|
||||||
|
ssa.OpAMD64VPSHLDD512load,
|
||||||
|
ssa.OpAMD64VPSHLDQ128load,
|
||||||
|
ssa.OpAMD64VPSHLDQ256load,
|
||||||
|
ssa.OpAMD64VPSHLDQ512load,
|
||||||
|
ssa.OpAMD64VPSHRDD128load,
|
||||||
|
ssa.OpAMD64VPSHRDD256load,
|
||||||
|
ssa.OpAMD64VPSHRDD512load,
|
||||||
|
ssa.OpAMD64VPSHRDQ128load,
|
||||||
|
ssa.OpAMD64VPSHRDQ256load,
|
||||||
|
ssa.OpAMD64VPSHRDQ512load:
|
||||||
|
p = simdV21loadImm8(s, v)
|
||||||
|
|
||||||
|
case ssa.OpAMD64VCMPPS512load,
|
||||||
|
ssa.OpAMD64VCMPPD512load,
|
||||||
|
ssa.OpAMD64VPCMPUD512load,
|
||||||
|
ssa.OpAMD64VPCMPUQ512load,
|
||||||
|
ssa.OpAMD64VPCMPD512load,
|
||||||
|
ssa.OpAMD64VPCMPQ512load:
|
||||||
|
p = simdV2kloadImm8(s, v)
|
||||||
|
|
||||||
|
case ssa.OpAMD64VCMPPSMasked128load,
|
||||||
|
ssa.OpAMD64VCMPPSMasked256load,
|
||||||
|
ssa.OpAMD64VCMPPSMasked512load,
|
||||||
|
ssa.OpAMD64VCMPPDMasked128load,
|
||||||
|
ssa.OpAMD64VCMPPDMasked256load,
|
||||||
|
ssa.OpAMD64VCMPPDMasked512load,
|
||||||
|
ssa.OpAMD64VPCMPDMasked128load,
|
||||||
|
ssa.OpAMD64VPCMPDMasked256load,
|
||||||
|
ssa.OpAMD64VPCMPDMasked512load,
|
||||||
|
ssa.OpAMD64VPCMPQMasked128load,
|
||||||
|
ssa.OpAMD64VPCMPQMasked256load,
|
||||||
|
ssa.OpAMD64VPCMPQMasked512load,
|
||||||
|
ssa.OpAMD64VPCMPUDMasked128load,
|
||||||
|
ssa.OpAMD64VPCMPUDMasked256load,
|
||||||
|
ssa.OpAMD64VPCMPUDMasked512load,
|
||||||
|
ssa.OpAMD64VPCMPUQMasked128load,
|
||||||
|
ssa.OpAMD64VPCMPUQMasked256load,
|
||||||
|
ssa.OpAMD64VPCMPUQMasked512load:
|
||||||
|
p = simdV2kkloadImm8(s, v)
|
||||||
|
|
||||||
|
case ssa.OpAMD64VGF2P8AFFINEINVQBMasked128load,
|
||||||
|
ssa.OpAMD64VGF2P8AFFINEINVQBMasked256load,
|
||||||
|
ssa.OpAMD64VGF2P8AFFINEINVQBMasked512load,
|
||||||
|
ssa.OpAMD64VGF2P8AFFINEQBMasked128load,
|
||||||
|
ssa.OpAMD64VGF2P8AFFINEQBMasked256load,
|
||||||
|
ssa.OpAMD64VGF2P8AFFINEQBMasked512load,
|
||||||
|
ssa.OpAMD64VPSHLDDMasked128load,
|
||||||
|
ssa.OpAMD64VPSHLDDMasked256load,
|
||||||
|
ssa.OpAMD64VPSHLDDMasked512load,
|
||||||
|
ssa.OpAMD64VPSHLDQMasked128load,
|
||||||
|
ssa.OpAMD64VPSHLDQMasked256load,
|
||||||
|
ssa.OpAMD64VPSHLDQMasked512load,
|
||||||
|
ssa.OpAMD64VPSHRDDMasked128load,
|
||||||
|
ssa.OpAMD64VPSHRDDMasked256load,
|
||||||
|
ssa.OpAMD64VPSHRDDMasked512load,
|
||||||
|
ssa.OpAMD64VPSHRDQMasked128load,
|
||||||
|
ssa.OpAMD64VPSHRDQMasked256load,
|
||||||
|
ssa.OpAMD64VPSHRDQMasked512load:
|
||||||
|
p = simdV2kvloadImm8(s, v)
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// Unknown reg shape
|
// Unknown reg shape
|
||||||
return false
|
return false
|
||||||
|
|
|
||||||
|
|
@ -2211,6 +2211,97 @@ func simdV11load(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// simdV11loadImm8 builds the Prog for v, an op that takes a constant plus a
// single input read from memory and writes a SIMD register.
//
// The constant and the memory offset both come from v's ValAndOff aux: the
// Val part becomes the constant From operand, and the Off part is handed to
// ssagen.AddAux2 to complete the memory operand, whose base register is
// Args[0]. The memory operand is appended as a rest source and the
// destination is simdReg(v).
//
// Example instruction: VPSHUFD $7, (BX), X11
|
||||||
|
func simdV11loadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||||
|
sc := v.AuxValAndOff()
|
||||||
|
p := s.Prog(v.Op.Asm())
|
||||||
|
p.From.Type = obj.TYPE_CONST
|
||||||
|
p.From.Offset = sc.Val64()
|
||||||
|
m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
|
||||||
|
ssagen.AddAux2(&m, v, sc.Off64())
|
||||||
|
p.AddRestSource(m)
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = simdReg(v)
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
// simdVkvloadImm8 builds the Prog for v, an op that takes a constant, a
// single input read from memory and a mask, and writes a SIMD register.
//
// The constant and the memory offset both come from v's ValAndOff aux: the
// Val part becomes the constant From operand, and the Off part is handed to
// ssagen.AddAux2 to complete the memory operand, whose base register is
// Args[0]. The rest sources are appended in order: the memory operand, then
// maskReg(Args[1]). The destination is simdReg(v).
//
// Example instruction: VPRORD $81, -15(R14), K7, Y1
|
||||||
|
func simdVkvloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||||
|
sc := v.AuxValAndOff()
|
||||||
|
p := s.Prog(v.Op.Asm())
|
||||||
|
p.From.Type = obj.TYPE_CONST
|
||||||
|
p.From.Offset = sc.Val64()
|
||||||
|
m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
|
||||||
|
ssagen.AddAux2(&m, v, sc.Off64())
|
||||||
|
p.AddRestSource(m)
|
||||||
|
p.AddRestSourceReg(maskReg(v.Args[1]))
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = simdReg(v)
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
// simdV21loadImm8 builds the Prog for v, an op that takes a constant, one
// SIMD register input and one input read from memory, and writes a SIMD
// register.
//
// The constant and the memory offset both come from v's ValAndOff aux: the
// Val part becomes the constant From operand, and the Off part is handed to
// ssagen.AddAux2 to complete the memory operand. Note that the memory base
// register is Args[1], while Args[0] is the register input. The rest sources
// are appended in order: the memory operand, then simdReg(Args[0]). The
// destination is simdReg(v).
//
// Example instruction: VPSHLDD $82, 7(SI), Y21, Y3
|
||||||
|
func simdV21loadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||||
|
sc := v.AuxValAndOff()
|
||||||
|
p := s.Prog(v.Op.Asm())
|
||||||
|
p.From.Type = obj.TYPE_CONST
|
||||||
|
p.From.Offset = sc.Val64()
|
||||||
|
m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[1].Reg()}
|
||||||
|
ssagen.AddAux2(&m, v, sc.Off64())
|
||||||
|
p.AddRestSource(m)
|
||||||
|
p.AddRestSourceReg(simdReg(v.Args[0]))
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = simdReg(v)
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
// simdV2kloadImm8 builds the Prog for v, an op that takes a constant, one
// SIMD register input and one input read from memory, and writes a mask
// register.
//
// The constant and the memory offset both come from v's ValAndOff aux: the
// Val part becomes the constant From operand, and the Off part is handed to
// ssagen.AddAux2 to complete the memory operand. The memory base register is
// Args[1], while Args[0] is the register input. The rest sources are appended
// in order: the memory operand, then simdReg(Args[0]). Unlike
// simdV21loadImm8, the destination is maskReg(v).
//
// Example instruction: VCMPPS $81, -7(DI), Y16, K3
|
||||||
|
func simdV2kloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||||
|
sc := v.AuxValAndOff()
|
||||||
|
p := s.Prog(v.Op.Asm())
|
||||||
|
p.From.Type = obj.TYPE_CONST
|
||||||
|
p.From.Offset = sc.Val64()
|
||||||
|
m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[1].Reg()}
|
||||||
|
ssagen.AddAux2(&m, v, sc.Off64())
|
||||||
|
p.AddRestSource(m)
|
||||||
|
p.AddRestSourceReg(simdReg(v.Args[0]))
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = maskReg(v)
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
// simdV2kkloadImm8 builds the Prog for v, an op that takes a constant, one
// SIMD register input, one input read from memory and a mask, and writes a
// mask register.
//
// The constant and the memory offset both come from v's ValAndOff aux: the
// Val part becomes the constant From operand, and the Off part is handed to
// ssagen.AddAux2 to complete the memory operand. The memory base register is
// Args[1], Args[0] is the register input and Args[2] is the mask. The rest
// sources are appended in order: the memory operand, simdReg(Args[0]), then
// maskReg(Args[2]). The destination is maskReg(v).
//
// Example instruction: VCMPPS $81, -7(DI), Y16, K1, K3
|
||||||
|
func simdV2kkloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||||
|
sc := v.AuxValAndOff()
|
||||||
|
p := s.Prog(v.Op.Asm())
|
||||||
|
p.From.Type = obj.TYPE_CONST
|
||||||
|
p.From.Offset = sc.Val64()
|
||||||
|
m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[1].Reg()}
|
||||||
|
ssagen.AddAux2(&m, v, sc.Off64())
|
||||||
|
p.AddRestSource(m)
|
||||||
|
p.AddRestSourceReg(simdReg(v.Args[0]))
|
||||||
|
p.AddRestSourceReg(maskReg(v.Args[2]))
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = maskReg(v)
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
// simdV2kvloadImm8 builds the Prog for v, an op that takes a constant, one
// SIMD register input, one input read from memory and a mask, and writes a
// SIMD register.
//
// The constant and the memory offset both come from v's ValAndOff aux: the
// Val part becomes the constant From operand, and the Off part is handed to
// ssagen.AddAux2 to complete the memory operand. The memory base register is
// Args[1], Args[0] is the register input and Args[2] is the mask. The rest
// sources are appended in order: the memory operand, simdReg(Args[0]), then
// maskReg(Args[2]). Unlike simdV2kkloadImm8, the destination is simdReg(v).
//
// Example instruction: VGF2P8AFFINEINVQB $64, -17(BP), X31, K3, X26
|
||||||
|
func simdV2kvloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
|
||||||
|
sc := v.AuxValAndOff()
|
||||||
|
p := s.Prog(v.Op.Asm())
|
||||||
|
p.From.Type = obj.TYPE_CONST
|
||||||
|
p.From.Offset = sc.Val64()
|
||||||
|
m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[1].Reg()}
|
||||||
|
ssagen.AddAux2(&m, v, sc.Off64())
|
||||||
|
p.AddRestSource(m)
|
||||||
|
p.AddRestSourceReg(simdReg(v.Args[0]))
|
||||||
|
p.AddRestSourceReg(maskReg(v.Args[2]))
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = simdReg(v)
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
var blockJump = [...]struct {
|
var blockJump = [...]struct {
|
||||||
asm, invasm obj.As
|
asm, invasm obj.As
|
||||||
}{
|
}{
|
||||||
|
|
|
||||||
|
|
@ -256,6 +256,7 @@ func init() {
|
||||||
w2kwload = regInfo{inputs: []regMask{wz, gpspsb, mask, 0}, outputs: wonly}
|
w2kwload = regInfo{inputs: []regMask{wz, gpspsb, mask, 0}, outputs: wonly}
|
||||||
w11load = regInfo{inputs: []regMask{gpspsb, 0}, outputs: wonly}
|
w11load = regInfo{inputs: []regMask{gpspsb, 0}, outputs: wonly}
|
||||||
w3kwload = regInfo{inputs: []regMask{w, wz, gpspsb, mask, 0}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
|
w3kwload = regInfo{inputs: []regMask{w, wz, gpspsb, mask, 0}, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||||
|
w2kkload = regInfo{inputs: []regMask{wz, gpspsb, mask, 0}, outputs: maskonly}
|
||||||
|
|
||||||
kload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly}
|
kload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly}
|
||||||
kstore = regInfo{inputs: []regMask{gpspsb, mask, 0}}
|
kstore = regInfo{inputs: []regMask{gpspsb, mask, 0}}
|
||||||
|
|
@ -1459,7 +1460,7 @@ func init() {
|
||||||
genSIMDfile: "../../amd64/simdssa.go",
|
genSIMDfile: "../../amd64/simdssa.go",
|
||||||
ops: append(AMD64ops, simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv,
|
ops: append(AMD64ops, simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv,
|
||||||
w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw, wkwload, v21load, v31load, v11load,
|
w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw, wkwload, v21load, v31load, v11load,
|
||||||
w21load, w31load, w2kload, w2kwload, w11load, w3kwload)...), // AMD64ops,
|
w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload)...), // AMD64ops,
|
||||||
blocks: AMD64blocks,
|
blocks: AMD64blocks,
|
||||||
regnames: regNamesAMD64,
|
regnames: regNamesAMD64,
|
||||||
ParamIntRegNames: "AX BX CX DI SI R8 R9 R10 R11",
|
ParamIntRegNames: "AX BX CX DI SI R8 R9 R10 R11",
|
||||||
|
|
|
||||||
|
|
@ -1605,6 +1605,18 @@
|
||||||
(VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ512load {sym} [off] x ptr mem)
|
(VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ512load {sym} [off] x ptr mem)
|
||||||
(VPUNPCKLQDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ256load {sym} [off] x ptr mem)
|
(VPUNPCKLQDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ256load {sym} [off] x ptr mem)
|
||||||
(VPUNPCKLQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ512load {sym} [off] x ptr mem)
|
(VPUNPCKLQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ512load {sym} [off] x ptr mem)
|
||||||
|
(VPLZCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTD128load {sym} [off] ptr mem)
|
||||||
|
(VPLZCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTD256load {sym} [off] ptr mem)
|
||||||
|
(VPLZCNTD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTD512load {sym} [off] ptr mem)
|
||||||
|
(VPLZCNTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQ128load {sym} [off] ptr mem)
|
||||||
|
(VPLZCNTQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQ256load {sym} [off] ptr mem)
|
||||||
|
(VPLZCNTQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQ512load {sym} [off] ptr mem)
|
||||||
|
(VPLZCNTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTDMasked128load {sym} [off] ptr mask mem)
|
||||||
|
(VPLZCNTDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTDMasked256load {sym} [off] ptr mask mem)
|
||||||
|
(VPLZCNTDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTDMasked512load {sym} [off] ptr mask mem)
|
||||||
|
(VPLZCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQMasked128load {sym} [off] ptr mask mem)
|
||||||
|
(VPLZCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQMasked256load {sym} [off] ptr mask mem)
|
||||||
|
(VPLZCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPLZCNTQMasked512load {sym} [off] ptr mask mem)
|
||||||
(VMAXPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS128load {sym} [off] x ptr mem)
|
(VMAXPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS128load {sym} [off] x ptr mem)
|
||||||
(VMAXPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS256load {sym} [off] x ptr mem)
|
(VMAXPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS256load {sym} [off] x ptr mem)
|
||||||
(VMAXPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS512load {sym} [off] x ptr mem)
|
(VMAXPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS512load {sym} [off] x ptr mem)
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw,
|
func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw,
|
||||||
wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload regInfo) []opData {
|
wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload regInfo) []opData {
|
||||||
return []opData{
|
return []opData{
|
||||||
{name: "VADDPD128", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false},
|
{name: "VADDPD128", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false},
|
||||||
{name: "VADDPD256", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false},
|
{name: "VADDPD256", argLength: 2, reg: v21, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false},
|
||||||
|
|
@ -1446,6 +1446,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VPUNPCKLDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
{name: "VPUNPCKLDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKLDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
{name: "VPUNPCKLQDQ256load", argLength: 3, reg: v21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
{name: "VPUNPCKLQDQ256load", argLength: 3, reg: v21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
{name: "VPUNPCKLQDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
{name: "VPUNPCKLQDQ512load", argLength: 3, reg: w21load, asm: "VPUNPCKLQDQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPLZCNTD128load", argLength: 2, reg: w11load, asm: "VPLZCNTD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPLZCNTD256load", argLength: 2, reg: w11load, asm: "VPLZCNTD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPLZCNTD512load", argLength: 2, reg: w11load, asm: "VPLZCNTD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPLZCNTQ128load", argLength: 2, reg: w11load, asm: "VPLZCNTQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPLZCNTQ256load", argLength: 2, reg: w11load, asm: "VPLZCNTQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPLZCNTQ512load", argLength: 2, reg: w11load, asm: "VPLZCNTQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPLZCNTDMasked128load", argLength: 3, reg: wkwload, asm: "VPLZCNTD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPLZCNTDMasked256load", argLength: 3, reg: wkwload, asm: "VPLZCNTD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPLZCNTDMasked512load", argLength: 3, reg: wkwload, asm: "VPLZCNTD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPLZCNTQMasked128load", argLength: 3, reg: wkwload, asm: "VPLZCNTQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPLZCNTQMasked256load", argLength: 3, reg: wkwload, asm: "VPLZCNTQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPLZCNTQMasked512load", argLength: 3, reg: wkwload, asm: "VPLZCNTQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
{name: "VMAXPS128load", argLength: 3, reg: v21load, asm: "VMAXPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
{name: "VMAXPS128load", argLength: 3, reg: v21load, asm: "VMAXPS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
{name: "VMAXPS256load", argLength: 3, reg: v21load, asm: "VMAXPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
{name: "VMAXPS256load", argLength: 3, reg: v21load, asm: "VMAXPS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
{name: "VMAXPS512load", argLength: 3, reg: w21load, asm: "VMAXPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
{name: "VMAXPS512load", argLength: 3, reg: w21load, asm: "VMAXPS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
|
@@ -1804,5 +1816,159 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{name: "VPXORQMasked512load", argLength: 4, reg: w2kwload, asm: "VPXORQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
{name: "VPXORQMasked512load", argLength: 4, reg: w2kwload, asm: "VPXORQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
{name: "VPBLENDMDMasked512load", argLength: 4, reg: w2kwload, asm: "VPBLENDMD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
{name: "VPBLENDMDMasked512load", argLength: 4, reg: w2kwload, asm: "VPBLENDMD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
{name: "VPBLENDMQMasked512load", argLength: 4, reg: w2kwload, asm: "VPBLENDMQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
{name: "VPBLENDMQMasked512load", argLength: 4, reg: w2kwload, asm: "VPBLENDMQ", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VRNDSCALEPS128load", argLength: 2, reg: w11load, asm: "VRNDSCALEPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VRNDSCALEPS256load", argLength: 2, reg: w11load, asm: "VRNDSCALEPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VRNDSCALEPS512load", argLength: 2, reg: w11load, asm: "VRNDSCALEPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VRNDSCALEPD128load", argLength: 2, reg: w11load, asm: "VRNDSCALEPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VRNDSCALEPD256load", argLength: 2, reg: w11load, asm: "VRNDSCALEPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VRNDSCALEPD512load", argLength: 2, reg: w11load, asm: "VRNDSCALEPD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VRNDSCALEPSMasked128load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VRNDSCALEPSMasked256load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VRNDSCALEPSMasked512load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VRNDSCALEPDMasked128load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VRNDSCALEPDMasked256load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VRNDSCALEPDMasked512load", argLength: 3, reg: wkwload, asm: "VRNDSCALEPD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VREDUCEPS128load", argLength: 2, reg: w11load, asm: "VREDUCEPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VREDUCEPS256load", argLength: 2, reg: w11load, asm: "VREDUCEPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VREDUCEPS512load", argLength: 2, reg: w11load, asm: "VREDUCEPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VREDUCEPD128load", argLength: 2, reg: w11load, asm: "VREDUCEPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VREDUCEPD256load", argLength: 2, reg: w11load, asm: "VREDUCEPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VREDUCEPD512load", argLength: 2, reg: w11load, asm: "VREDUCEPD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VREDUCEPSMasked128load", argLength: 3, reg: wkwload, asm: "VREDUCEPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VREDUCEPSMasked256load", argLength: 3, reg: wkwload, asm: "VREDUCEPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VREDUCEPSMasked512load", argLength: 3, reg: wkwload, asm: "VREDUCEPS", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VREDUCEPDMasked128load", argLength: 3, reg: wkwload, asm: "VREDUCEPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VREDUCEPDMasked256load", argLength: 3, reg: wkwload, asm: "VREDUCEPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VREDUCEPDMasked512load", argLength: 3, reg: wkwload, asm: "VREDUCEPD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VCMPPS128load", argLength: 3, reg: v21load, asm: "VCMPPS", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VCMPPS256load", argLength: 3, reg: v21load, asm: "VCMPPS", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VCMPPS512load", argLength: 3, reg: w2kload, asm: "VCMPPS", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VCMPPD128load", argLength: 3, reg: v21load, asm: "VCMPPD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VCMPPD256load", argLength: 3, reg: v21load, asm: "VCMPPD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VCMPPD512load", argLength: 3, reg: w2kload, asm: "VCMPPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VCMPPSMasked128load", argLength: 4, reg: w2kkload, asm: "VCMPPS", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VCMPPSMasked256load", argLength: 4, reg: w2kkload, asm: "VCMPPS", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VCMPPSMasked512load", argLength: 4, reg: w2kkload, asm: "VCMPPS", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VCMPPDMasked128load", argLength: 4, reg: w2kkload, asm: "VCMPPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VCMPPDMasked256load", argLength: 4, reg: w2kkload, asm: "VCMPPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VCMPPDMasked512load", argLength: 4, reg: w2kkload, asm: "VCMPPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPDMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPDMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPDMasked512load", argLength: 4, reg: w2kkload, asm: "VPCMPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPQMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPQMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPQMasked512load", argLength: 4, reg: w2kkload, asm: "VPCMPQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPUDMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPUD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPUDMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPUD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPUDMasked512load", argLength: 4, reg: w2kkload, asm: "VPCMPUD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPUQMasked128load", argLength: 4, reg: w2kkload, asm: "VPCMPUQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPUQMasked256load", argLength: 4, reg: w2kkload, asm: "VPCMPUQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPUQMasked512load", argLength: 4, reg: w2kkload, asm: "VPCMPUQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VGF2P8AFFINEQB128load", argLength: 3, reg: w21load, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VGF2P8AFFINEQB256load", argLength: 3, reg: w21load, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VGF2P8AFFINEQB512load", argLength: 3, reg: w21load, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VGF2P8AFFINEINVQB128load", argLength: 3, reg: w21load, asm: "VGF2P8AFFINEINVQB", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VGF2P8AFFINEINVQB256load", argLength: 3, reg: w21load, asm: "VGF2P8AFFINEINVQB", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VGF2P8AFFINEINVQB512load", argLength: 3, reg: w21load, asm: "VGF2P8AFFINEINVQB", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VGF2P8AFFINEINVQBMasked128load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEINVQB", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VGF2P8AFFINEINVQBMasked256load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEINVQB", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VGF2P8AFFINEINVQBMasked512load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEINVQB", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VGF2P8AFFINEQBMasked128load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VGF2P8AFFINEQBMasked256load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VGF2P8AFFINEQBMasked512load", argLength: 4, reg: w2kwload, asm: "VGF2P8AFFINEQB", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPUD512load", argLength: 3, reg: w2kload, asm: "VPCMPUD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPUQ512load", argLength: 3, reg: w2kload, asm: "VPCMPUQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPD512load", argLength: 3, reg: w2kload, asm: "VPCMPD", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPCMPQ512load", argLength: 3, reg: w2kload, asm: "VPCMPQ", commutative: false, typ: "Mask", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHUFD128load", argLength: 2, reg: v11load, asm: "VPSHUFD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHUFD256load", argLength: 2, reg: v11load, asm: "VPSHUFD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHUFD512load", argLength: 2, reg: w11load, asm: "VPSHUFD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHUFDMasked256load", argLength: 3, reg: wkwload, asm: "VPSHUFD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHUFDMasked512load", argLength: 3, reg: wkwload, asm: "VPSHUFD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHUFDMasked128load", argLength: 3, reg: wkwload, asm: "VPSHUFD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPROLD128load", argLength: 2, reg: w11load, asm: "VPROLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPROLD256load", argLength: 2, reg: w11load, asm: "VPROLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPROLD512load", argLength: 2, reg: w11load, asm: "VPROLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPROLQ128load", argLength: 2, reg: w11load, asm: "VPROLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPROLQ256load", argLength: 2, reg: w11load, asm: "VPROLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPROLQ512load", argLength: 2, reg: w11load, asm: "VPROLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPROLDMasked128load", argLength: 3, reg: wkwload, asm: "VPROLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPROLDMasked256load", argLength: 3, reg: wkwload, asm: "VPROLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPROLDMasked512load", argLength: 3, reg: wkwload, asm: "VPROLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPROLQMasked128load", argLength: 3, reg: wkwload, asm: "VPROLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPROLQMasked256load", argLength: 3, reg: wkwload, asm: "VPROLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPROLQMasked512load", argLength: 3, reg: wkwload, asm: "VPROLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPRORD128load", argLength: 2, reg: w11load, asm: "VPRORD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPRORD256load", argLength: 2, reg: w11load, asm: "VPRORD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPRORD512load", argLength: 2, reg: w11load, asm: "VPRORD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPRORQ128load", argLength: 2, reg: w11load, asm: "VPRORQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPRORQ256load", argLength: 2, reg: w11load, asm: "VPRORQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPRORQ512load", argLength: 2, reg: w11load, asm: "VPRORQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPRORDMasked128load", argLength: 3, reg: wkwload, asm: "VPRORD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPRORDMasked256load", argLength: 3, reg: wkwload, asm: "VPRORD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPRORDMasked512load", argLength: 3, reg: wkwload, asm: "VPRORD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPRORQMasked128load", argLength: 3, reg: wkwload, asm: "VPRORQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPRORQMasked256load", argLength: 3, reg: wkwload, asm: "VPRORQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPRORQMasked512load", argLength: 3, reg: wkwload, asm: "VPRORQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHLDD128load", argLength: 3, reg: w21load, asm: "VPSHLDD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHLDD256load", argLength: 3, reg: w21load, asm: "VPSHLDD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHLDD512load", argLength: 3, reg: w21load, asm: "VPSHLDD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHLDQ128load", argLength: 3, reg: w21load, asm: "VPSHLDQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHLDQ256load", argLength: 3, reg: w21load, asm: "VPSHLDQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHLDQ512load", argLength: 3, reg: w21load, asm: "VPSHLDQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHLDDMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHLDD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHLDDMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHLDD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHLDDMasked512load", argLength: 4, reg: w2kwload, asm: "VPSHLDD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHLDQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHLDQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHLDQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHLDQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHLDQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSHLDQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHRDD128load", argLength: 3, reg: w21load, asm: "VPSHRDD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHRDD256load", argLength: 3, reg: w21load, asm: "VPSHRDD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHRDD512load", argLength: 3, reg: w21load, asm: "VPSHRDD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHRDQ128load", argLength: 3, reg: w21load, asm: "VPSHRDQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHRDQ256load", argLength: 3, reg: w21load, asm: "VPSHRDQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHRDQ512load", argLength: 3, reg: w21load, asm: "VPSHRDQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHRDDMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHRDD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHRDDMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHRDD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHRDDMasked512load", argLength: 4, reg: w2kwload, asm: "VPSHRDD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHRDQMasked128load", argLength: 4, reg: w2kwload, asm: "VPSHRDQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHRDQMasked256load", argLength: 4, reg: w2kwload, asm: "VPSHRDQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSHRDQMasked512load", argLength: 4, reg: w2kwload, asm: "VPSHRDQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSLLD128constload", argLength: 2, reg: v11load, asm: "VPSLLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSLLD256constload", argLength: 2, reg: v11load, asm: "VPSLLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSLLD512constload", argLength: 2, reg: w11load, asm: "VPSLLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSLLQ128constload", argLength: 2, reg: v11load, asm: "VPSLLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSLLQ256constload", argLength: 2, reg: v11load, asm: "VPSLLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSLLQ512constload", argLength: 2, reg: w11load, asm: "VPSLLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSLLDMasked128constload", argLength: 3, reg: wkwload, asm: "VPSLLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSLLDMasked256constload", argLength: 3, reg: wkwload, asm: "VPSLLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSLLDMasked512constload", argLength: 3, reg: wkwload, asm: "VPSLLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSLLQMasked128constload", argLength: 3, reg: wkwload, asm: "VPSLLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSLLQMasked256constload", argLength: 3, reg: wkwload, asm: "VPSLLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSLLQMasked512constload", argLength: 3, reg: wkwload, asm: "VPSLLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRLD128constload", argLength: 2, reg: v11load, asm: "VPSRLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRLD256constload", argLength: 2, reg: v11load, asm: "VPSRLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRLD512constload", argLength: 2, reg: w11load, asm: "VPSRLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRLQ128constload", argLength: 2, reg: v11load, asm: "VPSRLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRLQ256constload", argLength: 2, reg: v11load, asm: "VPSRLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRLQ512constload", argLength: 2, reg: w11load, asm: "VPSRLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRAD128constload", argLength: 2, reg: v11load, asm: "VPSRAD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRAD256constload", argLength: 2, reg: v11load, asm: "VPSRAD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRAD512constload", argLength: 2, reg: w11load, asm: "VPSRAD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRAQ128constload", argLength: 2, reg: w11load, asm: "VPSRAQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRAQ256constload", argLength: 2, reg: w11load, asm: "VPSRAQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRAQ512constload", argLength: 2, reg: w11load, asm: "VPSRAQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRLDMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRLD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRLDMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRLD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRLDMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRLD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRLQMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRLQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRLQMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRLQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRLQMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRLQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRADMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRAD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRADMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRAD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRADMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRAD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRAQMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRAQMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
|
{name: "VPSRAQMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@@ -1027,6 +1027,30 @@ func rewriteValueAMD64(v *Value) bool {
|
||||||
return rewriteValueAMD64_OpAMD64VPINSRD128(v)
|
return rewriteValueAMD64_OpAMD64VPINSRD128(v)
|
||||||
case OpAMD64VPINSRQ128:
|
case OpAMD64VPINSRQ128:
|
||||||
return rewriteValueAMD64_OpAMD64VPINSRQ128(v)
|
return rewriteValueAMD64_OpAMD64VPINSRQ128(v)
|
||||||
|
case OpAMD64VPLZCNTD128:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPLZCNTD128(v)
|
||||||
|
case OpAMD64VPLZCNTD256:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPLZCNTD256(v)
|
||||||
|
case OpAMD64VPLZCNTD512:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPLZCNTD512(v)
|
||||||
|
case OpAMD64VPLZCNTDMasked128:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPLZCNTDMasked128(v)
|
||||||
|
case OpAMD64VPLZCNTDMasked256:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPLZCNTDMasked256(v)
|
||||||
|
case OpAMD64VPLZCNTDMasked512:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPLZCNTDMasked512(v)
|
||||||
|
case OpAMD64VPLZCNTQ128:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPLZCNTQ128(v)
|
||||||
|
case OpAMD64VPLZCNTQ256:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPLZCNTQ256(v)
|
||||||
|
case OpAMD64VPLZCNTQ512:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPLZCNTQ512(v)
|
||||||
|
case OpAMD64VPLZCNTQMasked128:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPLZCNTQMasked128(v)
|
||||||
|
case OpAMD64VPLZCNTQMasked256:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPLZCNTQMasked256(v)
|
||||||
|
case OpAMD64VPLZCNTQMasked512:
|
||||||
|
return rewriteValueAMD64_OpAMD64VPLZCNTQMasked512(v)
|
||||||
case OpAMD64VPMAXSD128:
|
case OpAMD64VPMAXSD128:
|
||||||
return rewriteValueAMD64_OpAMD64VPMAXSD128(v)
|
return rewriteValueAMD64_OpAMD64VPMAXSD128(v)
|
||||||
case OpAMD64VPMAXSD256:
|
case OpAMD64VPMAXSD256:
|
||||||
|
|
@@ -37718,6 +37742,318 @@ func rewriteValueAMD64_OpAMD64VPINSRQ128(v *Value) bool {
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
// rewriteValueAMD64_OpAMD64VPLZCNTD128 tries to fold a VMOVDQUload128 that
// feeds v (a VPLZCNTD128) into a single VPLZCNTD128load, which takes the
// load's pointer and memory arguments directly. It reports whether v was
// rewritten.
// NOTE(review): the match/cond/result comments suggest this is generated
// rewrite-rule output; confirm before hand-editing.
func rewriteValueAMD64_OpAMD64VPLZCNTD128(v *Value) bool {
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPLZCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem))
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPLZCNTD128load {sym} [off] ptr mem)
|
||||||
|
// The loop body runs at most once: it either returns true or breaks out to
// the final return false.
for {
|
||||||
|
l := v_0
|
||||||
|
if l.Op != OpAMD64VMOVDQUload128 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Capture the load's offset, symbol, memory and pointer so they can be
// attached to v after it is reset.
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
// clobber(l) is only evaluated when canMergeLoad(v, l) is true. Both helpers
// are defined elsewhere; presumably canMergeLoad checks that folding is legal
// and clobber marks l as dead — TODO confirm.
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// Turn v into the load-folding op, carrying over the load's offset and symbol.
v.reset(OpAMD64VPLZCNTD128load)
|
||||||
|
v.AuxInt = int32ToAuxInt(off)
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg2(ptr, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// The rule did not apply.
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPLZCNTD256(v *Value) bool {
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPLZCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem))
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPLZCNTD256load {sym} [off] ptr mem)
|
||||||
|
for {
|
||||||
|
l := v_0
|
||||||
|
if l.Op != OpAMD64VMOVDQUload256 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPLZCNTD256load)
|
||||||
|
v.AuxInt = int32ToAuxInt(off)
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg2(ptr, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPLZCNTD512(v *Value) bool {
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPLZCNTD512 l:(VMOVDQUload512 {sym} [off] ptr mem))
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPLZCNTD512load {sym} [off] ptr mem)
|
||||||
|
for {
|
||||||
|
l := v_0
|
||||||
|
if l.Op != OpAMD64VMOVDQUload512 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPLZCNTD512load)
|
||||||
|
v.AuxInt = int32ToAuxInt(off)
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg2(ptr, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPLZCNTDMasked128(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPLZCNTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPLZCNTDMasked128load {sym} [off] ptr mask mem)
|
||||||
|
for {
|
||||||
|
l := v_0
|
||||||
|
if l.Op != OpAMD64VMOVDQUload128 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
mask := v_1
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPLZCNTDMasked128load)
|
||||||
|
v.AuxInt = int32ToAuxInt(off)
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg3(ptr, mask, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPLZCNTDMasked256(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPLZCNTDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPLZCNTDMasked256load {sym} [off] ptr mask mem)
|
||||||
|
for {
|
||||||
|
l := v_0
|
||||||
|
if l.Op != OpAMD64VMOVDQUload256 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
mask := v_1
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPLZCNTDMasked256load)
|
||||||
|
v.AuxInt = int32ToAuxInt(off)
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg3(ptr, mask, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPLZCNTDMasked512(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPLZCNTDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPLZCNTDMasked512load {sym} [off] ptr mask mem)
|
||||||
|
for {
|
||||||
|
l := v_0
|
||||||
|
if l.Op != OpAMD64VMOVDQUload512 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
mask := v_1
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPLZCNTDMasked512load)
|
||||||
|
v.AuxInt = int32ToAuxInt(off)
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg3(ptr, mask, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPLZCNTQ128(v *Value) bool {
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPLZCNTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem))
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPLZCNTQ128load {sym} [off] ptr mem)
|
||||||
|
for {
|
||||||
|
l := v_0
|
||||||
|
if l.Op != OpAMD64VMOVDQUload128 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPLZCNTQ128load)
|
||||||
|
v.AuxInt = int32ToAuxInt(off)
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg2(ptr, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPLZCNTQ256(v *Value) bool {
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPLZCNTQ256 l:(VMOVDQUload256 {sym} [off] ptr mem))
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPLZCNTQ256load {sym} [off] ptr mem)
|
||||||
|
for {
|
||||||
|
l := v_0
|
||||||
|
if l.Op != OpAMD64VMOVDQUload256 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPLZCNTQ256load)
|
||||||
|
v.AuxInt = int32ToAuxInt(off)
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg2(ptr, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPLZCNTQ512(v *Value) bool {
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPLZCNTQ512 l:(VMOVDQUload512 {sym} [off] ptr mem))
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPLZCNTQ512load {sym} [off] ptr mem)
|
||||||
|
for {
|
||||||
|
l := v_0
|
||||||
|
if l.Op != OpAMD64VMOVDQUload512 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPLZCNTQ512load)
|
||||||
|
v.AuxInt = int32ToAuxInt(off)
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg2(ptr, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPLZCNTQMasked128(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPLZCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPLZCNTQMasked128load {sym} [off] ptr mask mem)
|
||||||
|
for {
|
||||||
|
l := v_0
|
||||||
|
if l.Op != OpAMD64VMOVDQUload128 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
mask := v_1
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPLZCNTQMasked128load)
|
||||||
|
v.AuxInt = int32ToAuxInt(off)
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg3(ptr, mask, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPLZCNTQMasked256(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPLZCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPLZCNTQMasked256load {sym} [off] ptr mask mem)
|
||||||
|
for {
|
||||||
|
l := v_0
|
||||||
|
if l.Op != OpAMD64VMOVDQUload256 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
mask := v_1
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPLZCNTQMasked256load)
|
||||||
|
v.AuxInt = int32ToAuxInt(off)
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg3(ptr, mask, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValueAMD64_OpAMD64VPLZCNTQMasked512(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (VPLZCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
|
||||||
|
// cond: canMergeLoad(v, l) && clobber(l)
|
||||||
|
// result: (VPLZCNTQMasked512load {sym} [off] ptr mask mem)
|
||||||
|
for {
|
||||||
|
l := v_0
|
||||||
|
if l.Op != OpAMD64VMOVDQUload512 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
off := auxIntToInt32(l.AuxInt)
|
||||||
|
sym := auxToSym(l.Aux)
|
||||||
|
mem := l.Args[1]
|
||||||
|
ptr := l.Args[0]
|
||||||
|
mask := v_1
|
||||||
|
if !(canMergeLoad(v, l) && clobber(l)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpAMD64VPLZCNTQMasked512load)
|
||||||
|
v.AuxInt = int32ToAuxInt(off)
|
||||||
|
v.Aux = symToAux(sym)
|
||||||
|
v.AddArg3(ptr, mask, mem)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
func rewriteValueAMD64_OpAMD64VPMAXSD128(v *Value) bool {
|
func rewriteValueAMD64_OpAMD64VPMAXSD128(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ const simdMachineOpsTmpl = `
|
||||||
package main
|
package main
|
||||||
|
|
||||||
func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw,
|
func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vfpkv, w11, w21, w2k, wkw, w2kw, w2kk, w31, w3kw, wgpw, wgp, wfpw, wfpkw,
|
||||||
wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload regInfo) []opData {
|
wkwload, v21load, v31load, v11load, w21load, w31load, w2kload, w2kwload, w11load, w3kwload, w2kkload regInfo) []opData {
|
||||||
return []opData{
|
return []opData{
|
||||||
{{- range .OpsData }}
|
{{- range .OpsData }}
|
||||||
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}},
|
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}},
|
||||||
|
|
@ -24,8 +24,11 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
|
||||||
{{- range .OpsDataImm }}
|
{{- range .OpsDataImm }}
|
||||||
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "UInt8", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}},
|
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", aux: "UInt8", commutative: {{.Comm}}, typ: "{{.Type}}", resultInArg0: {{.ResultInArg0}}},
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- range .OpsDataload}}
|
{{- range .OpsDataLoad}}
|
||||||
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", aux: "SymOff", symEffect: "Read", resultInArg0: {{.ResultInArg0}}},
|
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", aux: "SymOff", symEffect: "Read", resultInArg0: {{.ResultInArg0}}},
|
||||||
|
{{- end}}
|
||||||
|
{{- range .OpsDataImmLoad}}
|
||||||
|
{name: "{{.OpName}}", argLength: {{.OpInLen}}, reg: {{.RegInfo}}, asm: "{{.Asm}}", commutative: {{.Comm}}, typ: "{{.Type}}", aux: "SymValAndOff", symEffect: "Read", resultInArg0: {{.ResultInArg0}}},
|
||||||
{{- end}}
|
{{- end}}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -48,19 +51,21 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
|
||||||
ResultInArg0 bool
|
ResultInArg0 bool
|
||||||
}
|
}
|
||||||
type machineOpsData struct {
|
type machineOpsData struct {
|
||||||
OpsData []opData
|
OpsData []opData
|
||||||
OpsDataImm []opData
|
OpsDataImm []opData
|
||||||
OpsDataload []opData
|
OpsDataLoad []opData
|
||||||
|
OpsDataImmLoad []opData
|
||||||
}
|
}
|
||||||
|
|
||||||
regInfoSet := map[string]bool{
|
regInfoSet := map[string]bool{
|
||||||
"v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true,
|
"v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true,
|
||||||
"w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true,
|
"w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true,
|
||||||
"wkwload": true, "v21load": true, "v31load": true, "v11load": true, "w21load": true, "w31load": true, "w2kload": true, "w2kwload": true, "w11load": true,
|
"wkwload": true, "v21load": true, "v31load": true, "v11load": true, "w21load": true, "w31load": true, "w2kload": true, "w2kwload": true, "w11load": true,
|
||||||
"w3kwload": true}
|
"w3kwload": true, "w2kkload": true}
|
||||||
opsData := make([]opData, 0)
|
opsData := make([]opData, 0)
|
||||||
opsDataImm := make([]opData, 0)
|
opsDataImm := make([]opData, 0)
|
||||||
opsDataload := make([]opData, 0)
|
opsDataLoad := make([]opData, 0)
|
||||||
|
opsDataImmLoad := make([]opData, 0)
|
||||||
|
|
||||||
// Determine the "best" version of an instruction to use
|
// Determine the "best" version of an instruction to use
|
||||||
best := make(map[string]Operation)
|
best := make(map[string]Operation)
|
||||||
|
|
@ -141,27 +146,32 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
|
||||||
if shapeOut == OneVregOutAtIn {
|
if shapeOut == OneVregOutAtIn {
|
||||||
resultInArg0 = true
|
resultInArg0 = true
|
||||||
}
|
}
|
||||||
|
var memOpData *opData
|
||||||
|
if op.MemFeatures != nil && *op.MemFeatures == "vbcst" {
|
||||||
|
// Right now we only have vbcst case
|
||||||
|
// Make a full vec memory variant.
|
||||||
|
op = rewriteLastVregToMem(op)
|
||||||
|
regInfo, err := makeRegInfo(op, VregMemIn)
|
||||||
|
if err != nil {
|
||||||
|
// Just skip it if it's non-nil.
|
||||||
|
// an error could be triggered by [checkVecAsScalar].
|
||||||
|
// TODO: make [checkVecAsScalar] aware of mem ops.
|
||||||
|
if *Verbose {
|
||||||
|
log.Printf("Seen error: %e", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
memOpData = &opData{asm + "load", gOp.Asm, len(gOp.In) + 1, regInfo, false, outType, resultInArg0}
|
||||||
|
}
|
||||||
|
}
|
||||||
if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn {
|
if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn {
|
||||||
opsDataImm = append(opsDataImm, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
|
opsDataImm = append(opsDataImm, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
|
||||||
// TODO: right now we put the uint8 immediates in [Aux] field, but for load this field needs to be occupied by SymOff.
|
if memOpData != nil {
|
||||||
// we should handle uint8 aux in [AuxInt]. Before that we will skip memory ops with imm.
|
opsDataImmLoad = append(opsDataImmLoad, *memOpData)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
opsData = append(opsData, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
|
opsData = append(opsData, opData{asm, gOp.Asm, len(gOp.In), regInfo, gOp.Commutative, outType, resultInArg0})
|
||||||
if op.MemFeatures != nil && *op.MemFeatures == "vbcst" {
|
if memOpData != nil {
|
||||||
// Right now we only have vbcst case
|
opsDataLoad = append(opsDataLoad, *memOpData)
|
||||||
// Make a full vec memory variant.
|
|
||||||
op = rewriteLastVregToMem(op)
|
|
||||||
regInfo, err := makeRegInfo(op, VregMemIn)
|
|
||||||
if err != nil {
|
|
||||||
// Just skip it if it's non-nil.
|
|
||||||
// an error could be triggered by [checkVecAsScalar].
|
|
||||||
// TODO: make [checkVecAsScalar] aware of mem ops.
|
|
||||||
if *Verbose {
|
|
||||||
log.Printf("Seen error: %e", err)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
opsDataload = append(opsDataload, opData{asm + "load", gOp.Asm, len(gOp.In) + 1, regInfo, false, outType, resultInArg0})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -177,10 +187,13 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
|
||||||
sort.Slice(opsDataImm, func(i, j int) bool {
|
sort.Slice(opsDataImm, func(i, j int) bool {
|
||||||
return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0
|
return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0
|
||||||
})
|
})
|
||||||
sort.Slice(opsDataload, func(i, j int) bool {
|
sort.Slice(opsDataLoad, func(i, j int) bool {
|
||||||
return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0
|
return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0
|
||||||
})
|
})
|
||||||
err := t.Execute(buffer, machineOpsData{opsData, opsDataImm, opsDataload})
|
sort.Slice(opsDataImmLoad, func(i, j int) bool {
|
||||||
|
return compareNatural(opsData[i].OpName, opsData[j].OpName) < 0
|
||||||
|
})
|
||||||
|
err := t.Execute(buffer, machineOpsData{opsData, opsDataImm, opsDataLoad, opsDataImmLoad})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Errorf("failed to execute template: %w", err))
|
panic(fmt.Errorf("failed to execute template: %w", err))
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -88,6 +88,12 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
|
||||||
"v2kvload",
|
"v2kvload",
|
||||||
"v2kload",
|
"v2kload",
|
||||||
"v11load",
|
"v11load",
|
||||||
|
"v11loadImm8",
|
||||||
|
"vkvloadImm8",
|
||||||
|
"v21loadImm8",
|
||||||
|
"v2kloadImm8",
|
||||||
|
"v2kkloadImm8",
|
||||||
|
"v2kvloadImm8",
|
||||||
}
|
}
|
||||||
regInfoSet := map[string][]string{}
|
regInfoSet := map[string][]string{}
|
||||||
for _, key := range regInfoKeys {
|
for _, key := range regInfoKeys {
|
||||||
|
|
@ -108,11 +114,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
|
||||||
regShape += "ResultInArg0"
|
regShape += "ResultInArg0"
|
||||||
}
|
}
|
||||||
if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn {
|
if shapeIn == OneImmIn || shapeIn == OneKmaskImmIn {
|
||||||
if mem == NoMem || mem == InvalidMem {
|
regShape += "Imm8"
|
||||||
regShape += "Imm8"
|
|
||||||
} else {
|
|
||||||
return fmt.Errorf("simdgen cannot handle mem op with imm8 as of now")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
regShape, err = rewriteVecAsScalarRegInfo(op, regShape)
|
regShape, err = rewriteVecAsScalarRegInfo(op, regShape)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue