mirror of https://github.com/golang/go.git
[dev.simd] cmd/compile: opcodes and rules and code generation to enable AVX512 masked loads/stores
Change-Id: I9e05fc5031420f60a2e6bac7b9f86365f0f4c0f1
Reviewed-on: https://go-review.googlesource.com/c/go/+/690335
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
parent f39711a03d
commit 1ac5f3533f
6 changed files with 392 additions and 3 deletions
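What the new opcodes implement: an AVX512 masked load reads only the lanes whose K-register mask bit is set, and because the code generator below applies the assembler's "Z" (zeroing) suffix, unselected destination lanes are zeroed; a masked store writes only the selected lanes and leaves the rest of memory untouched. A minimal scalar sketch of those lane semantics for the 64-bit-element case (illustrative Go only, not compiler code; the function names are made up):

	package main

	import "fmt"

	// maskedLoad64 models VPMASK64load512's zeroing form: one 512-bit
	// vector is 8 int64 lanes, selected by the low 8 bits of the k-mask.
	func maskedLoad64(src *[8]int64, mask uint8) (dst [8]int64) {
		for i := range dst {
			if mask&(1<<i) != 0 {
				dst[i] = src[i] // active lane: loaded from memory
			} // inactive lane: stays zero, matching the "Z" suffix
		}
		return
	}

	// maskedStore64 models VPMASK64store512: only active lanes are written.
	func maskedStore64(dst *[8]int64, mask uint8, val [8]int64) {
		for i := range val {
			if mask&(1<<i) != 0 {
				dst[i] = val[i]
			}
		}
	}

	func main() {
		src := [8]int64{1, 2, 3, 4, 5, 6, 7, 8}
		fmt.Println(maskedLoad64(&src, 0b00001111)) // [1 2 3 4 0 0 0 0]
	}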

src/cmd/compile/internal/amd64/ssa.go
@@ -1494,6 +1494,25 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		ssagen.AddAux(&p.To, v)
 		p.AddRestSourceReg(simdReg(v.Args[1])) // masking simd reg
 
+	case ssa.OpAMD64VPMASK64load512, ssa.OpAMD64VPMASK32load512, ssa.OpAMD64VPMASK16load512, ssa.OpAMD64VPMASK8load512:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = v.Args[0].Reg()
+		ssagen.AddAux(&p.From, v)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = simdReg(v)
+		p.AddRestSourceReg(v.Args[1].Reg()) // simd mask reg
+		x86.ParseSuffix(p, "Z")             // must be zero if not in mask
+
+	case ssa.OpAMD64VPMASK64store512, ssa.OpAMD64VPMASK32store512, ssa.OpAMD64VPMASK16store512, ssa.OpAMD64VPMASK8store512:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = simdReg(v.Args[2])
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = v.Args[0].Reg()
+		ssagen.AddAux(&p.To, v)
+		p.AddRestSourceReg(v.Args[1].Reg()) // simd mask reg
+
 	case ssa.OpAMD64VPMOVMToVec8x16,
 		ssa.OpAMD64VPMOVMToVec8x32,
 		ssa.OpAMD64VPMOVMToVec8x64,
src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -1756,6 +1756,18 @@
 (StoreMasked64 {t} ptr mask val mem) && t.Size() == 16 => (VPMASK64store128 ptr mask val mem)
 (StoreMasked64 {t} ptr mask val mem) && t.Size() == 32 => (VPMASK64store256 ptr mask val mem)
 
+// SIMD vector K-masked loads and stores
+
+(LoadMasked64 <t> ptr mask mem) && t.Size() == 64 => (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem)
+(LoadMasked32 <t> ptr mask mem) && t.Size() == 64 => (VPMASK32load512 ptr (VPMOVVec32x16ToM <types.TypeMask> mask) mem)
+(LoadMasked16 <t> ptr mask mem) && t.Size() == 64 => (VPMASK16load512 ptr (VPMOVVec16x32ToM <types.TypeMask> mask) mem)
+(LoadMasked8 <t> ptr mask mem) && t.Size() == 64 => (VPMASK8load512 ptr (VPMOVVec8x64ToM <types.TypeMask> mask) mem)
+
+(StoreMasked64 {t} ptr mask val mem) && t.Size() == 64 => (VPMASK64store512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) val mem)
+(StoreMasked32 {t} ptr mask val mem) && t.Size() == 64 => (VPMASK32store512 ptr (VPMOVVec32x16ToM <types.TypeMask> mask) val mem)
+(StoreMasked16 {t} ptr mask val mem) && t.Size() == 64 => (VPMASK16store512 ptr (VPMOVVec16x32ToM <types.TypeMask> mask) val mem)
+(StoreMasked8 {t} ptr mask val mem) && t.Size() == 64 => (VPMASK8store512 ptr (VPMOVVec8x64ToM <types.TypeMask> mask) val mem)
+
 (ZeroSIMD <t>) && t.Size() == 16 => (Zero128 <t>)
 (ZeroSIMD <t>) && t.Size() == 32 => (Zero256 <t>)
 (ZeroSIMD <t>) && t.Size() == 64 => (Zero512 <t>)
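Note on the rules above: the AVX2 128/256-bit forms (VPMASKMOVD/VPMASKMOVQ) take the mask in a vector register, so those rules pass the mask through unchanged, while the AVX512 512-bit forms need it in a K register, so each 512-bit rule wraps the mask in the matching VPMOVVec<w>x<n>ToM conversion. The lane count n is just 512 divided by the element width w; a small stand-alone check of that correspondence (illustrative only):

	package main

	import "fmt"

	func main() {
		for _, w := range []int{8, 16, 32, 64} {
			n := 512 / w // lanes in a 64-byte vector of w-bit elements
			fmt.Printf("LoadMasked%d (64B) => VPMASK%dload512, mask via VPMOVVec%dx%dToM\n",
				w, w, w, n)
		}
	}
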
src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
@@ -205,8 +205,8 @@ func init() {
 		// masked loads/stores, vector register or mask register
 		vloadv  = regInfo{inputs: []regMask{gpspsb, v, 0}, outputs: vonly}
 		vstorev = regInfo{inputs: []regMask{gpspsb, v, v, 0}}
-		// vloadk  = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: vonly}
-		// vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
+		vloadk  = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: vonly}
+		vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
 
 		v01 = regInfo{inputs: nil, outputs: vonly}
 		v11 = regInfo{inputs: vonly, outputs: vonly}
@@ -1286,7 +1286,7 @@ func init() {
 		{name: "VMOVDQUload512", argLength: 2, reg: fpload, asm: "VMOVDQU64", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},   // load from arg0+auxint+aux, arg1 = mem
 		{name: "VMOVDQUstore512", argLength: 3, reg: fpstore, asm: "VMOVDQU64", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg1, arg2 = mem
 
-		// AVX2 32 and 64-bit element masked moves.
+		// AVX2 32 and 64-bit element int-vector masked moves.
 		{name: "VPMASK32load128", argLength: 3, reg: vloadv, asm: "VPMASKMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},    // load from arg0+auxint+aux, arg1=integer mask, arg2 = mem
 		{name: "VPMASK32store128", argLength: 4, reg: vstorev, asm: "VPMASKMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg2, arg1=integer mask, arg3 = mem
 		{name: "VPMASK64load128", argLength: 3, reg: vloadv, asm: "VPMASKMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},    // load from arg0+auxint+aux, arg1=integer mask, arg2 = mem
@@ -1297,6 +1297,16 @@ func init() {
 		{name: "VPMASK64load256", argLength: 3, reg: vloadv, asm: "VPMASKMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},    // load from arg0+auxint+aux, arg1=integer mask, arg2 = mem
 		{name: "VPMASK64store256", argLength: 4, reg: vstorev, asm: "VPMASKMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg2, arg1=integer mask, arg3 = mem
 
+		// AVX512 8-64-bit element mask-register masked moves
+		{name: "VPMASK8load512", argLength: 3, reg: vloadk, asm: "VMOVDQU8", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},      // load from arg0+auxint+aux, arg1=k mask, arg2 = mem
+		{name: "VPMASK8store512", argLength: 4, reg: vstorek, asm: "VMOVDQU8", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"},   // store, *(arg0+auxint+aux) = arg2, arg1=k mask, arg3 = mem
+		{name: "VPMASK16load512", argLength: 3, reg: vloadk, asm: "VMOVDQU16", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},    // load from arg0+auxint+aux, arg1=k mask, arg2 = mem
+		{name: "VPMASK16store512", argLength: 4, reg: vstorek, asm: "VMOVDQU16", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg2, arg1=k mask, arg3 = mem
+		{name: "VPMASK32load512", argLength: 3, reg: vloadk, asm: "VMOVDQU32", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},    // load from arg0+auxint+aux, arg1=k mask, arg2 = mem
+		{name: "VPMASK32store512", argLength: 4, reg: vstorek, asm: "VMOVDQU32", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg2, arg1=k mask, arg3 = mem
+		{name: "VPMASK64load512", argLength: 3, reg: vloadk, asm: "VMOVDQU64", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},    // load from arg0+auxint+aux, arg1=k mask, arg2 = mem
+		{name: "VPMASK64store512", argLength: 4, reg: vstorek, asm: "VMOVDQU64", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg2, arg1=k mask, arg3 = mem
+
 		{name: "VPMOVMToVec8x16", argLength: 1, reg: kv, asm: "VPMOVM2B"},
 		{name: "VPMOVMToVec8x32", argLength: 1, reg: kv, asm: "VPMOVM2B"},
 		{name: "VPMOVMToVec8x64", argLength: 1, reg: kw, asm: "VPMOVM2B"},
src/cmd/compile/internal/ssa/_gen/genericOps.go
@@ -375,8 +375,12 @@ var genericOps = []opData{
 
 	// masked memory operations.
 	// TODO add 16 and 8
+	{name: "LoadMasked8", argLength: 3},  // Load from arg0, arg1 = mask of 8-bits, arg2 = memory
+	{name: "LoadMasked16", argLength: 3}, // Load from arg0, arg1 = mask of 16-bits, arg2 = memory
 	{name: "LoadMasked32", argLength: 3}, // Load from arg0, arg1 = mask of 32-bits, arg2 = memory
 	{name: "LoadMasked64", argLength: 3}, // Load from arg0, arg1 = mask of 64-bits, arg2 = memory
+	{name: "StoreMasked8", argLength: 4, typ: "Mem", aux: "Typ"},  // Store arg2 to arg0, arg1=mask of 8-bits, arg3 = memory
+	{name: "StoreMasked16", argLength: 4, typ: "Mem", aux: "Typ"}, // Store arg2 to arg0, arg1=mask of 16-bits, arg3 = memory
 	{name: "StoreMasked32", argLength: 4, typ: "Mem", aux: "Typ"}, // Store arg2 to arg0, arg1=mask of 32-bits, arg3 = memory
 	{name: "StoreMasked64", argLength: 4, typ: "Mem", aux: "Typ"}, // Store arg2 to arg0, arg1=mask of 64-bits, arg3 = memory
 
src/cmd/compile/internal/ssa/opGen.go
@@ -1177,6 +1177,14 @@ const (
 	OpAMD64VPMASK32store256
 	OpAMD64VPMASK64load256
 	OpAMD64VPMASK64store256
+	OpAMD64VPMASK8load512
+	OpAMD64VPMASK8store512
+	OpAMD64VPMASK16load512
+	OpAMD64VPMASK16store512
+	OpAMD64VPMASK32load512
+	OpAMD64VPMASK32store512
+	OpAMD64VPMASK64load512
+	OpAMD64VPMASK64store512
 	OpAMD64VPMOVMToVec8x16
 	OpAMD64VPMOVMToVec8x32
 	OpAMD64VPMOVMToVec8x64
@@ -4270,8 +4278,12 @@ const (
 	OpLoad
 	OpDereference
 	OpStore
+	OpLoadMasked8
+	OpLoadMasked16
 	OpLoadMasked32
 	OpLoadMasked64
+	OpStoreMasked8
+	OpStoreMasked16
 	OpStoreMasked32
 	OpStoreMasked64
 	OpMove
@@ -18661,6 +18673,134 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:           "VPMASK8load512",
+		auxType:        auxSymOff,
+		argLen:         3,
+		faultOnNilArg0: true,
+		symEffect:      SymRead,
+		asm:            x86.AVMOVDQU8,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:           "VPMASK8store512",
+		auxType:        auxSymOff,
+		argLen:         4,
+		faultOnNilArg0: true,
+		symEffect:      SymWrite,
+		asm:            x86.AVMOVDQU8,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{2, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+		},
+	},
+	{
+		name:           "VPMASK16load512",
+		auxType:        auxSymOff,
+		argLen:         3,
+		faultOnNilArg0: true,
+		symEffect:      SymRead,
+		asm:            x86.AVMOVDQU16,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:           "VPMASK16store512",
+		auxType:        auxSymOff,
+		argLen:         4,
+		faultOnNilArg0: true,
+		symEffect:      SymWrite,
+		asm:            x86.AVMOVDQU16,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{2, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+		},
+	},
+	{
+		name:           "VPMASK32load512",
+		auxType:        auxSymOff,
+		argLen:         3,
+		faultOnNilArg0: true,
+		symEffect:      SymRead,
+		asm:            x86.AVMOVDQU32,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:           "VPMASK32store512",
+		auxType:        auxSymOff,
+		argLen:         4,
+		faultOnNilArg0: true,
+		symEffect:      SymWrite,
+		asm:            x86.AVMOVDQU32,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{2, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+		},
+	},
+	{
+		name:           "VPMASK64load512",
+		auxType:        auxSymOff,
+		argLen:         3,
+		faultOnNilArg0: true,
+		symEffect:      SymRead,
+		asm:            x86.AVMOVDQU64,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:           "VPMASK64store512",
+		auxType:        auxSymOff,
+		argLen:         4,
+		faultOnNilArg0: true,
+		symEffect:      SymWrite,
+		asm:            x86.AVMOVDQU64,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{2, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+		},
+	},
 	{
 		name:   "VPMOVMToVec8x16",
 		argLen: 1,
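The inputInfo/outputInfo pairs in the generated entries above are (argument index, register bitmask): for example, input 1 of VPMASK8load512 must land in K1-K7 and its output in X0-X14. A stand-alone decoder that reproduces those comments; the bit-to-register layout assumed here (16 GPs, then X0-X31, then K0-K7, with SB at bit 56) is inferred from the comments themselves, not taken from the compiler source:

	package main

	import "fmt"

	func decode(mask uint64) []string {
		gp := []string{"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI",
			"R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"}
		var names []string
		for b := 0; b < 64; b++ {
			if mask&(1<<b) == 0 {
				continue
			}
			switch {
			case b < 16:
				names = append(names, gp[b]) // general-purpose registers
			case b < 48:
				names = append(names, fmt.Sprintf("X%d", b-16)) // vector registers
			case b < 56:
				names = append(names, fmt.Sprintf("K%d", b-48)) // mask registers
			case b == 56:
				names = append(names, "SB") // static-base pseudo-register
			}
		}
		return names
	}

	func main() {
		fmt.Println(decode(71494644084506624)) // [K1 K2 K3 K4 K5 K6 K7]
		fmt.Println(decode(2147418112))        // [X0 ... X14]
		fmt.Println(decode(72057594037977087)) // GPs without R14, plus SB
	}
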
@@ -60363,6 +60503,16 @@ var opcodeTable = [...]opInfo{
 		argLen:  3,
 		generic: true,
 	},
+	{
+		name:    "LoadMasked8",
+		argLen:  3,
+		generic: true,
+	},
+	{
+		name:    "LoadMasked16",
+		argLen:  3,
+		generic: true,
+	},
 	{
 		name:    "LoadMasked32",
 		argLen:  3,
@@ -60373,6 +60523,18 @@ var opcodeTable = [...]opInfo{
 		argLen:  3,
 		generic: true,
 	},
+	{
+		name:    "StoreMasked8",
+		auxType: auxTyp,
+		argLen:  4,
+		generic: true,
+	},
+	{
+		name:    "StoreMasked16",
+		auxType: auxTyp,
+		argLen:  4,
+		generic: true,
+	},
 	{
 		name:    "StoreMasked32",
 		auxType: auxTyp,
src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -2516,10 +2516,14 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpLoadMask8x32(v)
 	case OpLoadMask8x64:
 		return rewriteValueAMD64_OpLoadMask8x64(v)
+	case OpLoadMasked16:
+		return rewriteValueAMD64_OpLoadMasked16(v)
 	case OpLoadMasked32:
 		return rewriteValueAMD64_OpLoadMasked32(v)
 	case OpLoadMasked64:
 		return rewriteValueAMD64_OpLoadMasked64(v)
+	case OpLoadMasked8:
+		return rewriteValueAMD64_OpLoadMasked8(v)
 	case OpLocalAddr:
 		return rewriteValueAMD64_OpLocalAddr(v)
 	case OpLsh16x16:
@@ -5266,10 +5270,14 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpStoreMask8x32(v)
 	case OpStoreMask8x64:
 		return rewriteValueAMD64_OpStoreMask8x64(v)
+	case OpStoreMasked16:
+		return rewriteValueAMD64_OpStoreMasked16(v)
 	case OpStoreMasked32:
 		return rewriteValueAMD64_OpStoreMasked32(v)
 	case OpStoreMasked64:
 		return rewriteValueAMD64_OpStoreMasked64(v)
+	case OpStoreMasked8:
+		return rewriteValueAMD64_OpStoreMasked8(v)
 	case OpSub16:
 		v.Op = OpAMD64SUBL
 		return true
@@ -40881,10 +40889,35 @@ func rewriteValueAMD64_OpLoadMask8x64(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueAMD64_OpLoadMasked16(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMasked16 <t> ptr mask mem)
+	// cond: t.Size() == 64
+	// result: (VPMASK16load512 ptr (VPMOVVec16x32ToM <types.TypeMask> mask) mem)
+	for {
+		t := v.Type
+		ptr := v_0
+		mask := v_1
+		mem := v_2
+		if !(t.Size() == 64) {
+			break
+		}
+		v.reset(OpAMD64VPMASK16load512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpLoadMasked32(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
+	b := v.Block
 	// match: (LoadMasked32 <t> ptr mask mem)
 	// cond: t.Size() == 16
 	// result: (VPMASK32load128 ptr mask mem)
@@ -40915,12 +40948,30 @@ func rewriteValueAMD64_OpLoadMasked32(v *Value) bool {
 		v.AddArg3(ptr, mask, mem)
 		return true
 	}
+	// match: (LoadMasked32 <t> ptr mask mem)
+	// cond: t.Size() == 64
+	// result: (VPMASK32load512 ptr (VPMOVVec32x16ToM <types.TypeMask> mask) mem)
+	for {
+		t := v.Type
+		ptr := v_0
+		mask := v_1
+		mem := v_2
+		if !(t.Size() == 64) {
+			break
+		}
+		v.reset(OpAMD64VPMASK32load512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
 	return false
 }
 func rewriteValueAMD64_OpLoadMasked64(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
+	b := v.Block
 	// match: (LoadMasked64 <t> ptr mask mem)
 	// cond: t.Size() == 16
 	// result: (VPMASK64load128 ptr mask mem)
@@ -40951,6 +41002,47 @@ func rewriteValueAMD64_OpLoadMasked64(v *Value) bool {
 		v.AddArg3(ptr, mask, mem)
 		return true
 	}
+	// match: (LoadMasked64 <t> ptr mask mem)
+	// cond: t.Size() == 64
+	// result: (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem)
+	for {
+		t := v.Type
+		ptr := v_0
+		mask := v_1
+		mem := v_2
+		if !(t.Size() == 64) {
+			break
+		}
+		v.reset(OpAMD64VPMASK64load512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpLoadMasked8(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (LoadMasked8 <t> ptr mask mem)
+	// cond: t.Size() == 64
+	// result: (VPMASK8load512 ptr (VPMOVVec8x64ToM <types.TypeMask> mask) mem)
+	for {
+		t := v.Type
+		ptr := v_0
+		mask := v_1
+		mem := v_2
+		if !(t.Size() == 64) {
+			break
+		}
+		v.reset(OpAMD64VPMASK8load512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(ptr, v0, mem)
+		return true
+	}
 	return false
 }
 func rewriteValueAMD64_OpLocalAddr(v *Value) bool {
@@ -53915,11 +54007,38 @@ func rewriteValueAMD64_OpStoreMask8x64(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueAMD64_OpStoreMasked16(v *Value) bool {
+	v_3 := v.Args[3]
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMasked16 {t} ptr mask val mem)
+	// cond: t.Size() == 64
+	// result: (VPMASK16store512 ptr (VPMOVVec16x32ToM <types.TypeMask> mask) val mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		mask := v_1
+		val := v_2
+		mem := v_3
+		if !(t.Size() == 64) {
+			break
+		}
+		v.reset(OpAMD64VPMASK16store512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg4(ptr, v0, val, mem)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpStoreMasked32(v *Value) bool {
 	v_3 := v.Args[3]
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
+	b := v.Block
 	// match: (StoreMasked32 {t} ptr mask val mem)
 	// cond: t.Size() == 16
 	// result: (VPMASK32store128 ptr mask val mem)
@@ -53952,6 +54071,24 @@ func rewriteValueAMD64_OpStoreMasked32(v *Value) bool {
 		v.AddArg4(ptr, mask, val, mem)
 		return true
 	}
+	// match: (StoreMasked32 {t} ptr mask val mem)
+	// cond: t.Size() == 64
+	// result: (VPMASK32store512 ptr (VPMOVVec32x16ToM <types.TypeMask> mask) val mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		mask := v_1
+		val := v_2
+		mem := v_3
+		if !(t.Size() == 64) {
+			break
+		}
+		v.reset(OpAMD64VPMASK32store512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg4(ptr, v0, val, mem)
+		return true
+	}
 	return false
 }
 func rewriteValueAMD64_OpStoreMasked64(v *Value) bool {
@@ -53959,6 +54096,7 @@ func rewriteValueAMD64_OpStoreMasked64(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
+	b := v.Block
 	// match: (StoreMasked64 {t} ptr mask val mem)
 	// cond: t.Size() == 16
 	// result: (VPMASK64store128 ptr mask val mem)
@@ -53991,6 +54129,50 @@ func rewriteValueAMD64_OpStoreMasked64(v *Value) bool {
 		v.AddArg4(ptr, mask, val, mem)
 		return true
 	}
+	// match: (StoreMasked64 {t} ptr mask val mem)
+	// cond: t.Size() == 64
+	// result: (VPMASK64store512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) val mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		mask := v_1
+		val := v_2
+		mem := v_3
+		if !(t.Size() == 64) {
+			break
+		}
+		v.reset(OpAMD64VPMASK64store512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg4(ptr, v0, val, mem)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpStoreMasked8(v *Value) bool {
+	v_3 := v.Args[3]
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (StoreMasked8 {t} ptr mask val mem)
+	// cond: t.Size() == 64
+	// result: (VPMASK8store512 ptr (VPMOVVec8x64ToM <types.TypeMask> mask) val mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		mask := v_1
+		val := v_2
+		mem := v_3
+		if !(t.Size() == 64) {
+			break
+		}
+		v.reset(OpAMD64VPMASK8store512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg4(ptr, v0, val, mem)
+		return true
+	}
 	return false
 }
 func rewriteValueAMD64_OpSubMaskedFloat32x16(v *Value) bool {
||||||
Loading…
Add table
Add a link
Reference in a new issue