diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 7338c16cdad..efa7895e97d 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -1476,6 +1476,24 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p.To.Reg = v.Args[0].Reg()
 		ssagen.AddAux(&p.To, v)
 
+	case ssa.OpAMD64VPMASK32load128, ssa.OpAMD64VPMASK64load128, ssa.OpAMD64VPMASK32load256, ssa.OpAMD64VPMASK64load256:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = v.Args[0].Reg()
+		ssagen.AddAux(&p.From, v)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = simdReg(v)
+		p.AddRestSourceReg(simdReg(v.Args[1])) // masking simd reg
+
+	case ssa.OpAMD64VPMASK32store128, ssa.OpAMD64VPMASK64store128, ssa.OpAMD64VPMASK32store256, ssa.OpAMD64VPMASK64store256:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = simdReg(v.Args[2])
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = v.Args[0].Reg()
+		ssagen.AddAux(&p.To, v)
+		p.AddRestSourceReg(simdReg(v.Args[1])) // masking simd reg
+
 	case ssa.OpAMD64VPMOVMToVec8x16,
 		ssa.OpAMD64VPMOVMToVec8x32,
 		ssa.OpAMD64VPMOVMToVec8x64,
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
index 5a21c95df9e..0136e41af76 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -1715,17 +1715,24 @@
 (StoreMask64x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x8ToM val) mem)
 
 (Load <t> ptr mem) && t.Size() == 16 => (VMOVDQUload128 ptr mem)
-(Store {t} ptr val mem) && t.Size() == 16 => (VMOVDQUstore128 ptr val mem)
 (Load <t> ptr mem) && t.Size() == 32 => (VMOVDQUload256 ptr mem)
-(Store {t} ptr val mem) && t.Size() == 32 => (VMOVDQUstore256 ptr val mem)
 (Load <t> ptr mem) && t.Size() == 64 => (VMOVDQUload512 ptr mem)
-(Store {t} ptr val mem) && t.Size() == 64 => (VMOVDQUstore512 ptr val mem)
+(LoadMasked32 <t> ptr mask mem) && t.Size() == 16 => (VPMASK32load128 ptr mask mem)
+(LoadMasked32 <t> ptr mask mem) && t.Size() == 32 => (VPMASK32load256 ptr mask mem)
+(LoadMasked64 <t> ptr mask mem) && t.Size() == 16 => (VPMASK64load128 ptr mask mem)
+(LoadMasked64 <t> ptr mask mem) && t.Size() == 32 => (VPMASK64load256 ptr mask mem)
+
+(StoreMasked32 {t} ptr mask val mem) && t.Size() == 16 => (VPMASK32store128 ptr mask val mem)
+(StoreMasked32 {t} ptr mask val mem) && t.Size() == 32 => (VPMASK32store256 ptr mask val mem)
+(StoreMasked64 {t} ptr mask val mem) && t.Size() == 16 => (VPMASK64store128 ptr mask val mem)
+(StoreMasked64 {t} ptr mask val mem) && t.Size() == 32 => (VPMASK64store256 ptr mask val mem)
+
 (ZeroSIMD <t>) && t.Size() == 16 => (Zero128 <t>)
 (ZeroSIMD <t>) && t.Size() == 32 => (Zero256 <t>)
 (ZeroSIMD <t>) && t.Size() == 64 => (Zero512 <t>)
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
index cd4b5b2a06a..66c37a495fb 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
@@ -202,6 +202,12 @@ func init() {
 		fpstore    = regInfo{inputs: []regMask{gpspsb, fp, 0}}
 		fpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, fp, 0}}
 
+		// masked loads/stores, vector register or mask register
+		vloadv  = regInfo{inputs: []regMask{gpspsb, v, 0}, outputs: vonly}
+		vstorev = regInfo{inputs: []regMask{gpspsb, v, v, 0}}
+		// vloadk = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: vonly}
+		// vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
+
 		v01 = regInfo{inputs: nil, outputs: vonly}
 		v11 = regInfo{inputs: vonly, outputs: vonly}
 		v21 = regInfo{inputs: []regMask{v, v}, outputs: vonly}
@@ -1279,6 +1285,17 @@ func init() {
 		{name: "VMOVDQUload512", argLength: 2, reg: fpload, asm: "VMOVDQU64", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},    // load from arg0+auxint+aux, arg1 = mem
 		{name: "VMOVDQUstore512", argLength: 3, reg: fpstore, asm: "VMOVDQU64", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg1, arg2 = mem
 
+		// AVX2 32 and 64-bit element masked moves.
+		{name: "VPMASK32load128", argLength: 3, reg: vloadv, asm: "VPMASKMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},    // load from arg0+auxint+aux, arg1=integer mask, arg2 = mem
+		{name: "VPMASK32store128", argLength: 4, reg: vstorev, asm: "VPMASKMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg2, arg1=integer mask, arg3 = mem
+		{name: "VPMASK64load128", argLength: 3, reg: vloadv, asm: "VPMASKMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},    // load from arg0+auxint+aux, arg1=integer mask, arg2 = mem
+		{name: "VPMASK64store128", argLength: 4, reg: vstorev, asm: "VPMASKMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg2, arg1=integer mask, arg3 = mem
+
+		{name: "VPMASK32load256", argLength: 3, reg: vloadv, asm: "VPMASKMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},    // load from arg0+auxint+aux, arg1=integer mask, arg2 = mem
+		{name: "VPMASK32store256", argLength: 4, reg: vstorev, asm: "VPMASKMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg2, arg1=integer mask, arg3 = mem
+		{name: "VPMASK64load256", argLength: 3, reg: vloadv, asm: "VPMASKMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},    // load from arg0+auxint+aux, arg1=integer mask, arg2 = mem
+		{name: "VPMASK64store256", argLength: 4, reg: vstorev, asm: "VPMASKMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg2, arg1=integer mask, arg3 = mem
+
 		{name: "VPMOVMToVec8x16", argLength: 1, reg: kv, asm: "VPMOVM2B"},
 		{name: "VPMOVMToVec8x32", argLength: 1, reg: kv, asm: "VPMOVM2B"},
 		{name: "VPMOVMToVec8x64", argLength: 1, reg: kw, asm: "VPMOVM2B"},
diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go
index 716fe9b8818..c1383199c4c 100644
--- a/src/cmd/compile/internal/ssa/_gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go
@@ -372,6 +372,14 @@ var genericOps = []opData{
 	{name: "Load", argLength: 2},                          // Load from arg0. arg1=memory
 	{name: "Dereference", argLength: 2},                   // Load from arg0. arg1=memory. Helper op for arg/result passing, result is an otherwise not-SSA-able "value".
 	{name: "Store", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+
+	// Masked memory operations.
+	// TODO: add 16-bit and 8-bit element variants.
+	{name: "LoadMasked32", argLength: 3},                          // Load from arg0, arg1 = mask of 32-bit elements, arg2 = memory
+	{name: "LoadMasked64", argLength: 3},                          // Load from arg0, arg1 = mask of 64-bit elements, arg2 = memory
+	{name: "StoreMasked32", argLength: 4, typ: "Mem", aux: "Typ"}, // Store arg2 to arg0, arg1 = mask of 32-bit elements, arg3 = memory
+	{name: "StoreMasked64", argLength: 4, typ: "Mem", aux: "Typ"}, // Store arg2 to arg0, arg1 = mask of 64-bit elements, arg3 = memory
+
 	// Normally we require that the source and destination of Move do not overlap.
 	// There is an exception when we know all the loads will happen before all
 	// the stores. In that case, overlap is ok. See
diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go
index 5736f0b8126..213089a44b8 100644
--- a/src/cmd/compile/internal/ssa/func.go
+++ b/src/cmd/compile/internal/ssa/func.go
@@ -631,6 +631,19 @@ func (b *Block) NewValue4(pos src.XPos, op Op, t *types.Type, arg0, arg1, arg2,
 	return v
 }
 
+// NewValue4A returns a new value in the block with four arguments and an aux value.
+func (b *Block) NewValue4A(pos src.XPos, op Op, t *types.Type, aux Aux, arg0, arg1, arg2, arg3 *Value) *Value {
+	v := b.Func.newValue(op, t, b, pos)
+	v.AuxInt = 0
+	v.Aux = aux
+	v.Args = []*Value{arg0, arg1, arg2, arg3}
+	arg0.Uses++
+	arg1.Uses++
+	arg2.Uses++
+	arg3.Uses++
+	return v
+}
+
 // NewValue4I returns a new value in the block with four arguments and auxint value.
 func (b *Block) NewValue4I(pos src.XPos, op Op, t *types.Type, auxint int64, arg0, arg1, arg2, arg3 *Value) *Value {
 	v := b.Func.newValue(op, t, b, pos)
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 9db3dbaf572..8cc3e45902b 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1169,6 +1169,14 @@ const (
 	OpAMD64VMOVDQUstore256
 	OpAMD64VMOVDQUload512
 	OpAMD64VMOVDQUstore512
+	OpAMD64VPMASK32load128
+	OpAMD64VPMASK32store128
+	OpAMD64VPMASK64load128
+	OpAMD64VPMASK64store128
+	OpAMD64VPMASK32load256
+	OpAMD64VPMASK32store256
+	OpAMD64VPMASK64load256
+	OpAMD64VPMASK64store256
 	OpAMD64VPMOVMToVec8x16
 	OpAMD64VPMOVMToVec8x32
 	OpAMD64VPMOVMToVec8x64
@@ -4246,6 +4254,10 @@ const (
 	OpLoad
 	OpDereference
 	OpStore
+	OpLoadMasked32
+	OpLoadMasked64
+	OpStoreMasked32
+	OpStoreMasked64
 	OpMove
 	OpZero
 	OpStoreWB
@@ -18481,6 +18493,134 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:           "VPMASK32load128",
+		auxType:        auxSymOff,
+		argLen:         3,
+		faultOnNilArg0: true,
+		symEffect:      SymRead,
+		asm:            x86.AVPMASKMOVD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:           "VPMASK32store128",
+		auxType:        auxSymOff,
+		argLen:         4,
+		faultOnNilArg0: true,
+		symEffect:      SymWrite,
+		asm:            x86.AVPMASKMOVD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{2, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+		},
+	},
+	{
+		name:           "VPMASK64load128",
+		auxType:        auxSymOff,
+		argLen:         3,
+		faultOnNilArg0: true,
+		symEffect:      SymRead,
+		asm:            x86.AVPMASKMOVQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:           "VPMASK64store128",
+		auxType:        auxSymOff,
+		argLen:         4,
+		faultOnNilArg0: true,
+		symEffect:      SymWrite,
+		asm:            x86.AVPMASKMOVQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{2, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+		},
+	},
+	{
+		name:           "VPMASK32load256",
+		auxType:        auxSymOff,
+		argLen:         3,
+		faultOnNilArg0: true,
+		symEffect:      SymRead,
+		asm:            x86.AVPMASKMOVD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:           "VPMASK32store256",
+		auxType:        auxSymOff,
+		argLen:         4,
+		faultOnNilArg0: true,
+		symEffect:      SymWrite,
+		asm:            x86.AVPMASKMOVD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{2, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+		},
+	},
+	{
+		name:           "VPMASK64load256",
+		auxType:        auxSymOff,
+		argLen:         3,
+		faultOnNilArg0: true,
+		symEffect:      SymRead,
+		asm:            x86.AVPMASKMOVQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+			outputs: []outputInfo{
+				{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+			},
+		},
+	},
+	{
+		name:           "VPMASK64store256",
+		auxType:        auxSymOff,
+		argLen:         4,
+		faultOnNilArg0: true,
+		symEffect:      SymWrite,
+		asm:            x86.AVPMASKMOVQ,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{2, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+				{0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+			},
+		},
+	},
 	{
 		name:   "VPMOVMToVec8x16",
 		argLen: 1,
@@ -59969,6 +60109,28 @@ var opcodeTable = [...]opInfo{
 		argLen:  3,
 		generic: true,
 	},
+	{
+		name:    "LoadMasked32",
+		argLen:  3,
+		generic: true,
+	},
+	{
+		name:    "LoadMasked64",
+		argLen:  3,
+		generic: true,
+	},
+	{
+		name:    "StoreMasked32",
+		auxType: auxTyp,
+		argLen:  4,
+		generic: true,
+	},
+	{
+		name:    "StoreMasked64",
+		auxType: auxTyp,
+		argLen:  4,
+		generic: true,
+	},
 	{
 		name:    "Move",
 		auxType: auxTypSize,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index ecd4a21f43d..d9560c55c22 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -2462,6 +2462,10 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpLoadMask8x32(v)
 	case OpLoadMask8x64:
 		return rewriteValueAMD64_OpLoadMask8x64(v)
+	case OpLoadMasked32:
+		return rewriteValueAMD64_OpLoadMasked32(v)
+	case OpLoadMasked64:
+		return rewriteValueAMD64_OpLoadMasked64(v)
 	case OpLocalAddr:
 		return rewriteValueAMD64_OpLocalAddr(v)
 	case OpLsh16x16:
@@ -5208,6 +5212,10 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpStoreMask8x32(v)
 	case OpStoreMask8x64:
 		return rewriteValueAMD64_OpStoreMask8x64(v)
+	case OpStoreMasked32:
+		return rewriteValueAMD64_OpStoreMasked32(v)
+	case OpStoreMasked64:
+		return rewriteValueAMD64_OpStoreMasked64(v)
 	case OpSub16:
 		v.Op = OpAMD64SUBL
 		return true
@@ -40555,6 +40563,78 @@ func rewriteValueAMD64_OpLoadMask8x64(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueAMD64_OpLoadMasked32(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (LoadMasked32 <t> ptr mask mem)
+	// cond: t.Size() == 16
+	// result: (VPMASK32load128 ptr mask mem)
+	for {
+		t := v.Type
+		ptr := v_0
+		mask := v_1
+		mem := v_2
+		if !(t.Size() == 16) {
+			break
+		}
+		v.reset(OpAMD64VPMASK32load128)
+		v.AddArg3(ptr, mask, mem)
+		return true
+	}
+	// match: (LoadMasked32 <t> ptr mask mem)
+	// cond: t.Size() == 32
+	// result: (VPMASK32load256 ptr mask mem)
+	for {
+		t := v.Type
+		ptr := v_0
+		mask := v_1
+		mem := v_2
+		if !(t.Size() == 32) {
+			break
+		}
+		v.reset(OpAMD64VPMASK32load256)
+		v.AddArg3(ptr, mask, mem)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpLoadMasked64(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (LoadMasked64 <t> ptr mask mem)
+	// cond: t.Size() == 16
+	// result: (VPMASK64load128 ptr mask mem)
+	for {
+		t := v.Type
+		ptr := v_0
+		mask := v_1
+		mem := v_2
+		if !(t.Size() == 16) {
+			break
+		}
+		v.reset(OpAMD64VPMASK64load128)
+		v.AddArg3(ptr, mask, mem)
+		return true
+	}
+	// match: (LoadMasked64 <t> ptr mask mem)
+	// cond: t.Size() == 32
+	// result: (VPMASK64load256 ptr mask mem)
+	for {
+		t := v.Type
+		ptr := v_0
+		mask := v_1
+		mem := v_2
+		if !(t.Size() == 32) {
+			break
+		}
+		v.reset(OpAMD64VPMASK64load256)
+		v.AddArg3(ptr, mask, mem)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpLocalAddr(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -53517,6 +53597,84 @@ func rewriteValueAMD64_OpStoreMask8x64(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueAMD64_OpStoreMasked32(v *Value) bool {
+	v_3 := v.Args[3]
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (StoreMasked32 {t} ptr mask val mem)
+	// cond: t.Size() == 16
+	// result: (VPMASK32store128 ptr mask val mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		mask := v_1
+		val := v_2
+		mem := v_3
+		if !(t.Size() == 16) {
+			break
+		}
+		v.reset(OpAMD64VPMASK32store128)
+		v.AddArg4(ptr, mask, val, mem)
+		return true
+	}
+	// match: (StoreMasked32 {t} ptr mask val mem)
+	// cond: t.Size() == 32
+	// result: (VPMASK32store256 ptr mask val mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		mask := v_1
+		val := v_2
+		mem := v_3
+		if !(t.Size() == 32) {
+			break
+		}
+		v.reset(OpAMD64VPMASK32store256)
+		v.AddArg4(ptr, mask, val, mem)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpStoreMasked64(v *Value) bool {
+	v_3 := v.Args[3]
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (StoreMasked64 {t} ptr mask val mem)
+	// cond: t.Size() == 16
+	// result: (VPMASK64store128 ptr mask val mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		mask := v_1
+		val := v_2
+		mem := v_3
+		if !(t.Size() == 16) {
+			break
+		}
+		v.reset(OpAMD64VPMASK64store128)
+		v.AddArg4(ptr, mask, val, mem)
+		return true
+	}
+	// match: (StoreMasked64 {t} ptr mask val mem)
+	// cond: t.Size() == 32
+	// result: (VPMASK64store256 ptr mask val mem)
+	for {
+		t := auxToType(v.Aux)
+		ptr := v_0
+		mask := v_1
+		val := v_2
+		mem := v_3
+		if !(t.Size() == 32) {
+			break
+		}
+		v.reset(OpAMD64VPMASK64store256)
+		v.AddArg4(ptr, mask, val, mem)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpSubMaskedFloat32x16(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go
index 0284729a525..7326ae24852 100644
--- a/src/cmd/compile/internal/ssagen/intrinsics.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics.go
@@ -1808,6 +1808,19 @@ func simdStoreMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*s
 	}
 }
 
+func simdMaskedLoad(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+		return s.newValue3(op, n.Type(), args[0], args[1], s.mem())
+	}
+}
+
+func simdMaskedStore(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+		s.vars[memVar] = s.newValue4A(op, types.TypeMem, args[0].Type, args[1], args[2], args[0], s.mem())
+		return nil
+	}
+}
+
 // findIntrinsic returns a function which builds the SSA equivalent of the
 // function identified by the symbol sym. If sym is not an intrinsic call, returns nil.
 func findIntrinsic(sym *types.Sym) intrinsicBuilder {
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index e9121c9ee23..3b406c0d6fc 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -1270,6 +1270,11 @@ func (s *state) newValue4(op ssa.Op, t *types.Type, arg0, arg1, arg2, arg3 *ssa.
 	return s.curBlock.NewValue4(s.peekPos(), op, t, arg0, arg1, arg2, arg3)
 }
 
+// newValue4A adds a new value with four arguments and an aux value to the current block.
+func (s *state) newValue4A(op ssa.Op, t *types.Type, aux ssa.Aux, arg0, arg1, arg2, arg3 *ssa.Value) *ssa.Value {
+	return s.curBlock.NewValue4A(s.peekPos(), op, t, aux, arg0, arg1, arg2, arg3)
+}
+
 // newValue4I adds a new value with four arguments and an auxint value to the current block.
 func (s *state) newValue4I(op ssa.Op, t *types.Type, aux int64, arg0, arg1, arg2, arg3 *ssa.Value) *ssa.Value {
 	return s.curBlock.NewValue4I(s.peekPos(), op, t, aux, arg0, arg1, arg2, arg3)
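Reviewer note, not part of the change above: the generic LoadMasked32/64 and StoreMasked32/64 ops, like the AVX2 VPMASKMOVD/VPMASKMOVQ instructions they lower to, select lanes by the sign bit of each mask element. A masked load zeroes the inactive lanes of the result and never touches their memory (so inactive lanes cannot fault); a masked store leaves inactive destination lanes unchanged. A minimal stand-alone Go sketch of those semantics for 32-bit lanes (maskedLoad32/maskedStore32 are illustrative names, not code in this CL; the 64-bit variants behave the same with int64 lanes):

package main

import "fmt"

// maskedLoad32 models LoadMasked32 / VPMASKMOVD: a lane is read only when
// the sign bit of the corresponding 32-bit mask element is set; all other
// lanes of the result are zero and their memory is never accessed.
func maskedLoad32(src, mask []int32) []int32 {
	dst := make([]int32, len(mask))
	for i, m := range mask {
		if m < 0 { // sign bit set => lane active
			dst[i] = src[i]
		}
	}
	return dst
}

// maskedStore32 models StoreMasked32 / VPMASKMOVD: only active lanes are
// written; inactive destination lanes keep their previous contents.
func maskedStore32(dst, mask, val []int32) {
	for i, m := range mask {
		if m < 0 {
			dst[i] = val[i]
		}
	}
}

func main() {
	src := []int32{1, 2, 3, 4}
	mask := []int32{-1, 0, -1, 0}        // lanes 0 and 2 active
	fmt.Println(maskedLoad32(src, mask)) // [1 0 3 0]

	dst := []int32{9, 9, 9, 9}
	maskedStore32(dst, mask, src)
	fmt.Println(dst) // [1 9 3 9]
}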