mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile: use X15 for zero value in AVX context
With the previous CL, the X15 (aliased with Y15, Z15) register holds the zero value for the whole register width. Use that in AVX context when a zero value is needed. Change-Id: If49b7059bce50c5e86f90bace0eaa830a91fa0fc Reviewed-on: https://go-review.googlesource.com/c/go/+/698238 Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Junyang Shao <shaojunyang@google.com> TryBot-Bypass: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
4c311aa38f
commit
8d874834f1
4 changed files with 998 additions and 988 deletions
|
|
@ -1713,12 +1713,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||||
case ssa.OpAMD64VZEROUPPER, ssa.OpAMD64VZEROALL:
|
case ssa.OpAMD64VZEROUPPER, ssa.OpAMD64VZEROALL:
|
||||||
s.Prog(v.Op.Asm())
|
s.Prog(v.Op.Asm())
|
||||||
case ssa.OpAMD64Zero128, ssa.OpAMD64Zero256, ssa.OpAMD64Zero512:
|
case ssa.OpAMD64Zero128, ssa.OpAMD64Zero256, ssa.OpAMD64Zero512:
|
||||||
p := s.Prog(v.Op.Asm())
|
// zero-width, no instruction generated
|
||||||
p.From.Type = obj.TYPE_REG
|
|
||||||
p.From.Reg = simdReg(v)
|
|
||||||
p.AddRestSourceReg(simdReg(v))
|
|
||||||
p.To.Type = obj.TYPE_REG
|
|
||||||
p.To.Reg = simdReg(v)
|
|
||||||
case ssa.OpAMD64VPADDD4:
|
case ssa.OpAMD64VPADDD4:
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
p.From.Type = obj.TYPE_REG
|
p.From.Type = obj.TYPE_REG
|
||||||
|
|
|
||||||
|
|
@ -132,6 +132,9 @@ func init() {
|
||||||
gpspsb = gpsp | buildReg("SB")
|
gpspsb = gpsp | buildReg("SB")
|
||||||
gpspsbg = gpspsb | g
|
gpspsbg = gpspsb | g
|
||||||
callerSave = gp | fp | g // runtime.setg (and anything calling it) may clobber g
|
callerSave = gp | fp | g // runtime.setg (and anything calling it) may clobber g
|
||||||
|
|
||||||
|
vz = v | x15
|
||||||
|
wz = w | x15
|
||||||
)
|
)
|
||||||
// Common slices of register masks
|
// Common slices of register masks
|
||||||
var (
|
var (
|
||||||
|
|
@ -140,6 +143,8 @@ func init() {
|
||||||
vonly = []regMask{v}
|
vonly = []regMask{v}
|
||||||
wonly = []regMask{w}
|
wonly = []regMask{w}
|
||||||
maskonly = []regMask{mask}
|
maskonly = []regMask{mask}
|
||||||
|
vzonly = []regMask{vz}
|
||||||
|
wzonly = []regMask{wz}
|
||||||
)
|
)
|
||||||
|
|
||||||
// Common regInfo
|
// Common regInfo
|
||||||
|
|
@ -207,26 +212,24 @@ func init() {
|
||||||
vloadk = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: vonly}
|
vloadk = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: vonly}
|
||||||
vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
|
vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
|
||||||
|
|
||||||
v01 = regInfo{inputs: nil, outputs: vonly}
|
v11 = regInfo{inputs: vzonly, outputs: vonly}
|
||||||
v11 = regInfo{inputs: vonly, outputs: vonly}
|
v21 = regInfo{inputs: []regMask{vz, vz}, outputs: vonly}
|
||||||
v21 = regInfo{inputs: []regMask{v, v}, outputs: vonly}
|
vk = regInfo{inputs: vzonly, outputs: maskonly}
|
||||||
vk = regInfo{inputs: vonly, outputs: maskonly}
|
|
||||||
kv = regInfo{inputs: maskonly, outputs: vonly}
|
kv = regInfo{inputs: maskonly, outputs: vonly}
|
||||||
v2k = regInfo{inputs: []regMask{v, v}, outputs: maskonly}
|
v2k = regInfo{inputs: []regMask{vz, vz}, outputs: maskonly}
|
||||||
vkv = regInfo{inputs: []regMask{v, mask}, outputs: vonly}
|
vkv = regInfo{inputs: []regMask{vz, mask}, outputs: vonly}
|
||||||
v2kv = regInfo{inputs: []regMask{v, v, mask}, outputs: vonly}
|
v2kv = regInfo{inputs: []regMask{vz, vz, mask}, outputs: vonly}
|
||||||
v2kk = regInfo{inputs: []regMask{v, v, mask}, outputs: maskonly}
|
v2kk = regInfo{inputs: []regMask{vz, vz, mask}, outputs: maskonly}
|
||||||
v31 = regInfo{inputs: []regMask{v, v, v}, outputs: vonly}
|
v31 = regInfo{inputs: []regMask{v, vz, vz}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||||
v3kv = regInfo{inputs: []regMask{v, v, v, mask}, outputs: vonly}
|
v3kv = regInfo{inputs: []regMask{v, vz, vz, mask}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
|
||||||
vgpv = regInfo{inputs: []regMask{v, gp}, outputs: vonly}
|
vgpv = regInfo{inputs: []regMask{vz, gp}, outputs: vonly}
|
||||||
vgp = regInfo{inputs: vonly, outputs: gponly}
|
vgp = regInfo{inputs: vonly, outputs: gponly}
|
||||||
vfpv = regInfo{inputs: []regMask{v, fp}, outputs: vonly}
|
vfpv = regInfo{inputs: []regMask{vz, fp}, outputs: vonly}
|
||||||
vfpkv = regInfo{inputs: []regMask{v, fp, mask}, outputs: vonly}
|
vfpkv = regInfo{inputs: []regMask{vz, fp, mask}, outputs: vonly}
|
||||||
|
|
||||||
w01 = regInfo{inputs: nil, outputs: wonly}
|
w11 = regInfo{inputs: wzonly, outputs: wonly}
|
||||||
w11 = regInfo{inputs: wonly, outputs: wonly}
|
w21 = regInfo{inputs: []regMask{wz, wz}, outputs: wonly}
|
||||||
w21 = regInfo{inputs: []regMask{w, w}, outputs: wonly}
|
wk = regInfo{inputs: wzonly, outputs: maskonly}
|
||||||
wk = regInfo{inputs: wonly, outputs: maskonly}
|
|
||||||
kw = regInfo{inputs: maskonly, outputs: wonly}
|
kw = regInfo{inputs: maskonly, outputs: wonly}
|
||||||
w2k = regInfo{inputs: []regMask{fp, fp}, outputs: maskonly}
|
w2k = regInfo{inputs: []regMask{fp, fp}, outputs: maskonly}
|
||||||
wkw = regInfo{inputs: []regMask{fp, mask}, outputs: fponly}
|
wkw = regInfo{inputs: []regMask{fp, mask}, outputs: fponly}
|
||||||
|
|
@ -235,15 +238,17 @@ func init() {
|
||||||
w31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly}
|
w31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly}
|
||||||
w3kw = regInfo{inputs: []regMask{fp, fp, fp, mask}, outputs: fponly}
|
w3kw = regInfo{inputs: []regMask{fp, fp, fp, mask}, outputs: fponly}
|
||||||
wgpw = regInfo{inputs: []regMask{fp, gp}, outputs: fponly}
|
wgpw = regInfo{inputs: []regMask{fp, gp}, outputs: fponly}
|
||||||
wgp = regInfo{inputs: wonly, outputs: gponly}
|
wgp = regInfo{inputs: wzonly, outputs: gponly}
|
||||||
wfpw = regInfo{inputs: []regMask{w, fp}, outputs: wonly}
|
wfpw = regInfo{inputs: []regMask{wz, fp}, outputs: wonly}
|
||||||
wfpkw = regInfo{inputs: []regMask{w, fp, mask}, outputs: wonly}
|
wfpkw = regInfo{inputs: []regMask{wz, fp, mask}, outputs: wonly}
|
||||||
|
|
||||||
kload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly}
|
kload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly}
|
||||||
kstore = regInfo{inputs: []regMask{gpspsb, mask, 0}}
|
kstore = regInfo{inputs: []regMask{gpspsb, mask, 0}}
|
||||||
gpk = regInfo{inputs: gponly, outputs: maskonly}
|
gpk = regInfo{inputs: gponly, outputs: maskonly}
|
||||||
kgp = regInfo{inputs: maskonly, outputs: gponly}
|
kgp = regInfo{inputs: maskonly, outputs: gponly}
|
||||||
|
|
||||||
|
x15only = regInfo{inputs: nil, outputs: []regMask{x15}}
|
||||||
|
|
||||||
prefreg = regInfo{inputs: []regMask{gpspsbg}}
|
prefreg = regInfo{inputs: []regMask{gpspsbg}}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -1375,9 +1380,9 @@ func init() {
|
||||||
{name: "VPMOVVec64x4ToM", argLength: 1, reg: vk, asm: "VPMOVQ2M"},
|
{name: "VPMOVVec64x4ToM", argLength: 1, reg: vk, asm: "VPMOVQ2M"},
|
||||||
{name: "VPMOVVec64x8ToM", argLength: 1, reg: wk, asm: "VPMOVQ2M"},
|
{name: "VPMOVVec64x8ToM", argLength: 1, reg: wk, asm: "VPMOVQ2M"},
|
||||||
|
|
||||||
{name: "Zero128", argLength: 0, reg: v01, asm: "VPXOR"},
|
{name: "Zero128", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
|
||||||
{name: "Zero256", argLength: 0, reg: v01, asm: "VPXOR"},
|
{name: "Zero256", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
|
||||||
{name: "Zero512", argLength: 0, reg: w01, asm: "VPXORQ"},
|
{name: "Zero512", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
|
||||||
|
|
||||||
{name: "VZEROUPPER", argLength: 0, asm: "VZEROUPPER"},
|
{name: "VZEROUPPER", argLength: 0, asm: "VZEROUPPER"},
|
||||||
{name: "VZEROALL", argLength: 0, asm: "VZEROALL"},
|
{name: "VZEROALL", argLength: 0, asm: "VZEROALL"},
|
||||||
|
|
@ -1433,7 +1438,7 @@ func init() {
|
||||||
ParamFloatRegNames: "X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14",
|
ParamFloatRegNames: "X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14",
|
||||||
gpregmask: gp,
|
gpregmask: gp,
|
||||||
fpregmask: fp,
|
fpregmask: fp,
|
||||||
specialregmask: x15 | mask,
|
specialregmask: mask,
|
||||||
framepointerreg: int8(num["BP"]),
|
framepointerreg: int8(num["BP"]),
|
||||||
linkreg: -1, // not used
|
linkreg: -1, // not used
|
||||||
})
|
})
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1440,6 +1440,13 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||||
s.sb = v.ID
|
s.sb = v.ID
|
||||||
case OpARM64ZERO:
|
case OpARM64ZERO:
|
||||||
s.assignReg(s.ZeroIntReg, v, v)
|
s.assignReg(s.ZeroIntReg, v, v)
|
||||||
|
case OpAMD64Zero128, OpAMD64Zero256, OpAMD64Zero512:
|
||||||
|
regspec := s.regspec(v)
|
||||||
|
m := regspec.outputs[0].regs
|
||||||
|
if countRegs(m) != 1 {
|
||||||
|
f.Fatalf("bad fixed-register op %s", v)
|
||||||
|
}
|
||||||
|
s.assignReg(pickReg(m), v, v)
|
||||||
default:
|
default:
|
||||||
f.Fatalf("unknown fixed-register op %s", v)
|
f.Fatalf("unknown fixed-register op %s", v)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue