mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: make amd64 version of zerorange regabi-friendly
Change the amd64 version of 'zerorange' to avoid using RAX/RDI, since it can be called in a context when one of these registers is live (contains an incoming parameter). Updates #40724. Change-Id: Ibfa2b4e156b876354d4f8bd04eb8773c7056d948 Reviewed-on: https://go-review.googlesource.com/c/go/+/305829 Trust: Than McIntosh <thanm@google.com> Reviewed-by: Cherry Zhang <cherryyz@google.com> Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
c40dc677be
commit
3300390ec7
1 changed files with 19 additions and 13 deletions
|
|
@ -56,7 +56,7 @@ func dzDI(b int64) int64 {
|
||||||
|
|
||||||
func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog {
|
func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog {
|
||||||
const (
|
const (
|
||||||
ax = 1 << iota // if AX is already zeroed.
|
r13 = 1 << iota // if R13 is already zeroed.
|
||||||
x15 // if X15 is already zeroed. Note: in new ABI, X15 is always zero.
|
x15 // if X15 is already zeroed. Note: in new ABI, X15 is always zero.
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -69,21 +69,21 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.
|
||||||
if cnt%int64(types.PtrSize) != 0 {
|
if cnt%int64(types.PtrSize) != 0 {
|
||||||
base.Fatalf("zerorange count not a multiple of widthptr %d", cnt)
|
base.Fatalf("zerorange count not a multiple of widthptr %d", cnt)
|
||||||
}
|
}
|
||||||
if *state&ax == 0 {
|
if *state&r13 == 0 {
|
||||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
|
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0)
|
||||||
*state |= ax
|
*state |= r13
|
||||||
}
|
}
|
||||||
p = pp.Append(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, off)
|
p = pp.Append(p, x86.AMOVL, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off)
|
||||||
off += int64(types.PtrSize)
|
off += int64(types.PtrSize)
|
||||||
cnt -= int64(types.PtrSize)
|
cnt -= int64(types.PtrSize)
|
||||||
}
|
}
|
||||||
|
|
||||||
if cnt == 8 {
|
if cnt == 8 {
|
||||||
if *state&ax == 0 {
|
if *state&r13 == 0 {
|
||||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
|
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0)
|
||||||
*state |= ax
|
*state |= r13
|
||||||
}
|
}
|
||||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, off)
|
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_MEM, x86.REG_SP, off)
|
||||||
} else if !isPlan9 && cnt <= int64(8*types.RegSize) {
|
} else if !isPlan9 && cnt <= int64(8*types.RegSize) {
|
||||||
if !objabi.Experiment.RegabiG && *state&x15 == 0 {
|
if !objabi.Experiment.RegabiG && *state&x15 == 0 {
|
||||||
p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_REG, x86.REG_X15, 0)
|
p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_REG, x86.REG_X15, 0)
|
||||||
|
|
@ -102,17 +102,23 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.
|
||||||
p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_REG, x86.REG_X15, 0)
|
p = pp.Append(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_REG, x86.REG_X15, 0)
|
||||||
*state |= x15
|
*state |= x15
|
||||||
}
|
}
|
||||||
|
// Save DI to r12. With the amd64 Go register abi, DI can contain
|
||||||
|
// an incoming parameter, whereas R12 is always scratch.
|
||||||
|
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
|
||||||
|
// Emit duffzero call
|
||||||
p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
|
p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
|
||||||
p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
|
p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
|
||||||
p.To.Sym = ir.Syms.Duffzero
|
p.To.Sym = ir.Syms.Duffzero
|
||||||
|
|
||||||
if cnt%16 != 0 {
|
if cnt%16 != 0 {
|
||||||
p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
|
p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
|
||||||
}
|
}
|
||||||
|
// Restore DI from r12
|
||||||
|
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
if *state&ax == 0 {
|
if *state&r13 == 0 {
|
||||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
|
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_R13, 0)
|
||||||
*state |= ax
|
*state |= r13
|
||||||
}
|
}
|
||||||
|
|
||||||
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)
|
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue