cmd/compile: simplify zerorange on arm64

Get rid of large zeroing cases. We only use this code
for small things now.

Change-Id: Iba0a98785c5b4b72cf031763edb69ff741ca41af
Reviewed-on: https://go-review.googlesource.com/c/go/+/678936
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Jorropo <jorropo.pgm@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Mark Freeman <mark@golang.org>
This commit is contained in:
Keith Randall 2025-06-03 13:06:25 -07:00 committed by Gopher Robot
parent 8cd85e602a
commit 5e94d72158

View file

@ -5,9 +5,7 @@
package arm64
import (
"cmd/compile/internal/ir"
"cmd/compile/internal/objw"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/arm64"
)
@ -22,47 +20,20 @@ func padframe(frame int64) int64 {
}
func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog {
if cnt == 0 {
return p
if cnt%8 != 0 {
panic("zeroed region not aligned")
}
if cnt < int64(4*types.PtrSize) {
for i := int64(0); i < cnt; i += int64(types.PtrSize) {
p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, 8+off+i)
}
} else if cnt <= int64(128*types.PtrSize) {
if cnt%(2*int64(types.PtrSize)) != 0 {
p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, 8+off)
off += int64(types.PtrSize)
cnt -= int64(types.PtrSize)
}
p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REG_R20, 0)
p = pp.Append(p, arm64.AADD, obj.TYPE_CONST, 0, 8+off, obj.TYPE_REG, arm64.REG_R20, 0)
p.Reg = arm64.REG_R20
p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ir.Syms.Duffzero
p.To.Offset = 4 * (64 - cnt/(2*int64(types.PtrSize)))
} else {
// Not using REGTMP, so this is async preemptible (async preemption clobbers REGTMP).
// We are at the function entry, where no register is live, so it is okay to clobber
// other registers
const rtmp = arm64.REG_R20
p = pp.Append(p, arm64.AMOVD, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, rtmp, 0)
p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REGRT1, 0)
p = pp.Append(p, arm64.AADD, obj.TYPE_REG, rtmp, 0, obj.TYPE_REG, arm64.REGRT1, 0)
p.Reg = arm64.REGRT1
p = pp.Append(p, arm64.AMOVD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, rtmp, 0)
p = pp.Append(p, arm64.AADD, obj.TYPE_REG, rtmp, 0, obj.TYPE_REG, arm64.REGRT2, 0)
p.Reg = arm64.REGRT1
p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGRT1, int64(types.PtrSize))
p.Scond = arm64.C_XPRE
p1 := p
p = pp.Append(p, arm64.ACMP, obj.TYPE_REG, arm64.REGRT1, 0, obj.TYPE_NONE, 0, 0)
p.Reg = arm64.REGRT2
p = pp.Append(p, arm64.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0)
p.To.SetTarget(p1)
off += 8 // return address was ignored in offset calculation
for cnt >= 16 && off < 512 {
p = pp.Append(p, arm64.ASTP, obj.TYPE_REGREG, arm64.REGZERO, arm64.REGZERO, obj.TYPE_MEM, arm64.REGSP, off)
off += 16
cnt -= 16
}
for cnt != 0 {
p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, off)
off += 8
cnt -= 8
}
return p
}