Revert "cmd/compile: redo arm64 LR/FP save and restore"

This reverts commit 719dfcf8a8.

Reason for revert: Causing crashes.

Change-Id: I0b8526dd03d82fa074ce4f97f1789eeac702b3eb
Reviewed-on: https://go-review.googlesource.com/c/go/+/709755
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Keith Randall 2025-10-07 07:58:50 -07:00 committed by Gopher Robot
parent 6469954203
commit c938051dd0
23 changed files with 356 additions and 298 deletions

View file

@ -576,19 +576,19 @@ A function's stack frame, after the frame is created, is laid out as
follows:
+------------------------------+
| return PC |
| frame pointer on entry | ← R29 points to
| ... locals ... |
| ... outgoing arguments ... |
| unused word | ← RSP points to
| return PC | ← RSP points to
| frame pointer on entry |
+------------------------------+ ↓ lower addresses
The "return PC" is loaded to the link register, R30, as part of the
arm64 `CALL` operation.
On entry, a function pushes R30 (the return address) and R29
(the caller's frame pointer) onto the bottom of the stack. It then
subtracts a constant from RSP to open its stack frame.
On entry, a function subtracts from RSP to open its stack frame, and
saves the values of R30 and R29 at the bottom of the frame.
Specifically, R30 is saved at 0(RSP) and R29 is saved at -8(RSP),
after RSP is updated.
A leaf function that does not require any stack space may omit the
saved R30 and R29.

View file

@ -11,12 +11,10 @@ import (
)
func padframe(frame int64) int64 {
// arm64 requires frame sizes here that are 8 mod 16.
// With the additional (unused) slot at the bottom of the frame,
// that makes an aligned 16 byte frame.
// Adding a save region for LR+FP does not change the alignment.
if frame != 0 {
frame += (-(frame + 8)) & 15
// arm64 requires that the frame size (not counting the saved FP and LR)
// be 16-byte aligned. If it is not, pad it.
if frame%16 != 0 {
frame += 16 - (frame % 16)
}
return frame
}

View file

@ -221,7 +221,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
for i := 0; i < len(args); i++ {
a := args[i]
// Offset by size of the unused slot before start of args.
// Offset by size of the saved LR slot.
addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize)
// Look for double-register operations if we can.
if i < len(args)-1 {

View file

@ -393,16 +393,10 @@ func StackOffset(slot ssa.LocalSlot) int32 {
case ir.PAUTO:
off = n.FrameOffset()
if base.Ctxt.Arch.FixedFrameSize == 0 {
// x86 return address
off -= int64(types.PtrSize)
}
if buildcfg.FramePointerEnabled {
// frame pointer
off -= int64(types.PtrSize)
if buildcfg.GOARCH == "arm64" {
// arm64 return address also
off -= int64(types.PtrSize)
}
}
}
return int32(off + slot.Off)

View file

@ -7150,7 +7150,6 @@ func defframe(s *State, e *ssafn, f *ssa.Func) {
// Insert code to zero ambiguously live variables so that the
// garbage collector only sees initialized values when it
// looks for pointers.
// Note: lo/hi are offsets from varp and will be negative.
var lo, hi int64
// Opaque state for backend to use. Current backends use it to
@ -7158,7 +7157,7 @@ func defframe(s *State, e *ssafn, f *ssa.Func) {
var state uint32
// Iterate through declarations. Autos are sorted in decreasing
// frame offset order (least negative to most negative).
// frame offset order.
for _, n := range e.curfn.Dcl {
if !n.Needzero() {
continue

View file

@ -51,6 +51,7 @@ type ctxt7 struct {
blitrl *obj.Prog
elitrl *obj.Prog
autosize int32
extrasize int32
instoffset int64
pc int64
pool struct {
@ -1121,7 +1122,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
ctxt.Diag("arm64 ops not initialized, call arm64.buildop first")
}
c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset)}
c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)}
p.To.Offset &= 0xffffffff // extrasize is no longer needed
// Process literal pool and allocate initial program counter for each Prog, before
// generating branch veneers.
@ -2117,8 +2119,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
// a.Offset is still relative to pseudo-SP.
a.Reg = obj.REG_NONE
}
// The frame top 16 bytes are for LR/FP
c.instoffset = int64(c.autosize) + a.Offset - extrasize
// The frame top 8 or 16 bytes are for FP
c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
return autoclass(c.instoffset)
case obj.NAME_PARAM:
@ -2178,8 +2180,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
// a.Offset is still relative to pseudo-SP.
a.Reg = obj.REG_NONE
}
// The frame top 16 bytes are for LR/FP
c.instoffset = int64(c.autosize) + a.Offset - extrasize
// The frame top 8 or 16 bytes are for FP
c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
case obj.NAME_PARAM:
if a.Reg == REGSP {

View file

@ -36,6 +36,7 @@ import (
"cmd/internal/src"
"cmd/internal/sys"
"internal/abi"
"internal/buildcfg"
"log"
"math"
)
@ -471,8 +472,6 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) {
obj.Nopout(p)
}
const extrasize = 16 // space needed in the frame for LR+FP
func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
return
@ -522,26 +521,33 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
c.autosize = int32(textstksiz)
if p.Mark&LEAF != 0 && c.autosize == 0 {
// A leaf function with no locals needs no frame.
// A leaf function with no locals has no frame.
p.From.Sym.Set(obj.AttrNoFrame, true)
}
if !p.From.Sym.NoFrame() {
// If there is a stack frame at all, it includes
// space for the (now unused) word at [SP:SP+8].
// space to save the LR.
c.autosize += 8
}
// Round up to a multiple of 16.
c.autosize += (-c.autosize) & 15
if c.autosize != 0 {
// Allocate an extra 16 bytes at the top of the frame
// to save LR+FP.
extrasize := int32(0)
if c.autosize%16 == 8 {
// Allocate extra 8 bytes on the frame top to save FP
extrasize = 8
} else if c.autosize&(16-1) == 0 {
// Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16
extrasize = 16
} else {
c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8)
}
c.autosize += extrasize
c.cursym.Func().Locals += extrasize
p.To.Offset = int64(c.autosize)
// low 32 bits for autosize
// high 32 bits for extrasize
p.To.Offset = int64(c.autosize) | int64(extrasize)<<32
} else {
// NOFRAME
p.To.Offset = 0
@ -574,72 +580,120 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
var prologueEnd *obj.Prog
aoffset := c.autosize
if aoffset < 16 {
log.Fatalf("aoffset too small %d", aoffset)
if aoffset > 0xf0 {
// The MOVD.W offset variant has a range of -0x100 to 0xf8, and SP should be
// 16-byte aligned, so the maximum aoffset value is 0xf0.
aoffset = 0xf0
}
// Frame is non-empty. Make sure to save link register, even if
// it is a leaf function, so that traceback works.
q = p
if c.autosize > aoffset {
// Frame size is too large for a MOVD.W instruction. Store the frame pointer
// register and link register before decrementing SP, so if a signal comes
// during the execution of the function prologue, the traceback code will
// not see a half-updated stack frame.
// Store return address and frame pointer at the top of the stack frame.
// STP.W (R29, R30), -16(SP)
// SUB $autosize, RSP, R20
q1 = obj.Appendp(q, c.newprog)
q1.Pos = p.Pos
q1.As = ASUB
q1.From.Type = obj.TYPE_CONST
q1.From.Offset = int64(c.autosize)
q1.Reg = REGSP
q1.To.Type = obj.TYPE_REG
q1.To.Reg = REG_R20
prologueEnd = q1
// STP (R29, R30), -8(R20)
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = ASTP
q1.From.Type = obj.TYPE_REGREG
q1.From.Reg = REGFP
q1.From.Offset = REGLINK
q1.To.Type = obj.TYPE_MEM
q1.To.Reg = REG_RSP
q1.To.Offset = -16
q1.To.Reg = REG_R20
q1.To.Offset = -8
// This is not async preemptible, as if we open a frame
// at the current SP, it will clobber the saved LR.
q1 = c.ctxt.StartUnsafePoint(q1, c.newprog)
// MOVD R20, RSP
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = AMOVD
q1.From.Type = obj.TYPE_REG
q1.From.Reg = REG_R20
q1.To.Type = obj.TYPE_REG
q1.To.Reg = REGSP
q1.Spadj = c.autosize
q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
if buildcfg.GOOS == "ios" {
// iOS does not support SA_ONSTACK. We will run the signal handler
// on the G stack. If we write below SP, it may be clobbered by
// the signal handler. So we save FP and LR after decrementing SP.
// STP (R29, R30), -8(RSP)
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = ASTP
q1.From.Type = obj.TYPE_REGREG
q1.From.Reg = REGFP
q1.From.Offset = REGLINK
q1.To.Type = obj.TYPE_MEM
q1.To.Reg = REGSP
q1.To.Offset = -8
}
} else {
// small frame, update SP and save LR in a single MOVD.W instruction.
// So if a signal comes during the execution of the function prologue,
// the traceback code will not see a half-updated stack frame.
// Also, on Linux, in a cgo binary we may get a SIGSETXID signal
// early on before the signal stack is set, as glibc doesn't allow
// us to block SIGSETXID. So it is important that we don't write below
// the SP until the signal stack is set.
// Luckily, all the functions from thread entry to setting the signal
// stack have small frames.
q1 = obj.Appendp(q, c.newprog)
q1.As = AMOVD
q1.Pos = p.Pos
q1.From.Type = obj.TYPE_REG
q1.From.Reg = REGLINK
q1.To.Type = obj.TYPE_MEM
q1.Scond = C_XPRE
q1.To.Offset = int64(-aoffset)
q1.To.Reg = REGSP
q1.Spadj = aoffset
prologueEnd = q1
// Update frame pointer
// Frame pointer.
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = AMOVD
q1.From.Type = obj.TYPE_REG
q1.From.Reg = REGSP
q1.To.Type = obj.TYPE_REG
q1.To.Reg = REGFP
// Allocate additional frame space.
adj := aoffset - 16
if adj > 0 {
// SUB $autosize-16, RSP
if adj < 1<<12 {
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = ASUB
q1.From.Type = obj.TYPE_CONST
q1.From.Offset = int64(adj)
q1.To.Type = obj.TYPE_REG
q1.From.Reg = REGFP
q1.To.Type = obj.TYPE_MEM
q1.To.Reg = REGSP
} else {
// Constant too big for atomic subtract.
// Materialize in tmp register first.
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = AMOVD
q1.From.Type = obj.TYPE_CONST
q1.From.Offset = int64(adj)
q1.To.Type = obj.TYPE_REG
q1.To.Reg = REGTMP
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = ASUB
q1.From.Type = obj.TYPE_REG
q1.From.Reg = REGTMP
q1.To.Type = obj.TYPE_REG
q1.To.Reg = REGSP
}
q1.Spadj = adj
q1.To.Offset = -8
}
prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd)
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = ASUB
q1.From.Type = obj.TYPE_CONST
q1.From.Offset = 8
q1.Reg = REGSP
q1.To.Type = obj.TYPE_REG
q1.To.Reg = REGFP
case obj.ARET:
nocache(p)
if p.From.Type == obj.TYPE_CONST {
@ -653,56 +707,105 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
}
p.To = obj.Addr{}
aoffset := c.autosize
if aoffset > 0 {
if aoffset < 16 {
log.Fatalf("aoffset too small %d", aoffset)
}
adj := aoffset - 16
if adj > 0 {
if adj < 1<<12 {
// ADD $adj, RSP, RSP
if c.cursym.Func().Text.Mark&LEAF != 0 {
if aoffset != 0 {
// Restore frame pointer.
// ADD $framesize-8, RSP, R29
p.As = AADD
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(adj)
p.From.Offset = int64(c.autosize) - 8
p.Reg = REGSP
p.To.Type = obj.TYPE_REG
p.To.Reg = REGFP
// Pop stack frame.
// ADD $framesize, RSP, RSP
p = obj.Appendp(p, c.newprog)
p.As = AADD
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(c.autosize)
p.To.Type = obj.TYPE_REG
p.To.Reg = REGSP
p.Spadj = -c.autosize
}
} else if aoffset <= 0xF0 {
// small frame, restore LR and update SP in a single MOVD.P instruction.
// Using a single LDP for LR and FP would also be correct,
// but that instruction would not pattern-match the prologue's
// MOVD.W and MOVD, which may cause performance issues with
// store-forwarding.
// MOVD -8(RSP), R29
p.As = AMOVD
p.From.Type = obj.TYPE_MEM
p.From.Reg = REGSP
p.From.Offset = -8
p.To.Type = obj.TYPE_REG
p.To.Reg = REGFP
p = obj.Appendp(p, c.newprog)
// MOVD.P offset(RSP), R30
p.As = AMOVD
p.From.Type = obj.TYPE_MEM
p.Scond = C_XPOST
p.From.Offset = int64(aoffset)
p.From.Reg = REGSP
p.To.Type = obj.TYPE_REG
p.To.Reg = REGLINK
p.Spadj = -aoffset
} else {
// LDP -8(RSP), (R29, R30)
p.As = ALDP
p.From.Type = obj.TYPE_MEM
p.From.Offset = -8
p.From.Reg = REGSP
p.To.Type = obj.TYPE_REGREG
p.To.Reg = REGFP
p.To.Offset = REGLINK
if aoffset < 1<<12 {
// ADD $aoffset, RSP, RSP
q = newprog()
q.As = AADD
q.From.Type = obj.TYPE_CONST
q.From.Offset = int64(aoffset)
q.To.Type = obj.TYPE_REG
q.To.Reg = REGSP
q.Spadj = -aoffset
q.Pos = p.Pos
q.Link = p.Link
p.Link = q
p = q
} else {
// Put frame size in a separate register and
// add it in with a single instruction,
// so we never have a partial frame during
// the epilog. See issue 73259.
// MOVD $adj, REGTMP
p.As = AMOVD
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(adj)
p.To.Type = obj.TYPE_REG
p.To.Reg = REGTMP
// MOVD $aoffset, REGTMP
q = newprog()
q.As = AMOVD
q.From.Type = obj.TYPE_CONST
q.From.Offset = int64(aoffset)
q.To.Type = obj.TYPE_REG
q.To.Reg = REGTMP
q.Pos = p.Pos
q.Link = p.Link
p.Link = q
p = q
// ADD REGTMP, RSP, RSP
p = obj.Appendp(p, c.newprog)
p.As = AADD
p.From.Type = obj.TYPE_REG
p.From.Reg = REGTMP
p.To.Type = obj.TYPE_REG
p.To.Reg = REGSP
q = newprog()
q.As = AADD
q.From.Type = obj.TYPE_REG
q.From.Reg = REGTMP
q.To.Type = obj.TYPE_REG
q.To.Reg = REGSP
q.Spadj = -aoffset
q.Pos = p.Pos
q.Link = p.Link
p.Link = q
p = q
}
p.Spadj = -adj
}
// Pop LR+FP.
// LDP.P 16(RSP), (R29, R30)
if p.As != obj.ARET {
p = obj.Appendp(p, c.newprog)
}
p.As = ALDP
p.From.Type = obj.TYPE_MEM
p.From.Reg = REGSP
p.From.Offset = 16
p.Scond = C_XPOST
p.To.Type = obj.TYPE_REGREG
p.To.Reg = REGFP
p.To.Offset = REGLINK
p.Spadj = -16
}
// If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC',
@ -765,11 +868,10 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p.From.Type = obj.TYPE_REG
p.From.Reg = REGLINK
} else {
/* MOVD framesize-8(RSP), Rd */
/* MOVD (RSP), Rd */
p.As = AMOVD
p.From.Type = obj.TYPE_MEM
p.From.Reg = REGSP
p.From.Offset = int64(c.autosize - 8)
}
}
if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 {
@ -804,12 +906,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p.From.Reg = int16(REG_LSL + r + (shift&7)<<5)
p.From.Offset = 0
}
if p.To.Type == obj.TYPE_MEM && p.To.Reg == REG_RSP && (p.Scond == C_XPRE || p.Scond == C_XPOST) {
p.Spadj += int32(-p.To.Offset)
}
if p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_RSP && (p.Scond == C_XPRE || p.Scond == C_XPOST) {
p.Spadj += int32(-p.From.Offset)
}
}
}

View file

@ -60,7 +60,6 @@ func Init() (*sys.Arch, ld.Arch) {
MachorelocSize: 8,
PEreloc1: pereloc1,
TLSIEtoLE: tlsIEtoLE,
ReturnAddressAtTopOfFrame: true,
ELF: ld.ELFArch{
Linuxdynld: "/lib64/ld-linux-x86-64.so.2",

View file

@ -58,7 +58,6 @@ func Init() (*sys.Arch, ld.Arch) {
MachorelocSize: 8,
PEreloc1: pereloc1,
Trampoline: trampoline,
ReturnAddressAtTopOfFrame: true,
ELF: ld.ELFArch{
Androiddynld: "/system/bin/linker64",

View file

@ -1544,14 +1544,9 @@ func (d *dwctxt) writeframes(fs loader.Sym) dwarfSecInfo {
if pcsp.Value > 0 {
// The return address is preserved at (CFA-frame_size)
// after a stack frame has been allocated.
off := -spdelta
if thearch.ReturnAddressAtTopOfFrame {
// Except arm64, which has it at the top of frame.
off = -int64(d.arch.PtrSize)
}
deltaBuf = append(deltaBuf, dwarf.DW_CFA_offset_extended_sf)
deltaBuf = dwarf.AppendUleb128(deltaBuf, uint64(thearch.Dwarfreglr))
deltaBuf = dwarf.AppendSleb128(deltaBuf, off/dataAlignmentFactor)
deltaBuf = dwarf.AppendSleb128(deltaBuf, -spdelta/dataAlignmentFactor)
} else {
// The return address is restored into the link register
// when a stack frame has been de-allocated.

View file

@ -263,10 +263,6 @@ type Arch struct {
// optional override for assignAddress
AssignAddress func(ldr *loader.Loader, sect *sym.Section, n int, s loader.Sym, va uint64, isTramp bool) (*sym.Section, int, uint64)
// Reports whether the return address is stored at the top (highest address)
// of the stack frame.
ReturnAddressAtTopOfFrame bool
// ELF specific information.
ELF ELFArch
}

View file

@ -9,6 +9,7 @@ import (
"cmd/internal/objabi"
"cmd/link/internal/loader"
"fmt"
"internal/buildcfg"
"sort"
"strings"
)
@ -61,6 +62,10 @@ func (ctxt *Link) doStackCheck() {
// that there are at least StackLimit bytes available below SP
// when morestack returns.
limit := objabi.StackNosplit(*flagRace) - sc.callSize
if buildcfg.GOARCH == "arm64" {
// Need an extra 8 bytes below SP to save FP.
limit -= 8
}
// Compute stack heights without any back-tracking information.
// This will almost certainly succeed and we can simply

View file

@ -57,7 +57,6 @@ func Init() (*sys.Arch, ld.Arch) {
Gentext: gentext,
Machoreloc1: machoreloc1,
PEreloc1: pereloc1,
ReturnAddressAtTopOfFrame: true,
ELF: ld.ELFArch{
Linuxdynld: "/lib/ld-linux.so.2",

View file

@ -50,7 +50,9 @@ TEXT _rt0_arm64_lib(SB),NOSPLIT,$184
CBZ R4, nocgo
MOVD $_rt0_arm64_lib_go(SB), R0
MOVD $0, R1
SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved.
BL (R4)
ADD $16, RSP
B restore
nocgo:
@ -369,6 +371,7 @@ switch:
BL runtime·save_g(SB)
MOVD (g_sched+gobuf_sp)(g), R0
MOVD R0, RSP
MOVD (g_sched+gobuf_bp)(g), R29
MOVD $0, (g_sched+gobuf_sp)(g)
MOVD $0, (g_sched+gobuf_bp)(g)
RET
@ -378,8 +381,8 @@ noswitch:
// Using a tail call here cleans up tracebacks since we won't stop
// at an intermediate systemstack.
MOVD 0(R26), R3 // code pointer
ADD $16, RSP
LDP.P 16(RSP), (R29,R30) // restore FP, LR
MOVD.P 16(RSP), R30 // restore LR
SUB $8, RSP, R29 // restore FP
B (R3)
// func switchToCrashStack0(fn func())
@ -1048,7 +1051,7 @@ again:
// Smashes R0.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
MOVD $runtime·systemstack_switch(SB), R0
ADD $12, R0 // get past prologue
ADD $8, R0 // get past prologue
MOVD R0, (g_sched+gobuf_pc)(g)
MOVD RSP, R0
MOVD R0, (g_sched+gobuf_sp)(g)
@ -1066,7 +1069,9 @@ TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
MOVD fn+0(FP), R1
MOVD arg+8(FP), R0
SUB $16, RSP // skip over saved frame pointer below RSP
BL (R1)
ADD $16, RSP // skip over saved frame pointer below RSP
RET
// func asmcgocall(fn, arg unsafe.Pointer) int32
@ -1231,9 +1236,9 @@ havem:
BL runtime·save_g(SB)
MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4
MOVD (g_sched+gobuf_pc)(g), R5
MOVD R5, -8(R4)
MOVD R5, -48(R4)
MOVD (g_sched+gobuf_bp)(g), R5
MOVD R5, -16(R4)
MOVD R5, -56(R4)
// Gather our arguments into registers.
MOVD fn+0(FP), R1
MOVD frame+8(FP), R2
@ -1247,7 +1252,7 @@ havem:
CALL (R0) // indirect call to bypass nosplit check. We're on a different stack now.
// Restore g->sched (== m->curg->sched) from saved values.
MOVD 40(RSP), R5
MOVD 0(RSP), R5
MOVD R5, (g_sched+gobuf_pc)(g)
MOVD RSP, R4
ADD $48, R4, R4
@ -1485,57 +1490,10 @@ GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
//
// State before debugger starts doing anything:
// | current |
// | stack |
// +-------------+ <- SP = origSP
// stopped executing at PC = origPC
// some values are in LR (origLR) and FP (origFP)
//
// After debugger has done steps 1-6 above:
// | current |
// | stack |
// +-------------+ <- origSP
// | ----- | (used to be a slot to store frame pointer on entry to origPC's frame.)
// +-------------+
// | origLR |
// +-------------+ <- SP
// | ----- |
// +-------------+
// | argsize |
// +-------------+
// LR = origPC, PC = debugCallV2
//
// debugCallV2 then modifies the stack up to the "good" label:
// | current |
// | stack |
// +-------------+ <- origSP
// | ----- | (used to be a slot to store frame pointer on entry to origPC's frame.)
// +-------------+
// | origLR |
// +-------------+ <- where debugger left SP
// | origPC |
// +-------------+
// | origFP |
// +-------------+ <- FP = SP + 256
// | saved |
// | registers |
// | (224 bytes) |
// +-------------+ <- SP + 32
// | space for |
// | outargs |
// +-------------+ <- SP + 8
// | argsize |
// +-------------+ <- SP
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
MOVD R30, -8(RSP) // save origPC
MOVD -16(RSP), R30 // save argsize in R30 temporarily
MOVD.W R29, -16(RSP) // push origFP
MOVD RSP, R29 // frame pointer chain now set up
SUB $256, RSP, RSP // allocate frame
MOVD R30, (RSP) // Save argsize on the stack
STP (R29, R30), -280(RSP)
SUB $272, RSP, RSP
SUB $8, RSP, R29
// Save all registers that may contain pointers so they can be
// conservatively scanned.
//
@ -1557,8 +1515,7 @@ TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
STP (R0, R1), (4*8)(RSP)
// Perform a safe-point check.
MOVD 264(RSP), R0 // origPC
MOVD R0, 8(RSP)
MOVD R30, 8(RSP) // Caller's PC
CALL runtime·debugCallCheck(SB)
MOVD 16(RSP), R0
CBZ R0, good
@ -1602,7 +1559,7 @@ good:
CALL runtime·debugCallWrap(SB); \
JMP restore
MOVD (RSP), R0 // the argument frame size
MOVD 256(RSP), R0 // the argument frame size
DEBUG_CALL_DISPATCH(debugCall32<>, 32)
DEBUG_CALL_DISPATCH(debugCall64<>, 64)
DEBUG_CALL_DISPATCH(debugCall128<>, 128)
@ -1650,9 +1607,9 @@ restore:
LDP (6*8)(RSP), (R2, R3)
LDP (4*8)(RSP), (R0, R1)
MOVD 272(RSP), R30 // restore old lr (saved by (*sigctxt).pushCall)
LDP 256(RSP), (R29, R27) // restore old fp, set up resumption address
ADD $288, RSP, RSP // Pop frame, LR+FP, and block pushed by (*sigctxt).pushCall
LDP -8(RSP), (R29, R27)
ADD $288, RSP, RSP // Add 16 more bytes, see saveSigContext
MOVD -16(RSP), R30 // restore old lr
JMP (R27)
// runtime.debugCallCheck assumes that functions defined with the

View file

@ -488,18 +488,26 @@ func genARM64(g *gen) {
l.stack += 8 // SP needs 16-byte alignment
}
// allocate frame, save PC (in R30), FP (in R29) of interrupted instruction
p("STP.W (R29, R30), -16(RSP)")
p("MOVD RSP, R29") // set up new frame pointer
// allocate frame, save PC of interrupted instruction (in LR)
p("MOVD R30, %d(RSP)", -l.stack)
p("SUB $%d, RSP", l.stack)
p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
p("SUB $8, RSP, R29") // set up new frame pointer
// On iOS, save the LR again after decrementing SP. We run the
// signal handler on the G stack (as it doesn't support sigaltstack),
// so any writes below SP may be clobbered.
p("#ifdef GOOS_ios")
p("MOVD R30, (RSP)")
p("#endif")
l.save(g)
p("CALL ·asyncPreempt2(SB)")
l.restore(g)
p("MOVD %d(RSP), R30", l.stack+16) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
p("LDP %d(RSP), (R29, R27)", l.stack) // Restore frame pointer. Load PC into regtmp.
p("ADD $%d, RSP", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall)
p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
p("MOVD -8(RSP), R29") // restore frame pointer
p("MOVD (RSP), R27") // load PC to REGTMP
p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall)
p("RET (R27)")
}

View file

@ -1379,10 +1379,10 @@ func recovery(gp *g) {
// the caller
gp.sched.bp = fp - 2*goarch.PtrSize
case goarch.IsArm64 != 0:
// on arm64, the first two words of the frame are caller's PC
// (the saved LR register) and the caller's BP.
// Coincidentally, the same as amd64.
gp.sched.bp = fp - 2*goarch.PtrSize
// on arm64, the architectural bp points one word below
// the sp (bp = sp - PtrSize). fp is totally useless to us here, because it
// only gets us to the caller's fp.
gp.sched.bp = sp - goarch.PtrSize
}
gogo(&gp.sched)
}

View file

@ -4,9 +4,13 @@
#include "textflag.h"
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
STP.W (R29, R30), -16(RSP)
MOVD RSP, R29
MOVD R30, -496(RSP)
SUB $496, RSP
MOVD R29, -8(RSP)
SUB $8, RSP, R29
#ifdef GOOS_ios
MOVD R30, (RSP)
#endif
STP (R0, R1), 8(RSP)
STP (R2, R3), 24(RSP)
STP (R4, R5), 40(RSP)
@ -74,7 +78,8 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
LDP 40(RSP), (R4, R5)
LDP 24(RSP), (R2, R3)
LDP 8(RSP), (R0, R1)
MOVD 512(RSP), R30
LDP 496(RSP), (R29, R27)
ADD $528, RSP
MOVD 496(RSP), R30
MOVD -8(RSP), R29
MOVD (RSP), R27
ADD $512, RSP
RET (R27)

View file

@ -397,7 +397,7 @@ TEXT racecallatomic<>(SB), NOSPLIT, $0
// R3 = addr of incoming arg list
// Trigger SIGSEGV early.
MOVD 72(RSP), R3 // 1st arg is addr. after two small frames (32 bytes each), get it at 72(RSP)
MOVD 40(RSP), R3 // 1st arg is addr. after two times BL, get it at 40(RSP)
MOVB (R3), R13 // segv here if addr is bad
// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
MOVD runtime·racearenastart(SB), R10
@ -417,11 +417,10 @@ racecallatomic_ok:
// Addr is within the good range, call the atomic function.
load_g
MOVD g_racectx(g), R0 // goroutine context
MOVD 56(RSP), R1 // caller pc
MOVD 16(RSP), R1 // caller pc
MOVD R9, R2 // pc
ADD $72, RSP, R3
BL racecall<>(SB)
RET
ADD $40, RSP, R3
JMP racecall<>(SB) // does not return
racecallatomic_ignore:
// Addr is outside the good range.
// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
@ -436,9 +435,9 @@ racecallatomic_ignore:
// racecall will call LLVM race code which might clobber R28 (g)
load_g
MOVD g_racectx(g), R0 // goroutine context
MOVD 56(RSP), R1 // caller pc
MOVD 16(RSP), R1 // caller pc
MOVD R9, R2 // pc
ADD $72, RSP, R3 // arguments
ADD $40, RSP, R3 // arguments
BL racecall<>(SB)
// Call __tsan_go_ignore_sync_end.
MOVD $__tsan_go_ignore_sync_end(SB), R9
@ -477,6 +476,10 @@ TEXT racecall<>(SB), NOSPLIT|NOFRAME, $0-0
MOVD (g_sched+gobuf_sp)(R11), R12
MOVD R12, RSP
call:
// Decrement SP past where the frame pointer is saved in the Go arm64
// ABI (one word below the stack pointer) so the race detector library
// code doesn't clobber it
SUB $16, RSP
BL R9
MOVD R19, RSP
JMP (R20)

View file

@ -8,6 +8,7 @@ package runtime
import (
"internal/abi"
"internal/goarch"
"internal/runtime/sys"
"unsafe"
)
@ -62,11 +63,18 @@ func (c *sigctxt) preparePanic(sig uint32, gp *g) {
// We arrange lr, and pc to pretend the panicking
// function calls sigpanic directly.
// Always save LR to stack so that panics in leaf
// functions are correctly handled.
// This extra space is known to gentraceback.
// functions are correctly handled. This smashes
// the stack frame but we're not going back there
// anyway.
sp := c.sp() - sys.StackAlign // needs only sizeof uint64, but must align the stack
c.set_sp(sp)
*(*uint64)(unsafe.Pointer(uintptr(sp))) = c.lr()
// Make sure a valid frame pointer is saved on the stack so that the
// frame pointer checks in adjustframe are happy, if they're enabled.
// Frame pointer unwinding won't visit the sigpanic frame, since
// sigpanic will save the same frame pointer before calling into a panic
// function.
*(*uint64)(unsafe.Pointer(uintptr(sp - goarch.PtrSize))) = c.r29()
pc := gp.sigpc
@ -88,6 +96,10 @@ func (c *sigctxt) pushCall(targetPC, resumePC uintptr) {
sp := c.sp() - 16 // SP needs 16-byte alignment
c.set_sp(sp)
*(*uint64)(unsafe.Pointer(uintptr(sp))) = c.lr()
// Make sure a valid frame pointer is saved on the stack so that the
// frame pointer checks in adjustframe are happy, if they're enabled.
// This is not actually used for unwinding.
*(*uint64)(unsafe.Pointer(uintptr(sp - goarch.PtrSize))) = c.r29()
// Set up PC and LR to pretend the function being signaled
// calls targetPC at resumePC.
c.set_lr(uint64(resumePC))

View file

@ -579,27 +579,23 @@ var ptrnames = []string{
// | args to callee |
// +------------------+ <- frame->sp
//
// (arm64)
// (arm)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
// | <unused> |
// +------------------+ <- frame->fp (aka caller's sp)
// | return address |
// | caller's retaddr |
// +------------------+
// | caller's FP | (frame pointer always enabled: TODO)
// | caller's FP (*) | (*) on ARM64, if framepointer_enabled && varp > sp
// +------------------+ <- frame->varp
// | locals |
// +------------------+
// | args to callee |
// +------------------+
// | <unused> |
// | return address |
// +------------------+ <- frame->sp
//
// varp > sp means that the function has a frame;
// varp == sp means frameless function.
//
// Alignment padding, if needed, will be between "locals" and "args to callee".
type adjustinfo struct {
old stack
@ -713,8 +709,7 @@ func adjustframe(frame *stkframe, adjinfo *adjustinfo) {
}
// Adjust saved frame pointer if there is one.
if goarch.ArchFamily == goarch.AMD64 && frame.argp-frame.varp == 2*goarch.PtrSize ||
goarch.ArchFamily == goarch.ARM64 && frame.argp-frame.varp == 3*goarch.PtrSize {
if (goarch.ArchFamily == goarch.AMD64 || goarch.ArchFamily == goarch.ARM64) && frame.argp-frame.varp == 2*goarch.PtrSize {
if stackDebug >= 3 {
print(" saved bp\n")
}
@ -728,7 +723,10 @@ func adjustframe(frame *stkframe, adjinfo *adjustinfo) {
throw("bad frame pointer")
}
}
// This is the caller's frame pointer saved in the current frame.
// On AMD64, this is the caller's frame pointer saved in the current
// frame.
// On ARM64, this is the frame pointer of the caller's caller saved
// by the caller in its frame (one word below its SP).
adjustpointer(adjinfo, unsafe.Pointer(frame.varp))
}

View file

@ -41,11 +41,6 @@ func badLR2(arg int) {
if runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" {
lrOff = 32 // FIXED_FRAME or sys.MinFrameSize
}
if runtime.GOARCH == "arm64" {
// skip 8 bytes at bottom of parent frame, then point
// to the 8 bytes of the saved PC at the top of the frame.
lrOff = 16
}
lrPtr := (*uintptr)(unsafe.Pointer(uintptr(unsafe.Pointer(&arg)) - lrOff))
*lrPtr = 0xbad

View file

@ -175,11 +175,6 @@ func (u *unwinder) initAt(pc0, sp0, lr0 uintptr, gp *g, flags unwindFlags) {
// Start in the caller's frame.
if frame.pc == 0 {
if usesLR {
// TODO: this isn't right on arm64. But also, this should
// ~never happen. Calling a nil function will panic
// when loading the PC out of the closure, not when
// branching to that PC. (Closures should always have
// valid PCs in their first word.)
frame.pc = *(*uintptr)(unsafe.Pointer(frame.sp))
frame.lr = 0
} else {
@ -374,11 +369,7 @@ func (u *unwinder) resolveInternal(innermost, isSyscall bool) {
var lrPtr uintptr
if usesLR {
if innermost && frame.sp < frame.fp || frame.lr == 0 {
if GOARCH == "arm64" {
lrPtr = frame.fp - goarch.PtrSize
} else {
lrPtr = frame.sp
}
frame.lr = *(*uintptr)(unsafe.Pointer(lrPtr))
}
} else {
@ -394,17 +385,24 @@ func (u *unwinder) resolveInternal(innermost, isSyscall bool) {
// On x86, call instruction pushes return PC before entering new function.
frame.varp -= goarch.PtrSize
}
if GOARCH == "arm64" && frame.varp > frame.sp {
frame.varp -= goarch.PtrSize // LR have been saved, skip over it.
}
// For architectures with frame pointers, if there's
// a frame, then there's a saved frame pointer here.
//
// NOTE: This code is not as general as it looks.
// On x86 and arm64, the ABI is to save the frame pointer word at the
// On x86, the ABI is to save the frame pointer word at the
// top of the stack frame, so we have to back down over it.
// No other architectures are framepointer-enabled at the moment.
// On arm64, the frame pointer should be at the bottom of
// the stack (with R29 (aka FP) = RSP), in which case we would
// not want to do the subtraction here. But we started out without
// any frame pointer, and when we wanted to add it, we didn't
// want to break all the assembly doing direct writes to 8(RSP)
// to set the first parameter to a called function.
// So we decided to write the FP link *below* the stack pointer
// (with R29 = RSP - 8 in Go functions).
// This is technically ABI-compatible but not standard.
// And it happens to end up mimicking the x86 layout.
// Other architectures may make different decisions.
if frame.varp > frame.sp && framepointer_enabled {
frame.varp -= goarch.PtrSize
}
@ -564,7 +562,7 @@ func (u *unwinder) finishInternal() {
gp := u.g.ptr()
if u.flags&(unwindPrintErrors|unwindSilentErrors) == 0 && u.frame.sp != gp.stktopsp {
print("runtime: g", gp.goid, ": frame.sp=", hex(u.frame.sp), " top=", hex(gp.stktopsp), "\n")
print("\tstack=[", hex(gp.stack.lo), "-", hex(gp.stack.hi), "]\n")
print("\tstack=[", hex(gp.stack.lo), "-", hex(gp.stack.hi), "\n")
throw("traceback did not unwind completely")
}
}

View file

@ -142,7 +142,7 @@ start 136
# (CallSize is 32 on ppc64, 8 on amd64 for frame pointer.)
start 96 nosplit
start 100 nosplit; REJECT ppc64 ppc64le
start 104 nosplit; REJECT ppc64 ppc64le
start 104 nosplit; REJECT ppc64 ppc64le arm64
start 108 nosplit; REJECT ppc64 ppc64le
start 112 nosplit; REJECT ppc64 ppc64le arm64
start 116 nosplit; REJECT ppc64 ppc64le
@ -160,7 +160,7 @@ start 136 nosplit; REJECT
# Because AMD64 uses frame pointer, it has 8 fewer bytes.
start 96 nosplit call f; f 0 nosplit
start 100 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le
start 104 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le
start 104 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le arm64
start 108 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le
start 112 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 arm64
start 116 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64
@ -176,7 +176,7 @@ start 136 nosplit call f; f 0 nosplit; REJECT
# Architectures differ in the same way as before.
start 96 nosplit call f; f 0 call f
start 100 nosplit call f; f 0 call f; REJECT ppc64 ppc64le
start 104 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64
start 104 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 arm64
start 108 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64
start 112 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 arm64
start 116 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64
@ -189,7 +189,7 @@ start 136 nosplit call f; f 0 call f; REJECT
# Indirect calls are assumed to be splitting functions.
start 96 nosplit callind
start 100 nosplit callind; REJECT ppc64 ppc64le
start 104 nosplit callind; REJECT ppc64 ppc64le amd64
start 104 nosplit callind; REJECT ppc64 ppc64le amd64 arm64
start 108 nosplit callind; REJECT ppc64 ppc64le amd64
start 112 nosplit callind; REJECT ppc64 ppc64le amd64 arm64
start 116 nosplit callind; REJECT ppc64 ppc64le amd64