diff --git a/src/cmd/cgo/gcc.go b/src/cmd/cgo/gcc.go index d3de3906b48..300ccae350b 100644 --- a/src/cmd/cgo/gcc.go +++ b/src/cmd/cgo/gcc.go @@ -1121,6 +1121,9 @@ func (p *Package) hasPointer(f *File, t ast.Expr, top bool) bool { if t.Name == "error" { return true } + if t.Name == "any" { + return true + } if goTypes[t.Name] != nil { return false } diff --git a/src/cmd/cgo/internal/test/cgo_test.go b/src/cmd/cgo/internal/test/cgo_test.go index 5393552e07a..04e06cf95ec 100644 --- a/src/cmd/cgo/internal/test/cgo_test.go +++ b/src/cmd/cgo/internal/test/cgo_test.go @@ -106,6 +106,7 @@ func TestSetEnv(t *testing.T) { testSetEnv(t) } func TestThreadLock(t *testing.T) { testThreadLockFunc(t) } func TestUnsignedInt(t *testing.T) { testUnsignedInt(t) } func TestZeroArgCallback(t *testing.T) { testZeroArgCallback(t) } +func Test76340(t *testing.T) { test76340(t) } func BenchmarkCgoCall(b *testing.B) { benchCgoCall(b) } func BenchmarkGoString(b *testing.B) { benchGoString(b) } diff --git a/src/cmd/cgo/internal/test/test.go b/src/cmd/cgo/internal/test/test.go index e83e367174a..4dd14facb50 100644 --- a/src/cmd/cgo/internal/test/test.go +++ b/src/cmd/cgo/internal/test/test.go @@ -959,6 +959,18 @@ char * const issue75751p = &issue75751v; #define issue75751m issue75751p char * const volatile issue75751p2 = &issue75751v; #define issue75751m2 issue75751p2 + +typedef struct { void *t; void *v; } GoInterface; +extern int exportAny76340Param(GoInterface); +extern GoInterface exportAny76340Return(int); + +int issue76340testFromC(GoInterface obj) { + return exportAny76340Param(obj); +} + +GoInterface issue76340returnFromC(int val) { + return exportAny76340Return(val); +} */ import "C" @@ -2407,3 +2419,22 @@ func test69086(t *testing.T) { func test75751() int { return int(*C.issue75751m) + int(*C.issue75751m2) } + +// Issue 76340. 
+func test76340(t *testing.T) { + var emptyInterface C.GoInterface + r1 := C.issue76340testFromC(emptyInterface) + if r1 != 0 { + t.Errorf("issue76340testFromC with nil interface: got %d, want 0", r1) + } + + r2 := C.issue76340returnFromC(42) + if r2.t == nil && r2.v == nil { + t.Error("issue76340returnFromC(42) returned nil interface") + } + + r3 := C.issue76340returnFromC(0) + if r3.t != nil || r3.v != nil { + t.Errorf("issue76340returnFromC(0) returned non-nil interface: got %v, want nil", r3) + } +} diff --git a/src/cmd/cgo/internal/test/testx.go b/src/cmd/cgo/internal/test/testx.go index 9a63b9e1008..21ba52260ef 100644 --- a/src/cmd/cgo/internal/test/testx.go +++ b/src/cmd/cgo/internal/test/testx.go @@ -595,3 +595,21 @@ func test49633(t *testing.T) { t.Errorf("msg = %q, want 'hello'", v.msg) } } + +//export exportAny76340Param +func exportAny76340Param(obj any) C.int { + if obj == nil { + return 0 + } + + return 1 +} + +//export exportAny76340Return +func exportAny76340Return(val C.int) any { + if val == 0 { + return nil + } + + return int(val) +} diff --git a/src/cmd/cgo/out.go b/src/cmd/cgo/out.go index 00c9e8c9a3e..dc1e5b29e59 100644 --- a/src/cmd/cgo/out.go +++ b/src/cmd/cgo/out.go @@ -1558,6 +1558,9 @@ func (p *Package) doCgoType(e ast.Expr, m map[ast.Expr]bool) *Type { if t.Name == "error" { return &Type{Size: 2 * p.PtrSize, Align: p.PtrSize, C: c("GoInterface")} } + if t.Name == "any" { + return &Type{Size: 2 * p.PtrSize, Align: p.PtrSize, C: c("GoInterface")} + } if r, ok := goTypes[t.Name]; ok { return goTypesFixup(r) } diff --git a/src/cmd/compile/abi-internal.md b/src/cmd/compile/abi-internal.md index eae230dc070..8de8356132a 100644 --- a/src/cmd/compile/abi-internal.md +++ b/src/cmd/compile/abi-internal.md @@ -833,6 +833,51 @@ The riscv64 has Zicsr extension for control and status register (CSR) and treated as scratch register. All bits in CSR are system flags and are not modified by Go. +### s390x architecture + +The s390x architecture uses R2 – R9 for integer arguments and integer results. + +It uses F0 – F15 for floating-point arguments and results. + +Special-purpose registers used within Go generated code and Go assembly code +are as follows: + +| Register | Call meaning | Return meaning | Body meaning | +| --- | --- | --- | --- | +| R0 | Zero value | Same | Same | +| R1 | Scratch | Scratch | Scratch | +| R10, R11 | used by the assembler | Same | Same | +| R12 | Closure context pointer | Same | Same | +| R13 | Current goroutine | Same | Same | +| R14 | Link register | Link register | Scratch | +| R15 | Stack pointer | Same | Same | + +*Rationale*: These register meanings are compatible with Go’s stack-based +calling convention. + +#### Stack layout + +The stack pointer, R15, grows down and is aligned to 8 bytes. + +A function's stack frame, after the frame is created, is laid out as +follows: + + +------------------------------+ + | ... locals ... | + | ... outgoing arguments ... | + | return PC | ← R15 points to + +------------------------------+ ↓ lower addresses + +This stack layout is used by both register-based (ABIInternal) and +stack-based (ABI0) calling conventions. + +The "return PC" is loaded to the link register R14, as part of the +s390x `BL` operation. + +#### Flags +The s390x architecture maintains a single condition code (CC) field in the Program Status Word (PSW). +Go-generated code sets and tests this condition code to control conditional branches. 
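(Illustrative sketch, not part of the patch.) Applying the register assignments documented above to a concrete signature, and assuming the standard ABIInternal algorithm of handing out integer arguments to R2 – R9 and floating-point arguments to F0 – F15 in declaration order:

    // Hypothetical example showing where arguments and results would live
    // on s390x under GOEXPERIMENT=regabiargs.
    func scale(n int64, f float64, m int64) (int64, float64) {
        // On call:   n -> R2, m -> R3 (next free integer register), f -> F0.
        // On return: the int64 result -> R2, the float64 result -> F0.
        p := n * m
        return p, f * float64(p)
    }

Arguments that do not fit in the eight integer or sixteen floating-point registers would be assigned to the stack, following the generic assignment rules earlier in this document.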
+ ## Future directions ### Spill path improvements diff --git a/src/cmd/compile/internal/s390x/galign.go b/src/cmd/compile/internal/s390x/galign.go index d880834c220..1fb371a52c6 100644 --- a/src/cmd/compile/internal/s390x/galign.go +++ b/src/cmd/compile/internal/s390x/galign.go @@ -20,4 +20,6 @@ func Init(arch *ssagen.ArchInfo) { arch.SSAMarkMoves = ssaMarkMoves arch.SSAGenValue = ssaGenValue arch.SSAGenBlock = ssaGenBlock + arch.LoadRegResult = loadRegResult + arch.SpillArgReg = spillArgReg } diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go index de00f1ef8ce..ce060597d9a 100644 --- a/src/cmd/compile/internal/s390x/ssa.go +++ b/src/cmd/compile/internal/s390x/ssa.go @@ -10,6 +10,7 @@ import ( "cmd/compile/internal/base" "cmd/compile/internal/ir" "cmd/compile/internal/logopt" + "cmd/compile/internal/objw" "cmd/compile/internal/ssa" "cmd/compile/internal/ssagen" "cmd/compile/internal/types" @@ -540,6 +541,19 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() ssagen.AddrAuto(&p.To, v) + case ssa.OpArgIntReg, ssa.OpArgFloatReg: + // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill + // The loop only runs once. + for _, a := range v.Block.Func.RegArgs { + // Pass the spill/unspill information along to the assembler, offset by size of + // the saved LR slot. + addr := ssagen.SpillSlotAddr(a, s390x.REGSP, base.Ctxt.Arch.FixedFrameSize) + s.FuncInfo().AddSpill( + obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)}) + } + v.Block.Func.RegArgs = nil + + ssagen.CheckArgReg(v) case ssa.OpS390XLoweredGetClosurePtr: // Closure pointer is R12 (already) ssagen.CheckLoweredGetClosurePtr(v) @@ -1029,3 +1043,22 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) { s.Br(s390x.ABR, succs[1]) } } + +func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog { + p := s.Prog(loadByType(t)) + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_AUTO + p.From.Sym = n.Linksym() + p.From.Offset = n.FrameOffset() + off + p.To.Type = obj.TYPE_REG + p.To.Reg = reg + return p +} + +func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog { + p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off) + p.To.Name = obj.NAME_PARAM + p.To.Sym = n.Linksym() + p.Pos = p.Pos.WithNotStmt() + return p +} diff --git a/src/cmd/compile/internal/ssa/_gen/S390XOps.go b/src/cmd/compile/internal/ssa/_gen/S390XOps.go index c002d5bcc38..9e67a06ce8c 100644 --- a/src/cmd/compile/internal/ssa/_gen/S390XOps.go +++ b/src/cmd/compile/internal/ssa/_gen/S390XOps.go @@ -484,10 +484,10 @@ func init() { {name: "CLEAR", argLength: 2, reg: regInfo{inputs: []regMask{ptr, 0}}, asm: "CLEAR", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Write"}, - {name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem - {name: "CALLtail", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). 
arg0=mem, auxint=argsize, returns mem - {name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{ptrsp, buildReg("R12"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem - {name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{ptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem + {name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem + {name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem + {name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{ptrsp, buildReg("R12"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, last arg=mem, auxint=argsize, returns mem + {name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{ptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, last arg=mem, auxint=argsize, returns mem // (InvertFlags (CMP a b)) == (CMP b a) // InvertFlags is a pseudo-op which can't appear in assembly output. @@ -812,16 +812,18 @@ func init() { } archs = append(archs, arch{ - name: "S390X", - pkg: "cmd/internal/obj/s390x", - genfile: "../../s390x/ssa.go", - ops: S390Xops, - blocks: S390Xblocks, - regnames: regNamesS390X, - gpregmask: gp, - fpregmask: fp, - framepointerreg: -1, // not used - linkreg: int8(num["R14"]), + name: "S390X", + pkg: "cmd/internal/obj/s390x", + genfile: "../../s390x/ssa.go", + ops: S390Xops, + blocks: S390Xblocks, + regnames: regNamesS390X, + ParamIntRegNames: "R2 R3 R4 R5 R6 R7 R8 R9", + ParamFloatRegNames: "F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15", + gpregmask: gp, + fpregmask: fp, + framepointerreg: -1, // not used + linkreg: int8(num["R14"]), imports: []string{ "cmd/internal/obj/s390x", }, diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index 3850fbf9616..9cfaa58839f 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -313,6 +313,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo c.registers = registersS390X[:] c.gpRegMask = gpRegMaskS390X c.fpRegMask = fpRegMaskS390X + c.intParamRegs = paramIntRegS390X + c.floatParamRegs = paramFloatRegS390X c.FPReg = framepointerRegS390X c.LinkReg = linkRegS390X c.hasGReg = true diff --git a/src/cmd/compile/internal/ssa/debug_lines_test.go b/src/cmd/compile/internal/ssa/debug_lines_test.go index 79dbd91c2fc..5dbfdeb7f6e 100644 --- a/src/cmd/compile/internal/ssa/debug_lines_test.go +++ b/src/cmd/compile/internal/ssa/debug_lines_test.go @@ -45,7 +45,7 @@ func testGoArch() string { func hasRegisterABI() bool { switch testGoArch() { - case "amd64", "arm64", "loong64", "ppc64", "ppc64le", "riscv": + case "amd64", "arm64", "loong64", "ppc64", "ppc64le", "riscv", "s390x": return true } return false diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 1ad737263a2..966d15b83ca 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ 
b/src/cmd/compile/internal/ssa/opGen.go @@ -81087,7 +81087,7 @@ var opcodeTable = [...]opInfo{ { name: "CALLstatic", auxType: auxCallOff, - argLen: 1, + argLen: -1, clobberFlags: true, call: true, reg: regInfo{ @@ -81097,7 +81097,7 @@ var opcodeTable = [...]opInfo{ { name: "CALLtail", auxType: auxCallOff, - argLen: 1, + argLen: -1, clobberFlags: true, call: true, tailCall: true, @@ -81108,7 +81108,7 @@ var opcodeTable = [...]opInfo{ { name: "CALLclosure", auxType: auxCallOff, - argLen: 3, + argLen: -1, clobberFlags: true, call: true, reg: regInfo{ @@ -81122,7 +81122,7 @@ var opcodeTable = [...]opInfo{ { name: "CALLinter", auxType: auxCallOff, - argLen: 2, + argLen: -1, clobberFlags: true, call: true, reg: regInfo{ @@ -93219,8 +93219,8 @@ var registersS390X = [...]Register{ {31, s390x.REG_F15, "F15"}, {32, 0, "SB"}, } -var paramIntRegS390X = []int8(nil) -var paramFloatRegS390X = []int8(nil) +var paramIntRegS390X = []int8{2, 3, 4, 5, 6, 7, 8, 9} +var paramFloatRegS390X = []int8{16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31} var gpRegMaskS390X = regMask(23551) var fpRegMaskS390X = regMask(4294901760) var specialRegMaskS390X = regMask(0) diff --git a/src/cmd/internal/obj/s390x/a.out.go b/src/cmd/internal/obj/s390x/a.out.go index caf5ec09358..6b16d7a9bd5 100644 --- a/src/cmd/internal/obj/s390x/a.out.go +++ b/src/cmd/internal/obj/s390x/a.out.go @@ -139,8 +139,8 @@ const ( REG_RESERVED // end of allocated registers REGARG = -1 // -1 disables passing the first argument in register - REGRT1 = REG_R3 // used during zeroing of the stack - not reserved - REGRT2 = REG_R4 // used during zeroing of the stack - not reserved + REGRT1 = REG_R1 // used during zeroing of the stack - not reserved + REGRT2 = REG_R10 // used during zeroing of the stack - not reserved REGTMP = REG_R10 // scratch register used in the assembler and linker REGTMP2 = REG_R11 // scratch register used in the assembler and linker REGCTXT = REG_R12 // context for closures diff --git a/src/cmd/internal/obj/s390x/objz.go b/src/cmd/internal/obj/s390x/objz.go index 44c1a7d586d..4bfc1f7b2c0 100644 --- a/src/cmd/internal/obj/s390x/objz.go +++ b/src/cmd/internal/obj/s390x/objz.go @@ -506,7 +506,13 @@ func (c *ctxtz) stacksplitPre(p *obj.Prog, framesize int32) (pPre, pPreempt, pCh // Save LR and REGCTXT const frameSize = 16 p = c.ctxt.StartUnsafePoint(p, c.newprog) + + // Spill arguments. This has to happen before we open + // any more frame space. + p = c.cursym.Func().SpillRegisterArgs(p, c.newprog) + // MOVD LR, -16(SP) + p = obj.Appendp(p, c.newprog) p.As = AMOVD p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} @@ -549,10 +555,12 @@ func (c *ctxtz) stacksplitPre(p *obj.Prog, framesize int32) (pPre, pPreempt, pCh p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REGSP} p.Spadj = -frameSize + // Unspill arguments + p = c.cursym.Func().UnspillRegisterArgs(p, c.newprog) p = c.ctxt.EndUnsafePoint(p, c.newprog, -1) } - // MOVD g_stackguard(g), R3 + // MOVD g_stackguard(g), R10 p = obj.Appendp(p, c.newprog) // Jump back to here after morestack returns. pCheck = p @@ -565,7 +573,7 @@ func (c *ctxtz) stacksplitPre(p *obj.Prog, framesize int32) (pPre, pPreempt, pCh p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1 } p.To.Type = obj.TYPE_REG - p.To.Reg = REG_R3 + p.To.Reg = REG_R10 // Mark the stack bound check and morestack call async nonpreemptible. 
// If we get preempted here, when resumed the preemption request is @@ -579,7 +587,7 @@ func (c *ctxtz) stacksplitPre(p *obj.Prog, framesize int32) (pPre, pPreempt, pCh p = obj.Appendp(p, c.newprog) p.From.Type = obj.TYPE_REG - p.From.Reg = REG_R3 + p.From.Reg = REG_R10 p.Reg = REGSP p.As = ACMPUBGE p.To.Type = obj.TYPE_BRANCH @@ -598,40 +606,40 @@ func (c *ctxtz) stacksplitPre(p *obj.Prog, framesize int32) (pPre, pPreempt, pCh // stack guard to incorrectly succeed. We explicitly // guard against underflow. // - // MOVD $(framesize-StackSmall), R4 - // CMPUBLT SP, R4, label-of-call-to-morestack + // MOVD $(framesize-StackSmall), R11 + // CMPUBLT SP, R11, label-of-call-to-morestack p = obj.Appendp(p, c.newprog) p.As = AMOVD p.From.Type = obj.TYPE_CONST p.From.Offset = offset p.To.Type = obj.TYPE_REG - p.To.Reg = REG_R4 + p.To.Reg = REG_R11 p = obj.Appendp(p, c.newprog) pPreempt = p p.As = ACMPUBLT p.From.Type = obj.TYPE_REG p.From.Reg = REGSP - p.Reg = REG_R4 + p.Reg = REG_R11 p.To.Type = obj.TYPE_BRANCH } // Check against the stack guard. We've ensured this won't underflow. - // ADD $-(framesize-StackSmall), SP, R4 - // CMPUBGE stackguard, R4, label-of-call-to-morestack + // ADD $-(framesize-StackSmall), SP, R11 + // CMPUBGE stackguard, R11, label-of-call-to-morestack p = obj.Appendp(p, c.newprog) p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = -offset p.Reg = REGSP p.To.Type = obj.TYPE_REG - p.To.Reg = REG_R4 + p.To.Reg = REG_R11 p = obj.Appendp(p, c.newprog) p.From.Type = obj.TYPE_REG - p.From.Reg = REG_R3 - p.Reg = REG_R4 + p.From.Reg = REG_R10 + p.Reg = REG_R11 p.As = ACMPUBGE p.To.Type = obj.TYPE_BRANCH @@ -654,18 +662,22 @@ func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre, pPreempt, pCheck *obj.Prog, fr pcdata := c.ctxt.EmitEntryStackMap(c.cursym, spfix, c.newprog) pcdata = c.ctxt.StartUnsafePoint(pcdata, c.newprog) + if pPreempt != nil { + pPreempt.To.SetTarget(pcdata) + } + pPre.To.SetTarget(pcdata) + + // Spill the register args that could be clobbered by the + // morestack code. + spill := c.cursym.Func().SpillRegisterArgs(pcdata, c.newprog) // MOVD LR, R5 - p = obj.Appendp(pcdata, c.newprog) - pPre.To.SetTarget(p) + p = obj.Appendp(spill, c.newprog) p.As = AMOVD p.From.Type = obj.TYPE_REG p.From.Reg = REG_LR p.To.Type = obj.TYPE_REG p.To.Reg = REG_R5 - if pPreempt != nil { - pPreempt.To.SetTarget(p) - } // BL runtime.morestack(SB) p = obj.Appendp(p, c.newprog) @@ -680,10 +692,12 @@ func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre, pPreempt, pCheck *obj.Prog, fr p.To.Sym = c.ctxt.Lookup("runtime.morestack") } + // The instructions which unspill regs should be preemptible. p = c.ctxt.EndUnsafePoint(p, c.newprog, -1) + unspill := c.cursym.Func().UnspillRegisterArgs(p, c.newprog) // BR pCheck - p = obj.Appendp(p, c.newprog) + p = obj.Appendp(unspill, c.newprog) p.As = ABR p.To.Type = obj.TYPE_BRANCH diff --git a/src/internal/abi/abi_s390x.go b/src/internal/abi/abi_s390x.go new file mode 100644 index 00000000000..9b07f27382d --- /dev/null +++ b/src/internal/abi/abi_s390x.go @@ -0,0 +1,19 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.regabiargs + +package abi + +const ( + // See abi_generic.go. + + // R2 - R9. 
+ IntArgRegs = 8 + + // F0 - F15 + FloatArgRegs = 16 + + EffectiveFloatRegSize = 8 +) diff --git a/src/internal/buildcfg/exp.go b/src/internal/buildcfg/exp.go index ddd05c6f287..df41f793388 100644 --- a/src/internal/buildcfg/exp.go +++ b/src/internal/buildcfg/exp.go @@ -65,6 +65,8 @@ func ParseGOEXPERIMENT(goos, goarch, goexp string) (*ExperimentFlags, error) { case "amd64", "arm64", "loong64", "ppc64le", "ppc64", "riscv64": regabiAlwaysOn = true regabiSupported = true + case "s390x": + regabiSupported = true } // Older versions (anything before V16) of dsymutil don't handle @@ -141,7 +143,7 @@ func ParseGOEXPERIMENT(goos, goarch, goexp string) (*ExperimentFlags, error) { flags.RegabiWrappers = true flags.RegabiArgs = true } - // regabi is only supported on amd64, arm64, loong64, riscv64, ppc64 and ppc64le. + // regabi is only supported on amd64, arm64, loong64, riscv64, s390x, ppc64 and ppc64le. if !regabiSupported { flags.RegabiWrappers = false flags.RegabiArgs = false diff --git a/src/internal/bytealg/compare_s390x.s b/src/internal/bytealg/compare_s390x.s index 539454870d3..64f537be99e 100644 --- a/src/internal/bytealg/compare_s390x.s +++ b/src/internal/bytealg/compare_s390x.s @@ -5,65 +5,93 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 - MOVD a_base+0(FP), R3 - MOVD a_len+8(FP), R4 - MOVD b_base+24(FP), R5 - MOVD b_len+32(FP), R6 - LA ret+48(FP), R7 +TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 +#ifndef GOEXPERIMENT_regabiargs + MOVD a_base+0(FP), R2 + MOVD a_len+8(FP), R3 + MOVD b_base+24(FP), R4 + MOVD b_len+32(FP), R5 + LA ret+48(FP), R6 +#else + // R2 = a_base + // R3 = a_len + // R4 = a_cap (unused) + // R5 = b_base (want in R4) + // R6 = b_len (want in R5) + // R7 = b_cap (unused) + MOVD R5, R4 + MOVD R6, R5 +#endif BR cmpbody<>(SB) -TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 - MOVD a_base+0(FP), R3 - MOVD a_len+8(FP), R4 - MOVD b_base+16(FP), R5 - MOVD b_len+24(FP), R6 - LA ret+32(FP), R7 +TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOVD a_base+0(FP), R2 + MOVD a_len+8(FP), R3 + MOVD b_base+16(FP), R4 + MOVD b_len+24(FP), R5 + LA ret+32(FP), R6 +#endif + // R2 = a_base + // R3 = a_len + // R4 = b_base + // R5 = b_len + BR cmpbody<>(SB) // input: -// R3 = a -// R4 = alen -// R5 = b -// R6 = blen -// R7 = address of output word (stores -1/0/1 here) +// R2 = a +// R3 = alen +// R4 = b +// R5 = blen +// For regabiargs output value( -1/0/1 ) stored in R2 +// For !regabiargs address of output word( stores -1/0/1 ) stored in R6 TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0 - CMPBEQ R3, R5, cmplengths - MOVD R4, R8 - CMPBLE R4, R6, amin - MOVD R6, R8 + CMPBEQ R2, R4, cmplengths + MOVD R3, R7 + CMPBLE R3, R5, amin + MOVD R5, R7 amin: - CMPBEQ R8, $0, cmplengths - CMP R8, $256 + CMPBEQ R7, $0, cmplengths + CMP R7, $256 BLE tail loop: - CLC $256, 0(R3), 0(R5) + CLC $256, 0(R2), 0(R4) BGT gt BLT lt - SUB $256, R8 - MOVD $256(R3), R3 - MOVD $256(R5), R5 - CMP R8, $256 + SUB $256, R7 + MOVD $256(R2), R2 + MOVD $256(R4), R4 + CMP R7, $256 BGT loop tail: - SUB $1, R8 - EXRL $cmpbodyclc<>(SB), R8 + SUB $1, R7 + EXRL $cmpbodyclc<>(SB), R7 BGT gt BLT lt cmplengths: - CMP R4, R6 + CMP R3, R5 BEQ eq BLT lt gt: - MOVD $1, 0(R7) + MOVD $1, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVD R2, 0(R6) +#endif RET lt: - MOVD $-1, 0(R7) + MOVD $-1, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVD R2, 0(R6) +#endif RET eq: - MOVD $0, 0(R7) + MOVD $0, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVD R2, 0(R6) +#endif RET TEXT 
cmpbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0 - CLC $1, 0(R3), 0(R5) + CLC $1, 0(R2), 0(R4) RET diff --git a/src/internal/bytealg/equal_s390x.s b/src/internal/bytealg/equal_s390x.s index 67f814dfc1c..48e8d0f1547 100644 --- a/src/internal/bytealg/equal_s390x.s +++ b/src/internal/bytealg/equal_s390x.s @@ -6,80 +6,92 @@ #include "textflag.h" // memequal(a, b unsafe.Pointer, size uintptr) bool -TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 - MOVD a+0(FP), R3 - MOVD b+8(FP), R5 - MOVD size+16(FP), R6 - LA ret+24(FP), R7 +TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 +#ifndef GOEXPERIMENT_regabiargs + MOVD a+0(FP), R2 + MOVD b+8(FP), R3 + MOVD size+16(FP), R4 + LA ret+24(FP), R5 +#endif BR memeqbody<>(SB) // memequal_varlen(a, b unsafe.Pointer) bool -TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 - MOVD a+0(FP), R3 - MOVD b+8(FP), R5 - MOVD 8(R12), R6 // compiler stores size at offset 8 in the closure - LA ret+16(FP), R7 +TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 +#ifndef GOEXPERIMENT_regabiargs + MOVD a+0(FP), R2 + MOVD b+8(FP), R3 + LA ret+16(FP), R5 +#endif + + MOVD 8(R12), R4 // compiler stores size at offset 8 in the closure BR memeqbody<>(SB) // input: -// R3 = a -// R5 = b -// R6 = len -// R7 = address of output byte (stores 0 or 1 here) +// R2 = a +// R3 = b +// R4 = len +// For regabiargs output value( 0/1 ) stored in R2 +// For !regabiargs address of output byte( stores 0/1 ) stored in R5 // a and b have the same length TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0 - CMPBEQ R3, R5, equal + CMPBEQ R2, R3, equal loop: - CMPBEQ R6, $0, equal - CMPBLT R6, $32, tiny - CMP R6, $256 + CMPBEQ R4, $0, equal + CMPBLT R4, $32, tiny + CMP R4, $256 BLT tail - CLC $256, 0(R3), 0(R5) + CLC $256, 0(R2), 0(R3) BNE notequal - SUB $256, R6 + SUB $256, R4 + LA 256(R2), R2 LA 256(R3), R3 - LA 256(R5), R5 BR loop tail: - SUB $1, R6, R8 + SUB $1, R4, R8 EXRL $memeqbodyclc<>(SB), R8 BEQ equal notequal: - MOVB $0, 0(R7) + MOVD $0, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVB R2, 0(R5) +#endif RET equal: - MOVB $1, 0(R7) + MOVD $1, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVB R2, 0(R5) +#endif RET tiny: - MOVD $0, R2 - CMPBLT R6, $16, lt16 - MOVD 0(R3), R8 - MOVD 0(R5), R9 + MOVD $0, R1 + CMPBLT R4, $16, lt16 + MOVD 0(R2), R8 + MOVD 0(R3), R9 CMPBNE R8, R9, notequal - MOVD 8(R3), R8 - MOVD 8(R5), R9 + MOVD 8(R2), R8 + MOVD 8(R3), R9 CMPBNE R8, R9, notequal - LA 16(R2), R2 - SUB $16, R6 + LA 16(R1), R1 + SUB $16, R4 lt16: - CMPBLT R6, $8, lt8 - MOVD 0(R3)(R2*1), R8 - MOVD 0(R5)(R2*1), R9 + CMPBLT R4, $8, lt8 + MOVD 0(R2)(R1*1), R8 + MOVD 0(R3)(R1*1), R9 CMPBNE R8, R9, notequal - LA 8(R2), R2 - SUB $8, R6 + LA 8(R1), R1 + SUB $8, R4 lt8: - CMPBLT R6, $4, lt4 - MOVWZ 0(R3)(R2*1), R8 - MOVWZ 0(R5)(R2*1), R9 + CMPBLT R4, $4, lt4 + MOVWZ 0(R2)(R1*1), R8 + MOVWZ 0(R3)(R1*1), R9 CMPBNE R8, R9, notequal - LA 4(R2), R2 - SUB $4, R6 + LA 4(R1), R1 + SUB $4, R4 lt4: #define CHECK(n) \ - CMPBEQ R6, $n, equal \ - MOVB n(R3)(R2*1), R8 \ - MOVB n(R5)(R2*1), R9 \ + CMPBEQ R4, $n, equal \ + MOVB n(R2)(R1*1), R8 \ + MOVB n(R3)(R1*1), R9 \ CMPBNE R8, R9, notequal CHECK(0) CHECK(1) @@ -88,5 +100,5 @@ lt4: BR equal TEXT memeqbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0 - CLC $1, 0(R3), 0(R5) + CLC $1, 0(R2), 0(R3) RET diff --git a/src/internal/bytealg/indexbyte_s390x.s b/src/internal/bytealg/indexbyte_s390x.s index cf88d92a24b..343ed672f76 100644 --- a/src/internal/bytealg/indexbyte_s390x.s +++ b/src/internal/bytealg/indexbyte_s390x.s @@ -5,104 +5,134 @@ #include "go_asm.h" #include "textflag.h" -TEXT 
·IndexByte(SB),NOSPLIT|NOFRAME,$0-40 - MOVD b_base+0(FP), R3// b_base => R3 - MOVD b_len+8(FP), R4 // b_len => R4 - MOVBZ c+24(FP), R5 // c => R5 - MOVD $ret+32(FP), R2 // &ret => R9 - BR indexbytebody<>(SB) -TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32 - MOVD s_base+0(FP), R3// s_base => R3 - MOVD s_len+8(FP), R4 // s_len => R4 - MOVBZ c+16(FP), R5 // c => R5 - MOVD $ret+24(FP), R2 // &ret => R9 - BR indexbytebody<>(SB) +TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOVD b_base+0(FP), R2// b_base => R2 + MOVD b_len+8(FP), R3 // b_len => R3 + MOVBZ c+24(FP), R4 // c => R4 + MOVD $ret+32(FP), R5 // &ret => R5 +#else + MOVD R5, R4 + AND $0xff, R4 +#endif + BR indexbytebody<>(SB) + +TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32 +#ifndef GOEXPERIMENT_regabiargs + MOVD s_base+0(FP), R2 // s_base => R2 + MOVD s_len+8(FP), R3 // s_len => R3 + MOVBZ c+16(FP), R4 // c => R4 + MOVD $ret+24(FP), R5 // &ret => R5 +#else + AND $0xff, R4 +#endif + BR indexbytebody<>(SB) // input: -// R3: s -// R4: s_len -// R5: c -- byte sought -// R2: &ret -- address to put index into +// R2: s +// R3: s_len +// R4: c -- byte sought +// For regabiargs output value(index) stored in R2 +// For !regabiargs address of output value(index) stored in R5 TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0 - CMPBEQ R4, $0, notfound - MOVD R3, R6 // store base for later - ADD R3, R4, R8 // the address after the end of the string - //if the length is small, use loop; otherwise, use vector or srst search - CMPBGE R4, $16, large + CMPBEQ R3, $0, notfound + MOVD R2, R6 // store base for later + ADD R2, R3, R8 // the address after the end of the string + //if the length is small, use loop; otherwise, use vector or srst search + CMPBGE R3, $16, large residual: - CMPBEQ R3, R8, notfound - MOVBZ 0(R3), R7 - LA 1(R3), R3 - CMPBNE R7, R5, residual + CMPBEQ R2, R8, notfound + MOVBZ 0(R2), R7 + LA 1(R2), R2 + CMPBNE R7, R4, residual found: - SUB R6, R3 - SUB $1, R3 - MOVD R3, 0(R2) - RET + SUB R6, R2 + SUB $1, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVD R2, 0(R5) +#endif + RET notfound: - MOVD $-1, 0(R2) - RET +#ifndef GOEXPERIMENT_regabiargs + MOVD $-1, 0(R5) +#else + MOVD $-1, R2 +#endif + RET large: - MOVBZ internal∕cpu·S390X+const_offsetS390xHasVX(SB), R1 - CMPBNE R1, $0, vectorimpl + MOVBZ internal∕cpu·S390X+const_offsetS390xHasVX(SB), R1 + CMPBNE R1, $0, vectorimpl srstimpl: // no vector facility - MOVBZ R5, R0 // c needs to be in R0, leave until last minute as currently R0 is expected to be 0 + MOVBZ R4, R0 // c needs to be in R0, leave until last minute as currently R0 is expected to be 0 srstloop: - WORD $0xB25E0083 // srst %r8, %r3 (search the range [R3, R8)) - BVS srstloop // interrupted - continue - BGT notfoundr0 + WORD $0xB25E0082 // srst %r8, %r2 (search the range [R2, R8)) + BVS srstloop // interrupted - continue + BGT notfoundr0 foundr0: - XOR R0, R0 // reset R0 - SUB R6, R8 // remove base - MOVD R8, 0(R2) - RET + XOR R0, R0 // reset R0 + SUB R6, R8 // remove base +#ifndef GOEXPERIMENT_regabiargs + MOVD R8, 0(R5) +#else + MOVD R8, R2 +#endif + RET notfoundr0: - XOR R0, R0 // reset R0 - MOVD $-1, 0(R2) - RET + XOR R0, R0 // reset R0 +#ifndef GOEXPERIMENT_regabiargs + MOVD $-1, 0(R5) +#else + MOVD $-1, R2 +#endif + RET vectorimpl: - //if the address is not 16byte aligned, use loop for the header - MOVD R3, R8 - AND $15, R8 - CMPBGT R8, $0, notaligned + //if the address is not 16byte aligned, use loop for the header + MOVD R2, R8 + AND $15, R8 + CMPBGT R8, $0, notaligned aligned: - ADD R6, R4, R8 - 
MOVD R8, R7 - AND $-16, R7 - // replicate c across V17 - VLVGB $0, R5, V19 - VREPB $0, V19, V17 + ADD R6, R3, R8 + MOVD R8, R7 + AND $-16, R7 + // replicate c across V17 + VLVGB $0, R4, V19 + VREPB $0, V19, V17 vectorloop: - CMPBGE R3, R7, residual - VL 0(R3), V16 // load string to be searched into V16 - ADD $16, R3 - VFEEBS V16, V17, V18 // search V17 in V16 and set conditional code accordingly - BVS vectorloop + CMPBGE R2, R7, residual + VL 0(R2), V16 // load string to be searched into V16 + ADD $16, R2 + VFEEBS V16, V17, V18 // search V17 in V16 and set conditional code accordingly + BVS vectorloop - // when vector search found c in the string - VLGVB $7, V18, R7 // load 7th element of V18 containing index into R7 - SUB $16, R3 - SUB R6, R3 - ADD R3, R7 - MOVD R7, 0(R2) - RET + // when vector search found c in the string + VLGVB $7, V18, R7 // load 7th element of V18 containing index into R7 + SUB $16, R2 + SUB R6, R2 + ADD R2, R7 +#ifndef GOEXPERIMENT_regabiargs + MOVD R7, 0(R5) +#else + MOVD R7, R2 +#endif + RET notaligned: - MOVD R3, R8 - AND $-16, R8 - ADD $16, R8 + MOVD R2, R8 + AND $-16, R8 + ADD $16, R8 notalignedloop: - CMPBEQ R3, R8, aligned - MOVBZ 0(R3), R7 - LA 1(R3), R3 - CMPBNE R7, R5, notalignedloop - BR found + CMPBEQ R2, R8, aligned + MOVBZ 0(R2), R7 + LA 1(R2), R2 + CMPBNE R7, R4, notalignedloop + BR found + diff --git a/src/internal/runtime/syscall/linux/asm_linux_s390x.s b/src/internal/runtime/syscall/linux/asm_linux_s390x.s index 1b27f293907..c912afab649 100644 --- a/src/internal/runtime/syscall/linux/asm_linux_s390x.s +++ b/src/internal/runtime/syscall/linux/asm_linux_s390x.s @@ -5,7 +5,16 @@ #include "textflag.h" // func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr) -TEXT ·Syscall6(SB),NOSPLIT,$0-80 +TEXT ·Syscall6(SB),NOSPLIT,$0-80 +#ifdef GOEXPERIMENT_regabiargs + MOVD R2, R1 + MOVD R3, R2 + MOVD R4, R3 + MOVD R5, R4 + MOVD R6, R5 + MOVD R7, R6 + MOVD R8, R7 +#else MOVD num+0(FP), R1 // syscall entry MOVD a1+8(FP), R2 MOVD a2+16(FP), R3 @@ -13,16 +22,27 @@ TEXT ·Syscall6(SB),NOSPLIT,$0-80 MOVD a4+32(FP), R5 MOVD a5+40(FP), R6 MOVD a6+48(FP), R7 +#endif SYSCALL MOVD $0xfffffffffffff001, R8 CMPUBLT R2, R8, ok +#ifdef GOEXPERIMENT_regabiargs + MOVD $0, R3 + NEG R2, R4 + MOVD $-1, R2 +#else MOVD $-1, r1+56(FP) MOVD $0, r2+64(FP) NEG R2, R2 MOVD R2, errno+72(FP) +#endif RET ok: +#ifdef GOEXPERIMENT_regabiargs + MOVD $0, R4 +#else MOVD R2, r1+56(FP) MOVD R3, r2+64(FP) MOVD $0, errno+72(FP) +#endif RET diff --git a/src/reflect/asm_s390x.s b/src/reflect/asm_s390x.s index 4bd6613004f..22cbd38ee5c 100644 --- a/src/reflect/asm_s390x.s +++ b/src/reflect/asm_s390x.s @@ -5,34 +5,82 @@ #include "textflag.h" #include "funcdata.h" +// The frames of each of the two functions below contain two locals, at offsets +// that are known to the runtime. +// +// The first local is a bool called retValid with a whole pointer-word reserved +// for it on the stack. The purpose of this word is so that the runtime knows +// whether the stack-allocated return space contains valid values for stack +// scanning. +// +// The second local is an abi.RegArgs value whose offset is also known to the +// runtime, so that a stack map for it can be constructed, since it contains +// pointers visible to the GC. + +#define LOCAL_RETVALID 40 +#define LOCAL_REGARGS 48 + +// The frame size of the functions below is +// 32 (args of callReflect/callMethod) + 8 (bool + padding) + 264 (abi.RegArgs) = 304. + // makeFuncStub is the code half of the function returned by MakeFunc. 
// See the comment on the declaration of makeFuncStub in makefunc.go // for more details. // No arg size here, runtime pulls arg map out of the func value. -TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$40 +TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$304 NO_LOCAL_POINTERS + ADD $LOCAL_REGARGS, R15, R10 // spillArgs using R10 + BL runtime·spillArgs(SB) + MOVD R12, 32(R15) // save context reg R12 > args of moveMakeFuncArgPtrs < LOCAL_REGARGS +#ifdef GOEXPERIMENT_regabiargs + MOVD R12, R2 + MOVD R10, R3 +#else MOVD R12, 8(R15) - MOVD $argframe+0(FP), R3 - MOVD R3, 16(R15) - MOVB $0, 40(R15) - ADD $40, R15, R3 - MOVD R3, 24(R15) - MOVD $0, 32(R15) + MOVD R10, 16(R15) +#endif + BL ·moveMakeFuncArgPtrs(SB) + MOVD 32(R15), R12 // restore context reg R12 + MOVD R12, 8(R15) + MOVD $argframe+0(FP), R1 + MOVD R1, 16(R15) + MOVB $0, LOCAL_RETVALID(R15) + ADD $LOCAL_RETVALID, R15, R1 + MOVD R1, 24(R15) + ADD $LOCAL_REGARGS, R15, R1 + MOVD R1, 32(R15) BL ·callReflect(SB) + ADD $LOCAL_REGARGS, R15, R10 // unspillArgs using R10 + BL runtime·unspillArgs(SB) RET // methodValueCall is the code half of the function returned by makeMethodValue. // See the comment on the declaration of methodValueCall in makefunc.go // for more details. // No arg size here; runtime pulls arg map out of the func value. -TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$40 +TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$304 NO_LOCAL_POINTERS + ADD $LOCAL_REGARGS, R15, R10 // spillArgs using R10 + BL runtime·spillArgs(SB) + MOVD R12, 32(R15) // save context reg R12 > args of moveMakeFuncArgPtrs < LOCAL_REGARGS +#ifdef GOEXPERIMENT_regabiargs + MOVD R12, R2 + MOVD R10, R3 +#else MOVD R12, 8(R15) - MOVD $argframe+0(FP), R3 - MOVD R3, 16(R15) - MOVB $0, 40(R15) - ADD $40, R15, R3 - MOVD R3, 24(R15) - MOVD $0, 32(R15) + MOVD R10, 16(R15) +#endif + BL ·moveMakeFuncArgPtrs(SB) + MOVD 32(R15), R12 // restore context reg R12 + MOVD R12, 8(R15) + MOVD $argframe+0(FP), R1 + MOVD R1, 16(R15) + MOVB $0, LOCAL_RETVALID(R15) + ADD $LOCAL_RETVALID, R15, R1 + MOVD R1, 24(R15) + ADD $LOCAL_REGARGS, R15, R1 + MOVD R1, 32(R15) BL ·callMethod(SB) + ADD $LOCAL_REGARGS, R15, R10 // unspillArgs using R10 + BL runtime·unspillArgs(SB) RET diff --git a/src/reflect/float32reg_generic.go b/src/reflect/float32reg_generic.go index 23ad4bf285b..efbc3edc3d5 100644 --- a/src/reflect/float32reg_generic.go +++ b/src/reflect/float32reg_generic.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !ppc64 && !ppc64le && !riscv64 +//go:build !ppc64 && !ppc64le && !riscv64 && !s390x package reflect diff --git a/src/reflect/float32reg_s390x.s b/src/reflect/float32reg_s390x.s new file mode 100644 index 00000000000..bcf55823267 --- /dev/null +++ b/src/reflect/float32reg_s390x.s @@ -0,0 +1,30 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build s390x + +#include "textflag.h" + +// On s390x, the float32 becomes a float64 +// when loaded in a register, different from +// other platforms. These functions are +// needed to ensure correct conversions on s390x. 
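(Editor's sketch, not part of the patch.) In rough Go terms, the two assembly helpers below behave approximately like the following; the real versions stay in assembly so the widening happens in a floating-point register and signalling-NaN payloads are not disturbed by a software conversion:

    package sketch

    import "math"

    // archFloat32ToReg: widen a float32 to its float64 register representation.
    // float32 -> float64 widening is exact, so no rounding occurs here.
    func archFloat32ToReg(val float32) uint64 {
        return math.Float64bits(float64(val))
    }

    // archFloat32FromReg: recover the float32 from its float64 register form.
    // The bits were produced from a float32, so narrowing back is exact.
    func archFloat32FromReg(reg uint64) float32 {
        return float32(math.Float64frombits(reg))
    }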
+ +// Convert float32->uint64 +TEXT ·archFloat32ToReg(SB),NOSPLIT,$0-16 + FMOVS val+0(FP), F1 + FMOVD F1, ret+8(FP) + RET + +// Convert uint64->float32 +TEXT ·archFloat32FromReg(SB),NOSPLIT,$0-12 + FMOVD reg+0(FP), F1 + // Normally a float64->float32 conversion + // would need rounding, but that is not needed + // here since the uint64 was originally converted + // from float32, and should be avoided to + // preserve SNaN values. + FMOVS F1, ret+8(FP) + RET + diff --git a/src/reflect/stubs_s390x.go b/src/reflect/stubs_s390x.go new file mode 100644 index 00000000000..03504d7904b --- /dev/null +++ b/src/reflect/stubs_s390x.go @@ -0,0 +1,10 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build s390x + +package reflect + +func archFloat32FromReg(reg uint64) float32 +func archFloat32ToReg(val float32) uint64 diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s index bb29845f583..791ea80bc29 100644 --- a/src/runtime/asm_s390x.s +++ b/src/runtime/asm_s390x.s @@ -160,7 +160,7 @@ nocgo: MOVD $0, 1(R0) RET -DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) +DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) GLOBL runtime·mainPC(SB),RODATA,$8 TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0 @@ -205,25 +205,29 @@ TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0 // Switch to m->g0's stack, call fn(g). // Fn must never return. It should gogo(&g->sched) // to keep running g. -TEXT runtime·mcall(SB), NOSPLIT, $-8-8 +TEXT runtime·mcall(SB), NOSPLIT, $-8-8 +#ifdef GOEXPERIMENT_regabiargs + MOVD R2, R12 // context +#else + MOVD fn+0(FP), R12 // context +#endif // Save caller state in g->sched MOVD R15, (g_sched+gobuf_sp)(g) MOVD LR, (g_sched+gobuf_pc)(g) MOVD $0, (g_sched+gobuf_lr)(g) // Switch to m->g0 & its stack, call fn. - MOVD g, R3 - MOVD g_m(g), R8 - MOVD m_g0(R8), g + MOVD g, R2 + MOVD g_m(g), R4 + MOVD m_g0(R4), g BL runtime·save_g(SB) - CMP g, R3 + CMP g, R2 BNE 2(PC) BR runtime·badmcall(SB) - MOVD fn+0(FP), R12 // context MOVD 0(R12), R4 // code pointer MOVD (g_sched+gobuf_sp)(g), R15 // sp = m->g0->sched.sp SUB $16, R15 - MOVD R3, 8(R15) + MOVD R2, 8(R15) MOVD $0, 0(R15) BL (R4) BR runtime·badmcall2(SB) @@ -292,18 +296,18 @@ noswitch: // func switchToCrashStack0(fn func()) TEXT runtime·switchToCrashStack0(SB), NOSPLIT, $0-8 - MOVD fn+0(FP), R12 // context - MOVD g_m(g), R4 // curm + MOVD R2, R12 // context + MOVD g_m(g), R2 // curm // set g to gcrash MOVD $runtime·gcrash(SB), g // g = &gcrash BL runtime·save_g(SB) - MOVD R4, g_m(g) // g.m = curm - MOVD g, m_g0(R4) // curm.g0 = g + MOVD R2, g_m(g) // g.m = curm + MOVD g, m_g0(R2) // curm.g0 = g // switch to crashstack - MOVD (g_stack+stack_hi)(g), R4 - ADD $(-4*8), R4, R15 + MOVD (g_stack+stack_hi)(g), R2 + ADD $(-4*8), R2, R15 // call target function MOVD 0(R12), R3 // code pointer @@ -446,10 +450,14 @@ tailArgs: /* copy remaining bytes */ \ EXRL $callfnMVC<>(SB), R5; \ callFunction: \ MOVD f+8(FP), R12; \ - MOVD (R12), R8; \ + MOVD regArgs+40(FP), R10; \ + BL ·unspillArgs(SB); \ + MOVD (R12), R10; \ PCDATA $PCDATA_StackMapIndex, $0; \ - BL (R8); \ + BL (R10); \ /* copy return values back */ \ + MOVD regArgs+40(FP), R10; \ + BL ·spillArgs(SB); \ MOVD stackArgsType+0(FP), R7; \ MOVD stackArgs+16(FP), R6; \ MOVWZ stackArgsSize+24(FP), R5; \ @@ -466,11 +474,12 @@ callFunction: \ // to reflectcallmove. It does not follow the Go ABI; it expects its // arguments in registers. 
TEXT callRet<>(SB), NOSPLIT, $40-0 + NO_LOCAL_POINTERS; MOVD R7, 8(R15) MOVD R6, 16(R15) MOVD R4, 24(R15) MOVD R5, 32(R15) - MOVD $0, 40(R15) + MOVD R10, 40(R15) BL runtime·reflectcallmove(SB) RET @@ -754,15 +763,80 @@ TEXT runtime·cputicks(SB),NOSPLIT,$0-8 MOVD R3, ret+0(FP) RET +#ifdef GOEXPERIMENT_regabiargs +// spillArgs stores return values from registers to a *internal/abi.RegArgs in R10. +TEXT runtime·spillArgs(SB),NOSPLIT,$0-0 + MOVD R2, 0(R10) + MOVD R3, 8(R10) + MOVD R4, 16(R10) + MOVD R5, 24(R10) + MOVD R6, 32(R10) + MOVD R7, 40(R10) + MOVD R8, 48(R10) + MOVD R9, 56(R10) + FMOVD F0, 64(R10) + FMOVD F1, 72(R10) + FMOVD F2, 80(R10) + FMOVD F3, 88(R10) + FMOVD F4, 96(R10) + FMOVD F5, 104(R10) + FMOVD F6, 112(R10) + FMOVD F7, 120(R10) + FMOVD F8, 128(R10) + FMOVD F9, 136(R10) + FMOVD F10, 144(R10) + FMOVD F11, 152(R10) + FMOVD F12, 160(R10) + FMOVD F13, 168(R10) + FMOVD F14, 176(R10) + FMOVD F15, 184(R10) + RET + +// unspillArgs loads args into registers from a *internal/abi.RegArgs in R10. +TEXT runtime·unspillArgs(SB),NOSPLIT,$0-0 + MOVD 0(R10), R2 + MOVD 8(R10), R3 + MOVD 16(R10), R4 + MOVD 24(R10), R5 + MOVD 32(R10), R6 + MOVD 40(R10), R7 + MOVD 48(R10), R8 + MOVD 56(R10), R9 + FMOVD 64(R10), F0 + FMOVD 72(R10), F1 + FMOVD 80(R10), F2 + FMOVD 88(R10), F3 + FMOVD 96(R10), F4 + FMOVD 104(R10), F5 + FMOVD 112(R10), F6 + FMOVD 120(R10), F7 + FMOVD 128(R10), F8 + FMOVD 136(R10), F9 + FMOVD 144(R10), F10 + FMOVD 152(R10), F11 + FMOVD 160(R10), F12 + FMOVD 168(R10), F13 + FMOVD 176(R10), F14 + FMOVD 184(R10), F15 + RET +#else + +TEXT runtime·spillArgs(SB),NOSPLIT,$0-0 + RET + +TEXT runtime·unspillArgs(SB),NOSPLIT,$0-0 + RET +#endif + // AES hashing not implemented for s390x -TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32 - JMP runtime·memhashFallback(SB) -TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24 - JMP runtime·strhashFallback(SB) -TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24 - JMP runtime·memhash32Fallback(SB) -TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24 - JMP runtime·memhash64Fallback(SB) +TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32 + JMP runtime·memhashFallback(SB) +TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24 + JMP runtime·strhashFallback(SB) +TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24 + JMP runtime·memhash32Fallback(SB) +TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24 + JMP runtime·memhash64Fallback(SB) // Called from cgo wrappers, this function returns g->m->curg.stack.hi. // Must obey the gcc calling convention. @@ -902,8 +976,7 @@ TEXT runtime·panicBounds(SB),NOSPLIT,$144-0 // skip R14 aka LR @ 136 // skip R15 aka SP @ 144 - MOVD R14, 8(R15) // PC immediately after call to panicBounds - ADD $24, R15, R0 // pointer to save area - MOVD R0, 16(R15) + MOVD R14, R2 // PC immediately after call to panicBounds + ADD $24, R15, R3 // pointer to save area CALL runtime·panicBounds64(SB) RET diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index f01353ffa6c..55e7bdbdb55 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -796,6 +796,9 @@ func cgoCheckResult(val any) { ep := efaceOf(&val) t := ep._type + if t == nil { + return + } cgoCheckArg(t, ep.data, !t.IsDirectIface(), false, cgoResultFail) } diff --git a/src/runtime/memclr_s390x.s b/src/runtime/memclr_s390x.s index 392057565e8..919423edf78 100644 --- a/src/runtime/memclr_s390x.s +++ b/src/runtime/memclr_s390x.s @@ -7,10 +7,14 @@ // See memclrNoHeapPointers Go doc for important implementation constraints. 
// func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) -TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT|NOFRAME,$0-16 +TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT|NOFRAME,$0-16 +#ifndef GOEXPERIMENT_regabiargs MOVD ptr+0(FP), R4 MOVD n+8(FP), R5 - +#else + MOVD R2, R4 + MOVD R3, R5 +#endif CMPBGE R5, $32, clearge32 start: diff --git a/src/runtime/memmove_s390x.s b/src/runtime/memmove_s390x.s index f4c2b87d929..28c6a5dab20 100644 --- a/src/runtime/memmove_s390x.s +++ b/src/runtime/memmove_s390x.s @@ -7,10 +7,16 @@ // See memmove Go doc for important implementation constraints. // func memmove(to, from unsafe.Pointer, n uintptr) -TEXT runtime·memmove(SB),NOSPLIT|NOFRAME,$0-24 +TEXT runtime·memmove(SB),NOSPLIT|NOFRAME,$0-24 +#ifndef GOEXPERIMENT_regabiargs MOVD to+0(FP), R6 MOVD from+8(FP), R4 MOVD n+16(FP), R5 +#else + MOVD R4, R5 + MOVD R3, R4 + MOVD R2, R6 +#endif CMPBEQ R6, R4, done diff --git a/src/runtime/race_s390x.s b/src/runtime/race_s390x.s index 3dfda9e733b..d5a0bbedace 100644 --- a/src/runtime/race_s390x.s +++ b/src/runtime/race_s390x.s @@ -25,10 +25,14 @@ // func runtime·raceread(addr uintptr) // Called from instrumented code. -TEXT runtime·raceread(SB), NOSPLIT, $0-8 +TEXT runtime·raceread(SB), NOSPLIT, $0-8 // void __tsan_read(ThreadState *thr, void *addr, void *pc); MOVD $__tsan_read(SB), R1 +#ifndef GOEXPERIMENT_regabiargs MOVD addr+0(FP), R3 +#else + MOVD R2, R3 +#endif MOVD R14, R4 JMP racecalladdr<>(SB) @@ -46,10 +50,14 @@ TEXT runtime·racereadpc(SB), NOSPLIT, $0-24 // func runtime·racewrite(addr uintptr) // Called from instrumented code. -TEXT runtime·racewrite(SB), NOSPLIT, $0-8 +TEXT runtime·racewrite(SB), NOSPLIT, $0-8 // void __tsan_write(ThreadState *thr, void *addr, void *pc); MOVD $__tsan_write(SB), R1 +#ifndef GOEXPERIMENT_regabiargs MOVD addr+0(FP), R3 +#else + MOVD R2, R3 +#endif MOVD R14, R4 JMP racecalladdr<>(SB) @@ -67,10 +75,15 @@ TEXT runtime·racewritepc(SB), NOSPLIT, $0-24 // func runtime·racereadrange(addr, size uintptr) // Called from instrumented code. -TEXT runtime·racereadrange(SB), NOSPLIT, $0-16 +TEXT runtime·racereadrange(SB), NOSPLIT, $0-16 // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc); MOVD $__tsan_read_range(SB), R1 +#ifndef GOEXPERIMENT_regabiargs LMG addr+0(FP), R3, R4 +#else + MOVD R3, R4 + MOVD R2, R3 +#endif MOVD R14, R5 JMP racecalladdr<>(SB) @@ -91,10 +104,15 @@ TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24 // func runtime·racewriterange(addr, size uintptr) // Called from instrumented code. 
-TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 +TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc); MOVD $__tsan_write_range(SB), R1 +#ifndef GOEXPERIMENT_regabiargs LMG addr+0(FP), R3, R4 +#else + MOVD R3, R4 + MOVD R2, R3 +#endif MOVD R14, R5 JMP racecalladdr<>(SB) diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go index 43e4c142362..64ee4c8d2e9 100644 --- a/src/runtime/runtime1.go +++ b/src/runtime/runtime1.go @@ -39,7 +39,7 @@ func gotraceback() (level int32, all, crash bool) { gp := getg() t := atomic.Load(&traceback_cache) crash = t&tracebackCrash != 0 - all = gp.m.throwing >= throwTypeUser || t&tracebackAll != 0 + all = gp.m.throwing > throwTypeUser || t&tracebackAll != 0 if gp.m.traceback != 0 { level = int32(gp.m.traceback) } else if gp.m.throwing >= throwTypeRuntime { diff --git a/src/runtime/stkframe.go b/src/runtime/stkframe.go index 819b7f6c7d9..d6e7e0371c0 100644 --- a/src/runtime/stkframe.go +++ b/src/runtime/stkframe.go @@ -234,7 +234,7 @@ func (frame *stkframe) getStackMap(debug bool) (locals, args bitvector, objs []s } // stack objects. - if (GOARCH == "amd64" || GOARCH == "arm64" || GOARCH == "loong64" || GOARCH == "ppc64" || GOARCH == "ppc64le" || GOARCH == "riscv64") && + if (GOARCH == "amd64" || GOARCH == "arm64" || GOARCH == "loong64" || GOARCH == "ppc64" || GOARCH == "ppc64le" || GOARCH == "riscv64" || GOARCH == "s390x") && unsafe.Sizeof(abi.RegArgs{}) > 0 && isReflect { // For reflect.makeFuncStub and reflect.methodValueCall, // we need to fake the stack object record. diff --git a/src/runtime/stubs_s390x.go b/src/runtime/stubs_s390x.go index a2b07ff8aa8..6d704e82003 100644 --- a/src/runtime/stubs_s390x.go +++ b/src/runtime/stubs_s390x.go @@ -8,6 +8,13 @@ package runtime func load_g() func save_g() +// Used by reflectcall and the reflect package. +// +// Spills/loads arguments in registers to/from an internal/abi.RegArgs +// respectively. Does not follow the Go ABI. +func spillArgs() +func unspillArgs() + // getfp returns the frame pointer register of its caller or 0 if not implemented. // TODO: Make this a compiler intrinsic func getfp() uintptr { return 0 } diff --git a/src/runtime/tls_s390x.s b/src/runtime/tls_s390x.s index cb6a21c1143..388e7b88bd9 100644 --- a/src/runtime/tls_s390x.s +++ b/src/runtime/tls_s390x.s @@ -19,7 +19,7 @@ // // If !iscgo, this is a no-op. // -// NOTE: setg_gcc<> assume this clobbers only R10 and R11. +// NOTE: setg_gcc<> and mcall assume this clobbers only R10 and R11. TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0 MOVB runtime·iscgo(SB), R10 CMPBEQ R10, $0, nocgo
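(Editor's note, not part of the patch.) Since the internal/buildcfg change sets regabiSupported for s390x but does not add it to the always-on list, the register ABI is presumably opt-in on this architecture, so exercising these code paths would look something like:

    GOEXPERIMENT=regabiwrappers,regabiargs GOARCH=s390x go build ./...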