From 2e5d12a277cc313c751c10b27893c256bfc01c93 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 04:13:51 +0100 Subject: [PATCH 01/15] cmd/compile: document register-based ABI for s390x This CL adds the s390x information to the ABI doc. Update #40724 Cq-Include-Trybots: luci.golang.try:gotip-linux-s390x Change-Id: I1b4b25ef1003e2ab011e1b808aeb1c02288095c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/719460 Reviewed-by: Cherry Mui Reviewed-by: Vishwanatha HD TryBot-Bypass: David Chase Reviewed-by: Keith Randall Reviewed-by: Michael Pratt --- src/cmd/compile/abi-internal.md | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/cmd/compile/abi-internal.md b/src/cmd/compile/abi-internal.md index eae230dc070..8de8356132a 100644 --- a/src/cmd/compile/abi-internal.md +++ b/src/cmd/compile/abi-internal.md @@ -833,6 +833,51 @@ The riscv64 has Zicsr extension for control and status register (CSR) and treated as scratch register. All bits in CSR are system flags and are not modified by Go. +### s390x architecture + +The s390x architecture uses R2 – R9 for integer arguments and integer results. + +It uses F0 – F15 for floating-point arguments and results. + +Special-purpose registers used within Go generated code and Go assembly code +are as follows: + +| Register | Call meaning | Return meaning | Body meaning | +| --- | --- | --- | --- | +| R0 | Zero value | Same | Same | +| R1 | Scratch | Scratch | Scratch | +| R10, R11 | used by the assembler | Same | Same | +| R12 | Closure context pointer | Same | Same | +| R13 | Current goroutine | Same | Same | +| R14 | Link register | Link register | Scratch | +| R15 | Stack pointer | Same | Same | + +*Rationale*: These register meanings are compatible with Go’s stack-based +calling convention. + +#### Stack layout + +The stack pointer, R15, grows down and is aligned to 8 bytes. 
+ +A function's stack frame, after the frame is created, is laid out as +follows: + + +------------------------------+ + | ... locals ... | + | ... outgoing arguments ... | + | return PC | ← R15 points to + +------------------------------+ ↓ lower addresses + +This stack layout is used by both register-based (ABIInternal) and +stack-based (ABI0) calling conventions. + +The "return PC" is loaded to the link register R14, as part of the +s390x `BL` operation. + +#### Flags +The s390x architecture maintains a single condition code (CC) field in the Program Status Word (PSW). +Go-generated code sets and tests this condition code to control conditional branches. + ## Future directions ### Spill path improvements From 1036f6f48595a9a8aedf265c5e17df84cbf03a9f Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 04:25:23 +0100 Subject: [PATCH 02/15] internal/abi: define s390x ABI constants Updates #40724 Change-Id: I9b6c56194b2cbc95c08441dfa1f779ed5efbadb8 Reviewed-on: https://go-review.googlesource.com/c/go/+/719461 Reviewed-by: Mark Freeman Reviewed-by: Keith Randall Reviewed-by: Michael Pratt LUCI-TryBot-Result: Go LUCI Reviewed-by: Vishwanatha HD --- src/internal/abi/abi_s390x.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 src/internal/abi/abi_s390x.go diff --git a/src/internal/abi/abi_s390x.go b/src/internal/abi/abi_s390x.go new file mode 100644 index 00000000000..9b07f27382d --- /dev/null +++ b/src/internal/abi/abi_s390x.go @@ -0,0 +1,19 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.regabiargs + +package abi + +const ( + // See abi_generic.go. + + // R2 - R9. 
+ IntArgRegs = 8 + + // F0 - F15 + FloatArgRegs = 16 + + EffectiveFloatRegSize = 8 +) From 73b6aa0fecbf8b32e04a7226a964a454694939af Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 04:33:33 +0100 Subject: [PATCH 03/15] cmd/compile/internal: add register ABI information for s390x Update #40724 Change-Id: If8f2574259560b097db29347b2aecb098acef863 Reviewed-on: https://go-review.googlesource.com/c/go/+/719462 Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Reviewed-by: Vishwanatha HD Reviewed-by: Keith Randall --- src/cmd/compile/internal/s390x/ssa.go | 2 ++ src/cmd/compile/internal/ssa/_gen/S390XOps.go | 22 ++++++++++--------- src/cmd/compile/internal/ssa/config.go | 2 ++ src/cmd/compile/internal/ssa/opGen.go | 4 ++-- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go index de00f1ef8ce..03af5bdd9e7 100644 --- a/src/cmd/compile/internal/s390x/ssa.go +++ b/src/cmd/compile/internal/s390x/ssa.go @@ -540,6 +540,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() ssagen.AddrAuto(&p.To, v) + case ssa.OpArgIntReg, ssa.OpArgFloatReg: + ssagen.CheckArgReg(v) case ssa.OpS390XLoweredGetClosurePtr: // Closure pointer is R12 (already) ssagen.CheckLoweredGetClosurePtr(v) diff --git a/src/cmd/compile/internal/ssa/_gen/S390XOps.go b/src/cmd/compile/internal/ssa/_gen/S390XOps.go index c002d5bcc38..10d88a9c364 100644 --- a/src/cmd/compile/internal/ssa/_gen/S390XOps.go +++ b/src/cmd/compile/internal/ssa/_gen/S390XOps.go @@ -812,16 +812,18 @@ func init() { } archs = append(archs, arch{ - name: "S390X", - pkg: "cmd/internal/obj/s390x", - genfile: "../../s390x/ssa.go", - ops: S390Xops, - blocks: S390Xblocks, - regnames: regNamesS390X, - gpregmask: gp, - fpregmask: fp, - framepointerreg: -1, // not used - linkreg: int8(num["R14"]), + name: "S390X", + pkg: "cmd/internal/obj/s390x", + genfile: 
"../../s390x/ssa.go", + ops: S390Xops, + blocks: S390Xblocks, + regnames: regNamesS390X, + ParamIntRegNames: "R2 R3 R4 R5 R6 R7 R8 R9", + ParamFloatRegNames: "F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15", + gpregmask: gp, + fpregmask: fp, + framepointerreg: -1, // not used + linkreg: int8(num["R14"]), imports: []string{ "cmd/internal/obj/s390x", }, diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index 3540db498cf..cb41bc5ed5a 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -305,6 +305,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo c.registers = registersS390X[:] c.gpRegMask = gpRegMaskS390X c.fpRegMask = fpRegMaskS390X + //c.intParamRegs = paramIntRegS390X + //c.floatParamRegs = paramFloatRegS390X c.FPReg = framepointerRegS390X c.LinkReg = linkRegS390X c.hasGReg = true diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 26ab2cacce2..8ebdb9d9443 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -43559,8 +43559,8 @@ var registersS390X = [...]Register{ {31, s390x.REG_F15, "F15"}, {32, 0, "SB"}, } -var paramIntRegS390X = []int8(nil) -var paramFloatRegS390X = []int8(nil) +var paramIntRegS390X = []int8{2, 3, 4, 5, 6, 7, 8, 9} +var paramFloatRegS390X = []int8{16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31} var gpRegMaskS390X = regMask(23551) var fpRegMaskS390X = regMask(4294901760) var specialRegMaskS390X = regMask(0) From 81242d034c8a6b9544c3e65ed1bb8000fe35ae84 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 04:37:44 +0100 Subject: [PATCH 04/15] cmd/compile/internal/s390x: add initial spill support This adds some initial support for spilling and reloading registers in the new ABI for s390x Update #40724 Change-Id: Icc46a9375454765dea7d03fc4c8f2dbcc87f5f50 Reviewed-on: 
https://go-review.googlesource.com/c/go/+/719463 Reviewed-by: Vishwanatha HD Reviewed-by: Keith Randall Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall --- src/cmd/compile/internal/s390x/galign.go | 2 ++ src/cmd/compile/internal/s390x/ssa.go | 31 ++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/cmd/compile/internal/s390x/galign.go b/src/cmd/compile/internal/s390x/galign.go index d880834c220..1fb371a52c6 100644 --- a/src/cmd/compile/internal/s390x/galign.go +++ b/src/cmd/compile/internal/s390x/galign.go @@ -20,4 +20,6 @@ func Init(arch *ssagen.ArchInfo) { arch.SSAMarkMoves = ssaMarkMoves arch.SSAGenValue = ssaGenValue arch.SSAGenBlock = ssaGenBlock + arch.LoadRegResult = loadRegResult + arch.SpillArgReg = spillArgReg } diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go index 03af5bdd9e7..ce060597d9a 100644 --- a/src/cmd/compile/internal/s390x/ssa.go +++ b/src/cmd/compile/internal/s390x/ssa.go @@ -10,6 +10,7 @@ import ( "cmd/compile/internal/base" "cmd/compile/internal/ir" "cmd/compile/internal/logopt" + "cmd/compile/internal/objw" "cmd/compile/internal/ssa" "cmd/compile/internal/ssagen" "cmd/compile/internal/types" @@ -541,6 +542,17 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.From.Reg = v.Args[0].Reg() ssagen.AddrAuto(&p.To, v) case ssa.OpArgIntReg, ssa.OpArgFloatReg: + // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill + // The loop only runs once. + for _, a := range v.Block.Func.RegArgs { + // Pass the spill/unspill information along to the assembler, offset by size of + // the saved LR slot. 
+ addr := ssagen.SpillSlotAddr(a, s390x.REGSP, base.Ctxt.Arch.FixedFrameSize) + s.FuncInfo().AddSpill( + obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)}) + } + v.Block.Func.RegArgs = nil + ssagen.CheckArgReg(v) case ssa.OpS390XLoweredGetClosurePtr: // Closure pointer is R12 (already) @@ -1031,3 +1043,22 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) { s.Br(s390x.ABR, succs[1]) } } + +func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog { + p := s.Prog(loadByType(t)) + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_AUTO + p.From.Sym = n.Linksym() + p.From.Offset = n.FrameOffset() + off + p.To.Type = obj.TYPE_REG + p.To.Reg = reg + return p +} + +func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog { + p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off) + p.To.Name = obj.NAME_PARAM + p.To.Sym = n.Linksym() + p.Pos = p.Pos.WithNotStmt() + return p +} From 24697419c54c046370c3431fb45842689c042984 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 04:41:50 +0100 Subject: [PATCH 05/15] cmd/compile: update s390x CALL* ops This CL allows the CALL ops to take a variable number of arguments. 
Update #40724 Change-Id: Ibfa2e98c5051684cae69200c396dfa1edb2878e4 Reviewed-on: https://go-review.googlesource.com/c/go/+/719464 Reviewed-by: Vishwanatha HD Reviewed-by: Keith Randall Reviewed-by: Keith Randall Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/ssa/_gen/S390XOps.go | 8 ++++---- src/cmd/compile/internal/ssa/opGen.go | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cmd/compile/internal/ssa/_gen/S390XOps.go b/src/cmd/compile/internal/ssa/_gen/S390XOps.go index 10d88a9c364..9e67a06ce8c 100644 --- a/src/cmd/compile/internal/ssa/_gen/S390XOps.go +++ b/src/cmd/compile/internal/ssa/_gen/S390XOps.go @@ -484,10 +484,10 @@ func init() { {name: "CLEAR", argLength: 2, reg: regInfo{inputs: []regMask{ptr, 0}}, asm: "CLEAR", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Write"}, - {name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem - {name: "CALLtail", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem - {name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{ptrsp, buildReg("R12"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem - {name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{ptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem + {name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). 
last arg=mem, auxint=argsize, returns mem + {name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem + {name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{ptrsp, buildReg("R12"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, last arg=mem, auxint=argsize, returns mem + {name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{ptr}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, last arg=mem, auxint=argsize, returns mem // (InvertFlags (CMP a b)) == (CMP b a) // InvertFlags is a pseudo-op which can't appear in assembly output. diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 8ebdb9d9443..c7777f246eb 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -38596,7 +38596,7 @@ var opcodeTable = [...]opInfo{ { name: "CALLstatic", auxType: auxCallOff, - argLen: 1, + argLen: -1, clobberFlags: true, call: true, reg: regInfo{ @@ -38606,7 +38606,7 @@ var opcodeTable = [...]opInfo{ { name: "CALLtail", auxType: auxCallOff, - argLen: 1, + argLen: -1, clobberFlags: true, call: true, tailCall: true, @@ -38617,7 +38617,7 @@ var opcodeTable = [...]opInfo{ { name: "CALLclosure", auxType: auxCallOff, - argLen: 3, + argLen: -1, clobberFlags: true, call: true, reg: regInfo{ @@ -38631,7 +38631,7 @@ var opcodeTable = [...]opInfo{ { name: "CALLinter", auxType: auxCallOff, - argLen: 2, + argLen: -1, clobberFlags: true, call: true, reg: regInfo{ From 85e60800893df03c5b071f66fa2dde5e00fdf295 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 04:47:55 +0100 Subject: [PATCH 06/15] cmd/internal/obj: set morestack arg spilling and regabi prologue on s390x 
This CL spills the argument registers before calling morestack and unspills them after the morestack call. It also avoids clobbering registers that could contain incoming argument values. Change registers on s390x to avoid regABI arguments. Update #40724 Change-Id: I67b20552410dd23ef0b86f14b9c5bfed9f9723a6 Reviewed-on: https://go-review.googlesource.com/c/go/+/719421 Reviewed-by: Vishwanatha HD Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall Reviewed-by: David Chase --- src/cmd/internal/obj/s390x/a.out.go | 4 +-- src/cmd/internal/obj/s390x/objz.go | 50 ++++++++++++++++++----------- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/src/cmd/internal/obj/s390x/a.out.go b/src/cmd/internal/obj/s390x/a.out.go index caf5ec09358..6b16d7a9bd5 100644 --- a/src/cmd/internal/obj/s390x/a.out.go +++ b/src/cmd/internal/obj/s390x/a.out.go @@ -139,8 +139,8 @@ const ( REG_RESERVED // end of allocated registers REGARG = -1 // -1 disables passing the first argument in register - REGRT1 = REG_R3 // used during zeroing of the stack - not reserved - REGRT2 = REG_R4 // used during zeroing of the stack - not reserved + REGRT1 = REG_R1 // used during zeroing of the stack - not reserved + REGRT2 = REG_R10 // used during zeroing of the stack - not reserved REGTMP = REG_R10 // scratch register used in the assembler and linker REGTMP2 = REG_R11 // scratch register used in the assembler and linker REGCTXT = REG_R12 // context for closures diff --git a/src/cmd/internal/obj/s390x/objz.go b/src/cmd/internal/obj/s390x/objz.go index 44c1a7d586d..4bfc1f7b2c0 100644 --- a/src/cmd/internal/obj/s390x/objz.go +++ b/src/cmd/internal/obj/s390x/objz.go @@ -506,7 +506,13 @@ func (c *ctxtz) stacksplitPre(p *obj.Prog, framesize int32) (pPre, pPreempt, pCh // Save LR and REGCTXT const frameSize = 16 p = c.ctxt.StartUnsafePoint(p, c.newprog) + + // Spill arguments. This has to happen before we open + // any more frame space. 
+ p = c.cursym.Func().SpillRegisterArgs(p, c.newprog) + // MOVD LR, -16(SP) + p = obj.Appendp(p, c.newprog) p.As = AMOVD p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR} @@ -549,10 +555,12 @@ func (c *ctxtz) stacksplitPre(p *obj.Prog, framesize int32) (pPre, pPreempt, pCh p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REGSP} p.Spadj = -frameSize + // Unspill arguments + p = c.cursym.Func().UnspillRegisterArgs(p, c.newprog) p = c.ctxt.EndUnsafePoint(p, c.newprog, -1) } - // MOVD g_stackguard(g), R3 + // MOVD g_stackguard(g), R10 p = obj.Appendp(p, c.newprog) // Jump back to here after morestack returns. pCheck = p @@ -565,7 +573,7 @@ func (c *ctxtz) stacksplitPre(p *obj.Prog, framesize int32) (pPre, pPreempt, pCh p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1 } p.To.Type = obj.TYPE_REG - p.To.Reg = REG_R3 + p.To.Reg = REG_R10 // Mark the stack bound check and morestack call async nonpreemptible. // If we get preempted here, when resumed the preemption request is @@ -579,7 +587,7 @@ func (c *ctxtz) stacksplitPre(p *obj.Prog, framesize int32) (pPre, pPreempt, pCh p = obj.Appendp(p, c.newprog) p.From.Type = obj.TYPE_REG - p.From.Reg = REG_R3 + p.From.Reg = REG_R10 p.Reg = REGSP p.As = ACMPUBGE p.To.Type = obj.TYPE_BRANCH @@ -598,40 +606,40 @@ func (c *ctxtz) stacksplitPre(p *obj.Prog, framesize int32) (pPre, pPreempt, pCh // stack guard to incorrectly succeed. We explicitly // guard against underflow. // - // MOVD $(framesize-StackSmall), R4 - // CMPUBLT SP, R4, label-of-call-to-morestack + // MOVD $(framesize-StackSmall), R11 + // CMPUBLT SP, R11, label-of-call-to-morestack p = obj.Appendp(p, c.newprog) p.As = AMOVD p.From.Type = obj.TYPE_CONST p.From.Offset = offset p.To.Type = obj.TYPE_REG - p.To.Reg = REG_R4 + p.To.Reg = REG_R11 p = obj.Appendp(p, c.newprog) pPreempt = p p.As = ACMPUBLT p.From.Type = obj.TYPE_REG p.From.Reg = REGSP - p.Reg = REG_R4 + p.Reg = REG_R11 p.To.Type = obj.TYPE_BRANCH } // Check against the stack guard. 
We've ensured this won't underflow. - // ADD $-(framesize-StackSmall), SP, R4 - // CMPUBGE stackguard, R4, label-of-call-to-morestack + // ADD $-(framesize-StackSmall), SP, R11 + // CMPUBGE stackguard, R11, label-of-call-to-morestack p = obj.Appendp(p, c.newprog) p.As = AADD p.From.Type = obj.TYPE_CONST p.From.Offset = -offset p.Reg = REGSP p.To.Type = obj.TYPE_REG - p.To.Reg = REG_R4 + p.To.Reg = REG_R11 p = obj.Appendp(p, c.newprog) p.From.Type = obj.TYPE_REG - p.From.Reg = REG_R3 - p.Reg = REG_R4 + p.From.Reg = REG_R10 + p.Reg = REG_R11 p.As = ACMPUBGE p.To.Type = obj.TYPE_BRANCH @@ -654,18 +662,22 @@ func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre, pPreempt, pCheck *obj.Prog, fr pcdata := c.ctxt.EmitEntryStackMap(c.cursym, spfix, c.newprog) pcdata = c.ctxt.StartUnsafePoint(pcdata, c.newprog) + if pPreempt != nil { + pPreempt.To.SetTarget(pcdata) + } + pPre.To.SetTarget(pcdata) + + // Spill the register args that could be clobbered by the + // morestack code. + spill := c.cursym.Func().SpillRegisterArgs(pcdata, c.newprog) // MOVD LR, R5 - p = obj.Appendp(pcdata, c.newprog) - pPre.To.SetTarget(p) + p = obj.Appendp(spill, c.newprog) p.As = AMOVD p.From.Type = obj.TYPE_REG p.From.Reg = REG_LR p.To.Type = obj.TYPE_REG p.To.Reg = REG_R5 - if pPreempt != nil { - pPreempt.To.SetTarget(p) - } // BL runtime.morestack(SB) p = obj.Appendp(p, c.newprog) @@ -680,10 +692,12 @@ func (c *ctxtz) stacksplitPost(p *obj.Prog, pPre, pPreempt, pCheck *obj.Prog, fr p.To.Sym = c.ctxt.Lookup("runtime.morestack") } + // The instructions which unspill regs should be preemptible. 
p = c.ctxt.EndUnsafePoint(p, c.newprog, -1) + unspill := c.cursym.Func().UnspillRegisterArgs(p, c.newprog) // BR pCheck - p = obj.Appendp(p, c.newprog) + p = obj.Appendp(unspill, c.newprog) p.As = ABR p.To.Type = obj.TYPE_BRANCH From 41af98eb83bfa66077ed5a90e64f31b92be33ac6 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 04:53:03 +0100 Subject: [PATCH 07/15] runtime: add runtime changes for register ABI on s390x This adds the changes for the register ABI in the runtime functions for s390x platform: - Add spill/unspill functions used by runtime - Add ABIInternal to functions Updates #40724 Change-Id: I6aaeec1d7293b6fec2aa489df90414937b80199e Reviewed-on: https://go-review.googlesource.com/c/go/+/719465 Reviewed-by: David Chase Reviewed-by: Keith Randall Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI Reviewed-by: Vishwanatha HD --- src/runtime/asm_s390x.s | 129 +++++++++++++++++++++++++++++-------- src/runtime/stubs_s390x.go | 7 ++ src/runtime/tls_s390x.s | 2 +- 3 files changed, 109 insertions(+), 29 deletions(-) diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s index bb29845f583..791ea80bc29 100644 --- a/src/runtime/asm_s390x.s +++ b/src/runtime/asm_s390x.s @@ -160,7 +160,7 @@ nocgo: MOVD $0, 1(R0) RET -DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) +DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) GLOBL runtime·mainPC(SB),RODATA,$8 TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0 @@ -205,25 +205,29 @@ TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0 // Switch to m->g0's stack, call fn(g). // Fn must never return. It should gogo(&g->sched) // to keep running g. -TEXT runtime·mcall(SB), NOSPLIT, $-8-8 +TEXT runtime·mcall(SB), NOSPLIT, $-8-8 +#ifdef GOEXPERIMENT_regabiargs + MOVD R2, R12 // context +#else + MOVD fn+0(FP), R12 // context +#endif // Save caller state in g->sched MOVD R15, (g_sched+gobuf_sp)(g) MOVD LR, (g_sched+gobuf_pc)(g) MOVD $0, (g_sched+gobuf_lr)(g) // Switch to m->g0 & its stack, call fn. 
- MOVD g, R3 - MOVD g_m(g), R8 - MOVD m_g0(R8), g + MOVD g, R2 + MOVD g_m(g), R4 + MOVD m_g0(R4), g BL runtime·save_g(SB) - CMP g, R3 + CMP g, R2 BNE 2(PC) BR runtime·badmcall(SB) - MOVD fn+0(FP), R12 // context MOVD 0(R12), R4 // code pointer MOVD (g_sched+gobuf_sp)(g), R15 // sp = m->g0->sched.sp SUB $16, R15 - MOVD R3, 8(R15) + MOVD R2, 8(R15) MOVD $0, 0(R15) BL (R4) BR runtime·badmcall2(SB) @@ -292,18 +296,18 @@ noswitch: // func switchToCrashStack0(fn func()) TEXT runtime·switchToCrashStack0(SB), NOSPLIT, $0-8 - MOVD fn+0(FP), R12 // context - MOVD g_m(g), R4 // curm + MOVD R2, R12 // context + MOVD g_m(g), R2 // curm // set g to gcrash MOVD $runtime·gcrash(SB), g // g = &gcrash BL runtime·save_g(SB) - MOVD R4, g_m(g) // g.m = curm - MOVD g, m_g0(R4) // curm.g0 = g + MOVD R2, g_m(g) // g.m = curm + MOVD g, m_g0(R2) // curm.g0 = g // switch to crashstack - MOVD (g_stack+stack_hi)(g), R4 - ADD $(-4*8), R4, R15 + MOVD (g_stack+stack_hi)(g), R2 + ADD $(-4*8), R2, R15 // call target function MOVD 0(R12), R3 // code pointer @@ -446,10 +450,14 @@ tailArgs: /* copy remaining bytes */ \ EXRL $callfnMVC<>(SB), R5; \ callFunction: \ MOVD f+8(FP), R12; \ - MOVD (R12), R8; \ + MOVD regArgs+40(FP), R10; \ + BL ·unspillArgs(SB); \ + MOVD (R12), R10; \ PCDATA $PCDATA_StackMapIndex, $0; \ - BL (R8); \ + BL (R10); \ /* copy return values back */ \ + MOVD regArgs+40(FP), R10; \ + BL ·spillArgs(SB); \ MOVD stackArgsType+0(FP), R7; \ MOVD stackArgs+16(FP), R6; \ MOVWZ stackArgsSize+24(FP), R5; \ @@ -466,11 +474,12 @@ callFunction: \ // to reflectcallmove. It does not follow the Go ABI; it expects its // arguments in registers. 
TEXT callRet<>(SB), NOSPLIT, $40-0 + NO_LOCAL_POINTERS; MOVD R7, 8(R15) MOVD R6, 16(R15) MOVD R4, 24(R15) MOVD R5, 32(R15) - MOVD $0, 40(R15) + MOVD R10, 40(R15) BL runtime·reflectcallmove(SB) RET @@ -754,15 +763,80 @@ TEXT runtime·cputicks(SB),NOSPLIT,$0-8 MOVD R3, ret+0(FP) RET +#ifdef GOEXPERIMENT_regabiargs +// spillArgs stores return values from registers to a *internal/abi.RegArgs in R10. +TEXT runtime·spillArgs(SB),NOSPLIT,$0-0 + MOVD R2, 0(R10) + MOVD R3, 8(R10) + MOVD R4, 16(R10) + MOVD R5, 24(R10) + MOVD R6, 32(R10) + MOVD R7, 40(R10) + MOVD R8, 48(R10) + MOVD R9, 56(R10) + FMOVD F0, 64(R10) + FMOVD F1, 72(R10) + FMOVD F2, 80(R10) + FMOVD F3, 88(R10) + FMOVD F4, 96(R10) + FMOVD F5, 104(R10) + FMOVD F6, 112(R10) + FMOVD F7, 120(R10) + FMOVD F8, 128(R10) + FMOVD F9, 136(R10) + FMOVD F10, 144(R10) + FMOVD F11, 152(R10) + FMOVD F12, 160(R10) + FMOVD F13, 168(R10) + FMOVD F14, 176(R10) + FMOVD F15, 184(R10) + RET + +// unspillArgs loads args into registers from a *internal/abi.RegArgs in R10. 
+TEXT runtime·unspillArgs(SB),NOSPLIT,$0-0 + MOVD 0(R10), R2 + MOVD 8(R10), R3 + MOVD 16(R10), R4 + MOVD 24(R10), R5 + MOVD 32(R10), R6 + MOVD 40(R10), R7 + MOVD 48(R10), R8 + MOVD 56(R10), R9 + FMOVD 64(R10), F0 + FMOVD 72(R10), F1 + FMOVD 80(R10), F2 + FMOVD 88(R10), F3 + FMOVD 96(R10), F4 + FMOVD 104(R10), F5 + FMOVD 112(R10), F6 + FMOVD 120(R10), F7 + FMOVD 128(R10), F8 + FMOVD 136(R10), F9 + FMOVD 144(R10), F10 + FMOVD 152(R10), F11 + FMOVD 160(R10), F12 + FMOVD 168(R10), F13 + FMOVD 176(R10), F14 + FMOVD 184(R10), F15 + RET +#else + +TEXT runtime·spillArgs(SB),NOSPLIT,$0-0 + RET + +TEXT runtime·unspillArgs(SB),NOSPLIT,$0-0 + RET +#endif + // AES hashing not implemented for s390x -TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32 - JMP runtime·memhashFallback(SB) -TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24 - JMP runtime·strhashFallback(SB) -TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24 - JMP runtime·memhash32Fallback(SB) -TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24 - JMP runtime·memhash64Fallback(SB) +TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32 + JMP runtime·memhashFallback(SB) +TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24 + JMP runtime·strhashFallback(SB) +TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24 + JMP runtime·memhash32Fallback(SB) +TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24 + JMP runtime·memhash64Fallback(SB) // Called from cgo wrappers, this function returns g->m->curg.stack.hi. // Must obey the gcc calling convention. 
@@ -902,8 +976,7 @@ TEXT runtime·panicBounds(SB),NOSPLIT,$144-0 // skip R14 aka LR @ 136 // skip R15 aka SP @ 144 - MOVD R14, 8(R15) // PC immediately after call to panicBounds - ADD $24, R15, R0 // pointer to save area - MOVD R0, 16(R15) + MOVD R14, R2 // PC immediately after call to panicBounds + ADD $24, R15, R3 // pointer to save area CALL runtime·panicBounds64(SB) RET diff --git a/src/runtime/stubs_s390x.go b/src/runtime/stubs_s390x.go index a2b07ff8aa8..6d704e82003 100644 --- a/src/runtime/stubs_s390x.go +++ b/src/runtime/stubs_s390x.go @@ -8,6 +8,13 @@ package runtime func load_g() func save_g() +// Used by reflectcall and the reflect package. +// +// Spills/loads arguments in registers to/from an internal/abi.RegArgs +// respectively. Does not follow the Go ABI. +func spillArgs() +func unspillArgs() + // getfp returns the frame pointer register of its caller or 0 if not implemented. // TODO: Make this a compiler intrinsic func getfp() uintptr { return 0 } diff --git a/src/runtime/tls_s390x.s b/src/runtime/tls_s390x.s index cb6a21c1143..388e7b88bd9 100644 --- a/src/runtime/tls_s390x.s +++ b/src/runtime/tls_s390x.s @@ -19,7 +19,7 @@ // // If !iscgo, this is a no-op. // -// NOTE: setg_gcc<> assume this clobbers only R10 and R11. +// NOTE: setg_gcc<> and mcall assume this clobbers only R10 and R11. TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0 MOVB runtime·iscgo(SB), R10 CMPBEQ R10, $0, nocgo From e92d2964fa5beb678ecd97036eb732c9f885cf63 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 04:57:22 +0100 Subject: [PATCH 08/15] runtime: mark race functions on s390x as ABIInternal This adds ABIInternal to the race function declarations. 
Update #40724 Change-Id: I827f94fa08240a17a4107a39bca6b4e279dc2530 Reviewed-on: https://go-review.googlesource.com/c/go/+/719422 Reviewed-by: Michael Pratt Reviewed-by: Keith Randall Reviewed-by: Keith Randall Reviewed-by: Vishwanatha HD LUCI-TryBot-Result: Go LUCI --- src/runtime/race_s390x.s | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/runtime/race_s390x.s b/src/runtime/race_s390x.s index 3dfda9e733b..d5a0bbedace 100644 --- a/src/runtime/race_s390x.s +++ b/src/runtime/race_s390x.s @@ -25,10 +25,14 @@ // func runtime·raceread(addr uintptr) // Called from instrumented code. -TEXT runtime·raceread(SB), NOSPLIT, $0-8 +TEXT runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8 // void __tsan_read(ThreadState *thr, void *addr, void *pc); MOVD $__tsan_read(SB), R1 +#ifndef GOEXPERIMENT_regabiargs MOVD addr+0(FP), R3 +#else + MOVD R2, R3 +#endif MOVD R14, R4 JMP racecalladdr<>(SB) @@ -46,10 +50,14 @@ TEXT runtime·racereadpc(SB), NOSPLIT, $0-24 // func runtime·racewrite(addr uintptr) // Called from instrumented code. -TEXT runtime·racewrite(SB), NOSPLIT, $0-8 +TEXT runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8 // void __tsan_write(ThreadState *thr, void *addr, void *pc); MOVD $__tsan_write(SB), R1 +#ifndef GOEXPERIMENT_regabiargs MOVD addr+0(FP), R3 +#else + MOVD R2, R3 +#endif MOVD R14, R4 JMP racecalladdr<>(SB) @@ -67,10 +75,15 @@ TEXT runtime·racewritepc(SB), NOSPLIT, $0-24 // func runtime·racereadrange(addr, size uintptr) // Called from instrumented code. 
-TEXT runtime·racereadrange(SB), NOSPLIT, $0-16 +TEXT runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16 // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc); MOVD $__tsan_read_range(SB), R1 +#ifndef GOEXPERIMENT_regabiargs LMG addr+0(FP), R3, R4 +#else + MOVD R3, R4 + MOVD R2, R3 +#endif MOVD R14, R5 JMP racecalladdr<>(SB) @@ -91,10 +104,15 @@ TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24 // func runtime·racewriterange(addr, size uintptr) // Called from instrumented code. -TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 +TEXT runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16 // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc); MOVD $__tsan_write_range(SB), R1 +#ifndef GOEXPERIMENT_regabiargs LMG addr+0(FP), R3, R4 +#else + MOVD R3, R4 + MOVD R2, R3 +#endif MOVD R14, R5 JMP racecalladdr<>(SB) From 2a185fae7e9a3905ff9a96c44a0d6aac6c8aeb03 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 05:07:49 +0100 Subject: [PATCH 09/15] reflect, runtime: add reflect support for regabi on s390x This adds the regabi support needed for reflect calls makeFuncStub and methodValueCall. It also adds archFloat32FromReg and archFloat32ToReg. 
Update #40724 Change-Id: Ic4b9e30c82f292a24fd2c2b9796cd80a58cecf77 Reviewed-on: https://go-review.googlesource.com/c/go/+/719480 Reviewed-by: Vishwanatha HD Reviewed-by: Michael Pratt Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI --- src/reflect/asm_s390x.s | 76 +++++++++++++++++++++++++------ src/reflect/float32reg_generic.go | 2 +- src/reflect/float32reg_s390x.s | 30 ++++++++++++ src/reflect/stubs_s390x.go | 10 ++++ src/runtime/stkframe.go | 2 +- 5 files changed, 104 insertions(+), 16 deletions(-) create mode 100644 src/reflect/float32reg_s390x.s create mode 100644 src/reflect/stubs_s390x.go diff --git a/src/reflect/asm_s390x.s b/src/reflect/asm_s390x.s index 4bd6613004f..22cbd38ee5c 100644 --- a/src/reflect/asm_s390x.s +++ b/src/reflect/asm_s390x.s @@ -5,34 +5,82 @@ #include "textflag.h" #include "funcdata.h" +// The frames of each of the two functions below contain two locals, at offsets +// that are known to the runtime. +// +// The first local is a bool called retValid with a whole pointer-word reserved +// for it on the stack. The purpose of this word is so that the runtime knows +// whether the stack-allocated return space contains valid values for stack +// scanning. +// +// The second local is an abi.RegArgs value whose offset is also known to the +// runtime, so that a stack map for it can be constructed, since it contains +// pointers visible to the GC. + +#define LOCAL_RETVALID 40 +#define LOCAL_REGARGS 48 + +// The frame size of the functions below is +// 32 (args of callReflect/callMethod) + 8 (bool + padding) + 264 (abi.RegArgs) = 304. + // makeFuncStub is the code half of the function returned by MakeFunc. // See the comment on the declaration of makeFuncStub in makefunc.go // for more details. // No arg size here, runtime pulls arg map out of the func value. 
-TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$40 +TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$304 NO_LOCAL_POINTERS + ADD $LOCAL_REGARGS, R15, R10 // spillArgs using R10 + BL runtime·spillArgs(SB) + MOVD R12, 32(R15) // save context reg R12 > args of moveMakeFuncArgPtrs < LOCAL_REGARGS +#ifdef GOEXPERIMENT_regabiargs + MOVD R12, R2 + MOVD R10, R3 +#else MOVD R12, 8(R15) - MOVD $argframe+0(FP), R3 - MOVD R3, 16(R15) - MOVB $0, 40(R15) - ADD $40, R15, R3 - MOVD R3, 24(R15) - MOVD $0, 32(R15) + MOVD R10, 16(R15) +#endif + BL ·moveMakeFuncArgPtrs(SB) + MOVD 32(R15), R12 // restore context reg R12 + MOVD R12, 8(R15) + MOVD $argframe+0(FP), R1 + MOVD R1, 16(R15) + MOVB $0, LOCAL_RETVALID(R15) + ADD $LOCAL_RETVALID, R15, R1 + MOVD R1, 24(R15) + ADD $LOCAL_REGARGS, R15, R1 + MOVD R1, 32(R15) BL ·callReflect(SB) + ADD $LOCAL_REGARGS, R15, R10 // unspillArgs using R10 + BL runtime·unspillArgs(SB) RET // methodValueCall is the code half of the function returned by makeMethodValue. // See the comment on the declaration of methodValueCall in makefunc.go // for more details. // No arg size here; runtime pulls arg map out of the func value. 
-TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$40 +TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$304 NO_LOCAL_POINTERS + ADD $LOCAL_REGARGS, R15, R10 // spillArgs using R10 + BL runtime·spillArgs(SB) + MOVD R12, 32(R15) // save context reg R12 > args of moveMakeFuncArgPtrs < LOCAL_REGARGS +#ifdef GOEXPERIMENT_regabiargs + MOVD R12, R2 + MOVD R10, R3 +#else MOVD R12, 8(R15) - MOVD $argframe+0(FP), R3 - MOVD R3, 16(R15) - MOVB $0, 40(R15) - ADD $40, R15, R3 - MOVD R3, 24(R15) - MOVD $0, 32(R15) + MOVD R10, 16(R15) +#endif + BL ·moveMakeFuncArgPtrs(SB) + MOVD 32(R15), R12 // restore context reg R12 + MOVD R12, 8(R15) + MOVD $argframe+0(FP), R1 + MOVD R1, 16(R15) + MOVB $0, LOCAL_RETVALID(R15) + ADD $LOCAL_RETVALID, R15, R1 + MOVD R1, 24(R15) + ADD $LOCAL_REGARGS, R15, R1 + MOVD R1, 32(R15) BL ·callMethod(SB) + ADD $LOCAL_REGARGS, R15, R10 // unspillArgs using R10 + BL runtime·unspillArgs(SB) RET diff --git a/src/reflect/float32reg_generic.go b/src/reflect/float32reg_generic.go index 23ad4bf285b..efbc3edc3d5 100644 --- a/src/reflect/float32reg_generic.go +++ b/src/reflect/float32reg_generic.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !ppc64 && !ppc64le && !riscv64 +//go:build !ppc64 && !ppc64le && !riscv64 && !s390x package reflect diff --git a/src/reflect/float32reg_s390x.s b/src/reflect/float32reg_s390x.s new file mode 100644 index 00000000000..bcf55823267 --- /dev/null +++ b/src/reflect/float32reg_s390x.s @@ -0,0 +1,30 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build s390x + +#include "textflag.h" + +// On s390x, the float32 becomes a float64 +// when loaded in a register, different from +// other platforms. These functions are +// needed to ensure correct conversions on s390x. 
+ +// Convert float32->uint64 +TEXT ·archFloat32ToReg(SB),NOSPLIT,$0-16 + FMOVS val+0(FP), F1 + FMOVD F1, ret+8(FP) + RET + +// Convert uint64->float32 +TEXT ·archFloat32FromReg(SB),NOSPLIT,$0-12 + FMOVD reg+0(FP), F1 + // Normally a float64->float32 conversion + // would need rounding, but that is not needed + // here since the uint64 was originally converted + // from float32, and should be avoided to + // preserve SNaN values. + FMOVS F1, ret+8(FP) + RET + diff --git a/src/reflect/stubs_s390x.go b/src/reflect/stubs_s390x.go new file mode 100644 index 00000000000..03504d7904b --- /dev/null +++ b/src/reflect/stubs_s390x.go @@ -0,0 +1,10 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build s390x + +package reflect + +func archFloat32FromReg(reg uint64) float32 +func archFloat32ToReg(val float32) uint64 diff --git a/src/runtime/stkframe.go b/src/runtime/stkframe.go index 819b7f6c7d9..d6e7e0371c0 100644 --- a/src/runtime/stkframe.go +++ b/src/runtime/stkframe.go @@ -234,7 +234,7 @@ func (frame *stkframe) getStackMap(debug bool) (locals, args bitvector, objs []s } // stack objects. - if (GOARCH == "amd64" || GOARCH == "arm64" || GOARCH == "loong64" || GOARCH == "ppc64" || GOARCH == "ppc64le" || GOARCH == "riscv64") && + if (GOARCH == "amd64" || GOARCH == "arm64" || GOARCH == "loong64" || GOARCH == "ppc64" || GOARCH == "ppc64le" || GOARCH == "riscv64" || GOARCH == "s390x") && unsafe.Sizeof(abi.RegArgs{}) > 0 && isReflect { // For reflect.makeFuncStub and reflect.methodValueCall, // we need to fake the stack object record. 
From 58a48a3e3bed76803bb3ac59d1d239dde654f336 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 05:19:47 +0100 Subject: [PATCH 10/15] internal/runtime/syscall: Syscall changes for s390x regabi Updates #40724 Change-Id: I07a01ac1bda71214f01f4a72e15ab469ef275725 Reviewed-on: https://go-review.googlesource.com/c/go/+/719423 Reviewed-by: Vishwanatha HD Reviewed-by: Keith Randall Reviewed-by: David Chase Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI --- .../runtime/syscall/linux/asm_linux_s390x.s | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/internal/runtime/syscall/linux/asm_linux_s390x.s b/src/internal/runtime/syscall/linux/asm_linux_s390x.s index 1b27f293907..c912afab649 100644 --- a/src/internal/runtime/syscall/linux/asm_linux_s390x.s +++ b/src/internal/runtime/syscall/linux/asm_linux_s390x.s @@ -5,7 +5,16 @@ #include "textflag.h" // func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr) -TEXT ·Syscall6(SB),NOSPLIT,$0-80 +TEXT ·Syscall6(SB),NOSPLIT,$0-80 +#ifdef GOEXPERIMENT_regabiargs + MOVD R2, R1 + MOVD R3, R2 + MOVD R4, R3 + MOVD R5, R4 + MOVD R6, R5 + MOVD R7, R6 + MOVD R8, R7 +#else MOVD num+0(FP), R1 // syscall entry MOVD a1+8(FP), R2 MOVD a2+16(FP), R3 @@ -13,16 +22,27 @@ TEXT ·Syscall6(SB),NOSPLIT,$0-80 MOVD a4+32(FP), R5 MOVD a5+40(FP), R6 MOVD a6+48(FP), R7 +#endif SYSCALL MOVD $0xfffffffffffff001, R8 CMPUBLT R2, R8, ok +#ifdef GOEXPERIMENT_regabiargs + MOVD $0, R3 + NEG R2, R4 + MOVD $-1, R2 +#else MOVD $-1, r1+56(FP) MOVD $0, r2+64(FP) NEG R2, R2 MOVD R2, errno+72(FP) +#endif RET ok: +#ifdef GOEXPERIMENT_regabiargs + MOVD $0, R4 +#else MOVD R2, r1+56(FP) MOVD R3, r2+64(FP) MOVD $0, errno+72(FP) +#endif RET From 4529c8fba65bf2c80e3bd4dac0851451a101c936 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 05:22:32 +0100 Subject: [PATCH 11/15] runtime: port memmove, memclr to register ABI on s390x This allows memmove and memclr to be invoked using 
the new register ABI on s390x. Update #40724 Change-Id: I2e799aac693ddd693266c156c525d6303060796f Reviewed-on: https://go-review.googlesource.com/c/go/+/719424 Reviewed-by: Michael Pratt LUCI-TryBot-Result: Go LUCI Reviewed-by: Vishwanatha HD Reviewed-by: Keith Randall --- src/runtime/memclr_s390x.s | 8 ++++++-- src/runtime/memmove_s390x.s | 8 +++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/runtime/memclr_s390x.s b/src/runtime/memclr_s390x.s index 392057565e8..919423edf78 100644 --- a/src/runtime/memclr_s390x.s +++ b/src/runtime/memclr_s390x.s @@ -7,10 +7,14 @@ // See memclrNoHeapPointers Go doc for important implementation constraints. // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) -TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT|NOFRAME,$0-16 +TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT|NOFRAME,$0-16 +#ifndef GOEXPERIMENT_regabiargs MOVD ptr+0(FP), R4 MOVD n+8(FP), R5 - +#else + MOVD R2, R4 + MOVD R3, R5 +#endif CMPBGE R5, $32, clearge32 start: diff --git a/src/runtime/memmove_s390x.s b/src/runtime/memmove_s390x.s index f4c2b87d929..28c6a5dab20 100644 --- a/src/runtime/memmove_s390x.s +++ b/src/runtime/memmove_s390x.s @@ -7,10 +7,16 @@ // See memmove Go doc for important implementation constraints. // func memmove(to, from unsafe.Pointer, n uintptr) -TEXT runtime·memmove(SB),NOSPLIT|NOFRAME,$0-24 +TEXT runtime·memmove(SB),NOSPLIT|NOFRAME,$0-24 +#ifndef GOEXPERIMENT_regabiargs MOVD to+0(FP), R6 MOVD from+8(FP), R4 MOVD n+16(FP), R5 +#else + MOVD R4, R5 + MOVD R3, R4 + MOVD R2, R6 +#endif CMPBEQ R6, R4, done From 2fe0ba8d5238c408eaa5fbd4a7721a7be18170e5 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 05:26:42 +0100 Subject: [PATCH 12/15] internal/bytealg: port bytealg functions to reg ABI on s390x This adds support for the reg ABI to the byte/string functions for s390x. These are initially under control of the GOEXPERIMENT macro until all changes are in. 
Updates #40724 Change-Id: Ia3532523fe3a839cc0370d6fe1544972327be514 Reviewed-on: https://go-review.googlesource.com/c/go/+/719481 Reviewed-by: Vishwanatha HD Reviewed-by: David Chase Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall --- src/internal/bytealg/compare_s390x.s | 98 +++++++++----- src/internal/bytealg/equal_s390x.s | 104 ++++++++------- src/internal/bytealg/indexbyte_s390x.s | 178 +++++++++++++++---------- 3 files changed, 225 insertions(+), 155 deletions(-) diff --git a/src/internal/bytealg/compare_s390x.s b/src/internal/bytealg/compare_s390x.s index 539454870d3..64f537be99e 100644 --- a/src/internal/bytealg/compare_s390x.s +++ b/src/internal/bytealg/compare_s390x.s @@ -5,65 +5,93 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 - MOVD a_base+0(FP), R3 - MOVD a_len+8(FP), R4 - MOVD b_base+24(FP), R5 - MOVD b_len+32(FP), R6 - LA ret+48(FP), R7 +TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 +#ifndef GOEXPERIMENT_regabiargs + MOVD a_base+0(FP), R2 + MOVD a_len+8(FP), R3 + MOVD b_base+24(FP), R4 + MOVD b_len+32(FP), R5 + LA ret+48(FP), R6 +#else + // R2 = a_base + // R3 = a_len + // R4 = a_cap (unused) + // R5 = b_base (want in R4) + // R6 = b_len (want in R5) + // R7 = b_cap (unused) + MOVD R5, R4 + MOVD R6, R5 +#endif BR cmpbody<>(SB) -TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 - MOVD a_base+0(FP), R3 - MOVD a_len+8(FP), R4 - MOVD b_base+16(FP), R5 - MOVD b_len+24(FP), R6 - LA ret+32(FP), R7 +TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOVD a_base+0(FP), R2 + MOVD a_len+8(FP), R3 + MOVD b_base+16(FP), R4 + MOVD b_len+24(FP), R5 + LA ret+32(FP), R6 +#endif + // R2 = a_base + // R3 = a_len + // R4 = b_base + // R5 = b_len + BR cmpbody<>(SB) // input: -// R3 = a -// R4 = alen -// R5 = b -// R6 = blen -// R7 = address of output word (stores -1/0/1 here) +// R2 = a +// R3 = alen +// R4 = b +// R5 = blen +// For regabiargs output value( -1/0/1 ) stored 
in R2 +// For !regabiargs address of output word( stores -1/0/1 ) stored in R6 TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0 - CMPBEQ R3, R5, cmplengths - MOVD R4, R8 - CMPBLE R4, R6, amin - MOVD R6, R8 + CMPBEQ R2, R4, cmplengths + MOVD R3, R7 + CMPBLE R3, R5, amin + MOVD R5, R7 amin: - CMPBEQ R8, $0, cmplengths - CMP R8, $256 + CMPBEQ R7, $0, cmplengths + CMP R7, $256 BLE tail loop: - CLC $256, 0(R3), 0(R5) + CLC $256, 0(R2), 0(R4) BGT gt BLT lt - SUB $256, R8 - MOVD $256(R3), R3 - MOVD $256(R5), R5 - CMP R8, $256 + SUB $256, R7 + MOVD $256(R2), R2 + MOVD $256(R4), R4 + CMP R7, $256 BGT loop tail: - SUB $1, R8 - EXRL $cmpbodyclc<>(SB), R8 + SUB $1, R7 + EXRL $cmpbodyclc<>(SB), R7 BGT gt BLT lt cmplengths: - CMP R4, R6 + CMP R3, R5 BEQ eq BLT lt gt: - MOVD $1, 0(R7) + MOVD $1, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVD R2, 0(R6) +#endif RET lt: - MOVD $-1, 0(R7) + MOVD $-1, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVD R2, 0(R6) +#endif RET eq: - MOVD $0, 0(R7) + MOVD $0, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVD R2, 0(R6) +#endif RET TEXT cmpbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0 - CLC $1, 0(R3), 0(R5) + CLC $1, 0(R2), 0(R4) RET diff --git a/src/internal/bytealg/equal_s390x.s b/src/internal/bytealg/equal_s390x.s index 67f814dfc1c..48e8d0f1547 100644 --- a/src/internal/bytealg/equal_s390x.s +++ b/src/internal/bytealg/equal_s390x.s @@ -6,80 +6,92 @@ #include "textflag.h" // memequal(a, b unsafe.Pointer, size uintptr) bool -TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 - MOVD a+0(FP), R3 - MOVD b+8(FP), R5 - MOVD size+16(FP), R6 - LA ret+24(FP), R7 +TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 +#ifndef GOEXPERIMENT_regabiargs + MOVD a+0(FP), R2 + MOVD b+8(FP), R3 + MOVD size+16(FP), R4 + LA ret+24(FP), R5 +#endif BR memeqbody<>(SB) // memequal_varlen(a, b unsafe.Pointer) bool -TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 - MOVD a+0(FP), R3 - MOVD b+8(FP), R5 - MOVD 8(R12), R6 // compiler stores size at offset 8 in the closure - LA ret+16(FP), R7 +TEXT 
runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 +#ifndef GOEXPERIMENT_regabiargs + MOVD a+0(FP), R2 + MOVD b+8(FP), R3 + LA ret+16(FP), R5 +#endif + + MOVD 8(R12), R4 // compiler stores size at offset 8 in the closure BR memeqbody<>(SB) // input: -// R3 = a -// R5 = b -// R6 = len -// R7 = address of output byte (stores 0 or 1 here) +// R2 = a +// R3 = b +// R4 = len +// For regabiargs output value( 0/1 ) stored in R2 +// For !regabiargs address of output byte( stores 0/1 ) stored in R5 // a and b have the same length TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0 - CMPBEQ R3, R5, equal + CMPBEQ R2, R3, equal loop: - CMPBEQ R6, $0, equal - CMPBLT R6, $32, tiny - CMP R6, $256 + CMPBEQ R4, $0, equal + CMPBLT R4, $32, tiny + CMP R4, $256 BLT tail - CLC $256, 0(R3), 0(R5) + CLC $256, 0(R2), 0(R3) BNE notequal - SUB $256, R6 + SUB $256, R4 + LA 256(R2), R2 LA 256(R3), R3 - LA 256(R5), R5 BR loop tail: - SUB $1, R6, R8 + SUB $1, R4, R8 EXRL $memeqbodyclc<>(SB), R8 BEQ equal notequal: - MOVB $0, 0(R7) + MOVD $0, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVB R2, 0(R5) +#endif RET equal: - MOVB $1, 0(R7) + MOVD $1, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVB R2, 0(R5) +#endif RET tiny: - MOVD $0, R2 - CMPBLT R6, $16, lt16 - MOVD 0(R3), R8 - MOVD 0(R5), R9 + MOVD $0, R1 + CMPBLT R4, $16, lt16 + MOVD 0(R2), R8 + MOVD 0(R3), R9 CMPBNE R8, R9, notequal - MOVD 8(R3), R8 - MOVD 8(R5), R9 + MOVD 8(R2), R8 + MOVD 8(R3), R9 CMPBNE R8, R9, notequal - LA 16(R2), R2 - SUB $16, R6 + LA 16(R1), R1 + SUB $16, R4 lt16: - CMPBLT R6, $8, lt8 - MOVD 0(R3)(R2*1), R8 - MOVD 0(R5)(R2*1), R9 + CMPBLT R4, $8, lt8 + MOVD 0(R2)(R1*1), R8 + MOVD 0(R3)(R1*1), R9 CMPBNE R8, R9, notequal - LA 8(R2), R2 - SUB $8, R6 + LA 8(R1), R1 + SUB $8, R4 lt8: - CMPBLT R6, $4, lt4 - MOVWZ 0(R3)(R2*1), R8 - MOVWZ 0(R5)(R2*1), R9 + CMPBLT R4, $4, lt4 + MOVWZ 0(R2)(R1*1), R8 + MOVWZ 0(R3)(R1*1), R9 CMPBNE R8, R9, notequal - LA 4(R2), R2 - SUB $4, R6 + LA 4(R1), R1 + SUB $4, R4 lt4: #define CHECK(n) \ - CMPBEQ R6, $n, equal \ 
- MOVB n(R3)(R2*1), R8 \ - MOVB n(R5)(R2*1), R9 \ + CMPBEQ R4, $n, equal \ + MOVB n(R2)(R1*1), R8 \ + MOVB n(R3)(R1*1), R9 \ CMPBNE R8, R9, notequal CHECK(0) CHECK(1) @@ -88,5 +100,5 @@ lt4: BR equal TEXT memeqbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0 - CLC $1, 0(R3), 0(R5) + CLC $1, 0(R2), 0(R3) RET diff --git a/src/internal/bytealg/indexbyte_s390x.s b/src/internal/bytealg/indexbyte_s390x.s index cf88d92a24b..343ed672f76 100644 --- a/src/internal/bytealg/indexbyte_s390x.s +++ b/src/internal/bytealg/indexbyte_s390x.s @@ -5,104 +5,134 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40 - MOVD b_base+0(FP), R3// b_base => R3 - MOVD b_len+8(FP), R4 // b_len => R4 - MOVBZ c+24(FP), R5 // c => R5 - MOVD $ret+32(FP), R2 // &ret => R9 - BR indexbytebody<>(SB) -TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32 - MOVD s_base+0(FP), R3// s_base => R3 - MOVD s_len+8(FP), R4 // s_len => R4 - MOVBZ c+16(FP), R5 // c => R5 - MOVD $ret+24(FP), R2 // &ret => R9 - BR indexbytebody<>(SB) +TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOVD b_base+0(FP), R2// b_base => R2 + MOVD b_len+8(FP), R3 // b_len => R3 + MOVBZ c+24(FP), R4 // c => R4 + MOVD $ret+32(FP), R5 // &ret => R5 +#else + MOVD R5, R4 + AND $0xff, R4 +#endif + BR indexbytebody<>(SB) + +TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32 +#ifndef GOEXPERIMENT_regabiargs + MOVD s_base+0(FP), R2 // s_base => R2 + MOVD s_len+8(FP), R3 // s_len => R3 + MOVBZ c+16(FP), R4 // c => R4 + MOVD $ret+24(FP), R5 // &ret => R5 +#else + AND $0xff, R4 +#endif + BR indexbytebody<>(SB) // input: -// R3: s -// R4: s_len -// R5: c -- byte sought -// R2: &ret -- address to put index into +// R2: s +// R3: s_len +// R4: c -- byte sought +// For regabiargs output value(index) stored in R2 +// For !regabiargs address of output value(index) stored in R5 TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0 - CMPBEQ R4, $0, notfound - MOVD R3, R6 // store base for later - ADD R3, R4, R8 // the address 
after the end of the string - //if the length is small, use loop; otherwise, use vector or srst search - CMPBGE R4, $16, large + CMPBEQ R3, $0, notfound + MOVD R2, R6 // store base for later + ADD R2, R3, R8 // the address after the end of the string + //if the length is small, use loop; otherwise, use vector or srst search + CMPBGE R3, $16, large residual: - CMPBEQ R3, R8, notfound - MOVBZ 0(R3), R7 - LA 1(R3), R3 - CMPBNE R7, R5, residual + CMPBEQ R2, R8, notfound + MOVBZ 0(R2), R7 + LA 1(R2), R2 + CMPBNE R7, R4, residual found: - SUB R6, R3 - SUB $1, R3 - MOVD R3, 0(R2) - RET + SUB R6, R2 + SUB $1, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVD R2, 0(R5) +#endif + RET notfound: - MOVD $-1, 0(R2) - RET +#ifndef GOEXPERIMENT_regabiargs + MOVD $-1, 0(R5) +#else + MOVD $-1, R2 +#endif + RET large: - MOVBZ internal∕cpu·S390X+const_offsetS390xHasVX(SB), R1 - CMPBNE R1, $0, vectorimpl + MOVBZ internal∕cpu·S390X+const_offsetS390xHasVX(SB), R1 + CMPBNE R1, $0, vectorimpl srstimpl: // no vector facility - MOVBZ R5, R0 // c needs to be in R0, leave until last minute as currently R0 is expected to be 0 + MOVBZ R4, R0 // c needs to be in R0, leave until last minute as currently R0 is expected to be 0 srstloop: - WORD $0xB25E0083 // srst %r8, %r3 (search the range [R3, R8)) - BVS srstloop // interrupted - continue - BGT notfoundr0 + WORD $0xB25E0082 // srst %r8, %r2 (search the range [R2, R8)) + BVS srstloop // interrupted - continue + BGT notfoundr0 foundr0: - XOR R0, R0 // reset R0 - SUB R6, R8 // remove base - MOVD R8, 0(R2) - RET + XOR R0, R0 // reset R0 + SUB R6, R8 // remove base +#ifndef GOEXPERIMENT_regabiargs + MOVD R8, 0(R5) +#else + MOVD R8, R2 +#endif + RET notfoundr0: - XOR R0, R0 // reset R0 - MOVD $-1, 0(R2) - RET + XOR R0, R0 // reset R0 +#ifndef GOEXPERIMENT_regabiargs + MOVD $-1, 0(R5) +#else + MOVD $-1, R2 +#endif + RET vectorimpl: - //if the address is not 16byte aligned, use loop for the header - MOVD R3, R8 - AND $15, R8 - CMPBGT R8, $0, notaligned + //if 
the address is not 16byte aligned, use loop for the header + MOVD R2, R8 + AND $15, R8 + CMPBGT R8, $0, notaligned aligned: - ADD R6, R4, R8 - MOVD R8, R7 - AND $-16, R7 - // replicate c across V17 - VLVGB $0, R5, V19 - VREPB $0, V19, V17 + ADD R6, R3, R8 + MOVD R8, R7 + AND $-16, R7 + // replicate c across V17 + VLVGB $0, R4, V19 + VREPB $0, V19, V17 vectorloop: - CMPBGE R3, R7, residual - VL 0(R3), V16 // load string to be searched into V16 - ADD $16, R3 - VFEEBS V16, V17, V18 // search V17 in V16 and set conditional code accordingly - BVS vectorloop + CMPBGE R2, R7, residual + VL 0(R2), V16 // load string to be searched into V16 + ADD $16, R2 + VFEEBS V16, V17, V18 // search V17 in V16 and set conditional code accordingly + BVS vectorloop - // when vector search found c in the string - VLGVB $7, V18, R7 // load 7th element of V18 containing index into R7 - SUB $16, R3 - SUB R6, R3 - ADD R3, R7 - MOVD R7, 0(R2) - RET + // when vector search found c in the string + VLGVB $7, V18, R7 // load 7th element of V18 containing index into R7 + SUB $16, R2 + SUB R6, R2 + ADD R2, R7 +#ifndef GOEXPERIMENT_regabiargs + MOVD R7, 0(R5) +#else + MOVD R7, R2 +#endif + RET notaligned: - MOVD R3, R8 - AND $-16, R8 - ADD $16, R8 + MOVD R2, R8 + AND $-16, R8 + ADD $16, R8 notalignedloop: - CMPBEQ R3, R8, aligned - MOVBZ 0(R3), R7 - LA 1(R3), R3 - CMPBNE R7, R5, notalignedloop - BR found + CMPBEQ R2, R8, aligned + MOVBZ 0(R2), R7 + LA 1(R2), R2 + CMPBNE R7, R4, notalignedloop + BR found + From 89552911b383b78cd8807ec747ca5df8ba52e239 Mon Sep 17 00:00:00 2001 From: Srinivas Pokala Date: Tue, 11 Nov 2025 05:52:28 +0100 Subject: [PATCH 13/15] cmd/compile, internal/buildcfg: enable regABI on s390x, and add s390x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit in test func hasRegisterABI ``` goos: linux goarch: s390x shortname: minio pkg: github.com/minio/minio/cmd │ old │ new │ │ sec/op │ sec/op vs base │ DecodehealingTracker-128 1227.5n ± 1% 
894.6n ± 1% -27.12% (p=0.000 n=10) AppendMsgResyncTargetsInfo-128 8.755n ± 0% 4.942n ± 20% -43.55% (p=0.000 n=10) DataUpdateTracker-128 2.075µ ± 1% 1.949µ ± 1% -6.10% (p=0.000 n=10) MarshalMsgdataUsageCacheInfo-128 64.32n ± 2% 50.57n ± 6% -21.37% (p=0.000 n=10) geomean 194.6n 144.5n -25.76% shortname: gonum_topo pkg: gonum.org/v1/gonum/graph/topo │ old │ new │ │ sec/op │ sec/op vs base │ TarjanSCCGnp_10_tenth-128 8.733µ ± 1% 6.953µ ± 2% -20.38% (p=0.000 n=10) TarjanSCCGnp_1000_half-128 101.60m ± 0% 72.79m ± 1% -28.36% (p=0.000 n=10) geomean 942.0µ 711.4µ -24.48% shortname: gonum_traverse pkg: gonum.org/v1/gonum/graph/traverse │ old │ new │ │ sec/op │ sec/op vs base │ WalkAllBreadthFirstGnp_10_tenth-128 3.871µ ± 2% 3.242µ ± 2% -16.25% (p=0.000 n=10) WalkAllBreadthFirstGnp_1000_tenth-128 11.879m ± 1% 9.034m ± 1% -23.95% (p=0.000 n=10) geomean 214.4µ 171.1µ -20.19% shortname: ericlagergren_decimal pkg: github.com/ericlagergren/decimal/benchmarks │ old │ new │ │ sec/op │ sec/op vs base │ Pi/foo=ericlagergren_(Go)/prec=100-128 181.6µ ± 0% 145.3µ ± 2% -20.01% (p=0.000 n=10) Pi/foo=ericlagergren_(GDA)/prec=100-128 356.4µ ± 1% 298.2µ ± 2% -16.33% (p=0.000 n=10) Pi/foo=shopspring/prec=100-128 426.5µ ± 2% 403.1µ ± 4% -5.47% (p=0.000 n=10) Pi/foo=apmckinlay/prec=100-128 4.943µ ± 0% 3.903µ ± 1% -21.03% (p=0.000 n=10) Pi/foo=go-inf/prec=100-128 132.1µ ± 4% 119.7µ ± 3% -9.37% (p=0.000 n=10) Pi/foo=float64/prec=100-128 4.210µ ± 0% 4.210µ ± 0% ~ (p=0.269 n=10) geomean 65.07µ 57.02µ -12.37% shortname: uber_tally pkg: github.com/uber-go/tally │ old │ new │ │ sec/op │ sec/op vs base │ ScopeTaggedNoCachedSubscopes-128 3.511µ ± 12% 3.067µ ± 6% -12.63% (p=0.000 n=10) HistogramAllocation-128 1.085µ ± 15% 1.011µ ± 6% -6.87% (p=0.001 n=10) geomean 1.952µ 1.760µ -9.80% shortname: uber_zap pkg: go.uber.org/zap/zapcore │ old │ new │ │ sec/op │ sec/op vs base │ BufferedWriteSyncer/write_file_with_buffer-128 119.0n ± 3% 101.7n ± 5% -14.54% (p=0.000 n=10) MultiWriteSyncer/2_discarder-128 13.320n 
± 34% 9.410n ± 28% -29.35% (p=0.005 n=10) MultiWriteSyncer/4_discarder-128 10.830n ± 10% 8.883n ± 8% -17.98% (p=0.000 n=10) MultiWriteSyncer/4_discarder_with_buffer-128 119.0n ± 5% 104.1n ± 4% -12.52% (p=0.000 n=10) WriteSyncer/write_file_with_no_buffer-128 1.393µ ± 10% 1.409µ ± 7% ~ (p=1.000 n=10) ZapConsole-128 796.9n ± 14% 722.2n ± 7% -9.37% (p=0.003 n=10) JSONLogMarshalerFunc-128 1.233µ ± 5% 1.095µ ± 8% -11.20% (p=0.002 n=10) ZapJSON-128 560.7n ± 9% 547.9n ± 6% ~ (p=0.289 n=10) StandardJSON-128 628.7n ± 7% 566.2n ± 7% -9.95% (p=0.001 n=10) Sampler_Check/7_keys-128 8.068n ± 17% 8.232n ± 4% ~ (p=0.382 n=10) Sampler_Check/50_keys-128 4.064n ± 13% 3.610n ± 17% ~ (p=0.063 n=10) Sampler_Check/100_keys-128 6.559n ± 5% 6.386n ± 6% ~ (p=0.063 n=10) Sampler_CheckWithHook/7_keys-128 40.04n ± 3% 36.82n ± 6% -8.05% (p=0.000 n=10) Sampler_CheckWithHook/50_keys-128 39.48n ± 3% 36.48n ± 4% -7.61% (p=0.000 n=10) Sampler_CheckWithHook/100_keys-128 41.27n ± 5% 40.85n ± 9% ~ (p=0.353 n=10) TeeCheck-128 135.2n ± 11% 128.2n ± 10% ~ (p=0.190 n=10) geomean 77.98n 70.91n -9.07% shortname: spexs2 pkg: github.com/egonelbre/spexs2/_benchmark │ old │ new │ │ sec/op │ sec/op vs base │ Run/10k/1-128 21.58 ± 2% 19.68 ± 12% -8.84% (p=0.015 n=10) Run/10k/16-128 4.539 ± 6% 4.063 ± 7% -10.48% (p=0.000 n=10) geomean 9.898 8.941 -9.67% ``` Update #40724 Change-Id: I3c3c02e766e2f7402e385eddadbfe09361d82387 Reviewed-on: https://go-review.googlesource.com/c/go/+/719482 Reviewed-by: Vishwanatha HD Reviewed-by: Keith Randall Reviewed-by: Michael Pratt Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/ssa/config.go | 4 ++-- src/cmd/compile/internal/ssa/debug_lines_test.go | 2 +- src/internal/buildcfg/exp.go | 4 +++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index cb41bc5ed5a..6b419e7da77 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ 
b/src/cmd/compile/internal/ssa/config.go @@ -305,8 +305,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo c.registers = registersS390X[:] c.gpRegMask = gpRegMaskS390X c.fpRegMask = fpRegMaskS390X - //c.intParamRegs = paramIntRegS390X - //c.floatParamRegs = paramFloatRegS390X + c.intParamRegs = paramIntRegS390X + c.floatParamRegs = paramFloatRegS390X c.FPReg = framepointerRegS390X c.LinkReg = linkRegS390X c.hasGReg = true diff --git a/src/cmd/compile/internal/ssa/debug_lines_test.go b/src/cmd/compile/internal/ssa/debug_lines_test.go index 79dbd91c2fc..5dbfdeb7f6e 100644 --- a/src/cmd/compile/internal/ssa/debug_lines_test.go +++ b/src/cmd/compile/internal/ssa/debug_lines_test.go @@ -45,7 +45,7 @@ func testGoArch() string { func hasRegisterABI() bool { switch testGoArch() { - case "amd64", "arm64", "loong64", "ppc64", "ppc64le", "riscv": + case "amd64", "arm64", "loong64", "ppc64", "ppc64le", "riscv", "s390x": return true } return false diff --git a/src/internal/buildcfg/exp.go b/src/internal/buildcfg/exp.go index f1a1d8632ef..d33c9e65fb9 100644 --- a/src/internal/buildcfg/exp.go +++ b/src/internal/buildcfg/exp.go @@ -65,6 +65,8 @@ func ParseGOEXPERIMENT(goos, goarch, goexp string) (*ExperimentFlags, error) { case "amd64", "arm64", "loong64", "ppc64le", "ppc64", "riscv64": regabiAlwaysOn = true regabiSupported = true + case "s390x": + regabiSupported = true } // Older versions (anything before V16) of dsymutil don't handle @@ -143,7 +145,7 @@ func ParseGOEXPERIMENT(goos, goarch, goexp string) (*ExperimentFlags, error) { flags.RegabiWrappers = true flags.RegabiArgs = true } - // regabi is only supported on amd64, arm64, loong64, riscv64, ppc64 and ppc64le. + // regabi is only supported on amd64, arm64, loong64, riscv64, s390x, ppc64 and ppc64le. 
if !regabiSupported { flags.RegabiWrappers = false flags.RegabiArgs = false From a593ca9d657efb1ea021c9fc51cb528c398bbf4e Mon Sep 17 00:00:00 2001 From: Alexandre Daubois Date: Mon, 24 Nov 2025 08:14:18 +0000 Subject: [PATCH 14/15] runtime/cgo: add support for `any` param and return type When using `any` as param or return type of an exported function, we currently have the error `unrecognized Go type any`. `any` is an alias of `interface{}` which is already supported. This would avoid such change: https://github.com/php/frankenphp/pull/1976 Fixes #76340 Change-Id: I301838ff72e99ae78b035a8eff2405f6a145ed1a GitHub-Last-Rev: 7dfbccfa582bbc6e79ed29677391b9ae81a9b5bd GitHub-Pull-Request: golang/go#76325 Reviewed-on: https://go-review.googlesource.com/c/go/+/720960 Reviewed-by: Mark Freeman Auto-Submit: Ian Lance Taylor Reviewed-by: Keith Randall Reviewed-by: Ian Lance Taylor Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI --- src/cmd/cgo/gcc.go | 3 +++ src/cmd/cgo/internal/test/cgo_test.go | 1 + src/cmd/cgo/internal/test/test.go | 31 +++++++++++++++++++++++++++ src/cmd/cgo/internal/test/testx.go | 18 ++++++++++++++++ src/cmd/cgo/out.go | 3 +++ src/runtime/cgocall.go | 3 +++ 6 files changed, 59 insertions(+) diff --git a/src/cmd/cgo/gcc.go b/src/cmd/cgo/gcc.go index d3de3906b48..300ccae350b 100644 --- a/src/cmd/cgo/gcc.go +++ b/src/cmd/cgo/gcc.go @@ -1121,6 +1121,9 @@ func (p *Package) hasPointer(f *File, t ast.Expr, top bool) bool { if t.Name == "error" { return true } + if t.Name == "any" { + return true + } if goTypes[t.Name] != nil { return false } diff --git a/src/cmd/cgo/internal/test/cgo_test.go b/src/cmd/cgo/internal/test/cgo_test.go index 5393552e07a..04e06cf95ec 100644 --- a/src/cmd/cgo/internal/test/cgo_test.go +++ b/src/cmd/cgo/internal/test/cgo_test.go @@ -106,6 +106,7 @@ func TestSetEnv(t *testing.T) { testSetEnv(t) } func TestThreadLock(t *testing.T) { testThreadLockFunc(t) } func TestUnsignedInt(t *testing.T) { testUnsignedInt(t) } func 
TestZeroArgCallback(t *testing.T) { testZeroArgCallback(t) } +func Test76340(t *testing.T) { test76340(t) } func BenchmarkCgoCall(b *testing.B) { benchCgoCall(b) } func BenchmarkGoString(b *testing.B) { benchGoString(b) } diff --git a/src/cmd/cgo/internal/test/test.go b/src/cmd/cgo/internal/test/test.go index e83e367174a..4dd14facb50 100644 --- a/src/cmd/cgo/internal/test/test.go +++ b/src/cmd/cgo/internal/test/test.go @@ -959,6 +959,18 @@ char * const issue75751p = &issue75751v; #define issue75751m issue75751p char * const volatile issue75751p2 = &issue75751v; #define issue75751m2 issue75751p2 + +typedef struct { void *t; void *v; } GoInterface; +extern int exportAny76340Param(GoInterface); +extern GoInterface exportAny76340Return(int); + +int issue76340testFromC(GoInterface obj) { + return exportAny76340Param(obj); +} + +GoInterface issue76340returnFromC(int val) { + return exportAny76340Return(val); +} */ import "C" @@ -2407,3 +2419,22 @@ func test69086(t *testing.T) { func test75751() int { return int(*C.issue75751m) + int(*C.issue75751m2) } + +// Issue 76340. 
+func test76340(t *testing.T) { + var emptyInterface C.GoInterface + r1 := C.issue76340testFromC(emptyInterface) + if r1 != 0 { + t.Errorf("issue76340testFromC with nil interface: got %d, want 0", r1) + } + + r2 := C.issue76340returnFromC(42) + if r2.t == nil && r2.v == nil { + t.Error("issue76340returnFromC(42) returned nil interface") + } + + r3 := C.issue76340returnFromC(0) + if r3.t != nil || r3.v != nil { + t.Errorf("issue76340returnFromC(0) returned non-nil interface: got %v, want nil", r3) + } +} diff --git a/src/cmd/cgo/internal/test/testx.go b/src/cmd/cgo/internal/test/testx.go index 9a63b9e1008..21ba52260ef 100644 --- a/src/cmd/cgo/internal/test/testx.go +++ b/src/cmd/cgo/internal/test/testx.go @@ -595,3 +595,21 @@ func test49633(t *testing.T) { t.Errorf("msg = %q, want 'hello'", v.msg) } } + +//export exportAny76340Param +func exportAny76340Param(obj any) C.int { + if obj == nil { + return 0 + } + + return 1 +} + +//export exportAny76340Return +func exportAny76340Return(val C.int) any { + if val == 0 { + return nil + } + + return int(val) +} diff --git a/src/cmd/cgo/out.go b/src/cmd/cgo/out.go index 00c9e8c9a3e..dc1e5b29e59 100644 --- a/src/cmd/cgo/out.go +++ b/src/cmd/cgo/out.go @@ -1558,6 +1558,9 @@ func (p *Package) doCgoType(e ast.Expr, m map[ast.Expr]bool) *Type { if t.Name == "error" { return &Type{Size: 2 * p.PtrSize, Align: p.PtrSize, C: c("GoInterface")} } + if t.Name == "any" { + return &Type{Size: 2 * p.PtrSize, Align: p.PtrSize, C: c("GoInterface")} + } if r, ok := goTypes[t.Name]; ok { return goTypesFixup(r) } diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go index f01353ffa6c..55e7bdbdb55 100644 --- a/src/runtime/cgocall.go +++ b/src/runtime/cgocall.go @@ -796,6 +796,9 @@ func cgoCheckResult(val any) { ep := efaceOf(&val) t := ep._type + if t == nil { + return + } cgoCheckArg(t, ep.data, !t.IsDirectIface(), false, cgoResultFail) } From 02d1f3a06bc6900ad5c1b7c11b1fd38cbddef395 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 13 
Feb 2025 15:59:32 -0800 Subject: [PATCH 15/15] runtime: respect GOTRACEBACK for user-triggered runtime panics The documentation for GOTRACEBACK says that "single" is the default where the stack trace for only a single routine is printed except that it prints all stack traces if: there is no current goroutine or the failure is internal to the run-time. In the runtime, there are two types of panics: throwTypeUser and throwTypeRuntime. The latter more clearly corresponds to a "failure [that] is internal to the run-time", while the former corresponds to a problem triggered by a user mistake. Thus, a user-triggered panic (e.g., concurrent map access) should not result in a dump of all stack traces. Fixes #68019 Change-Id: I9b02f82535ddb9fd666f7158e2e4ee10f235646a Reviewed-on: https://go-review.googlesource.com/c/go/+/649535 LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Pratt Reviewed-by: Michael Knyszek --- src/runtime/runtime1.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go index 43e4c142362..64ee4c8d2e9 100644 --- a/src/runtime/runtime1.go +++ b/src/runtime/runtime1.go @@ -39,7 +39,7 @@ func gotraceback() (level int32, all, crash bool) { gp := getg() t := atomic.Load(&traceback_cache) crash = t&tracebackCrash != 0 - all = gp.m.throwing >= throwTypeUser || t&tracebackAll != 0 + all = gp.m.throwing > throwTypeUser || t&tracebackAll != 0 if gp.m.traceback != 0 { level = int32(gp.m.traceback) } else if gp.m.throwing >= throwTypeRuntime {