diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go
index 7b84ba0a6fb..dc951cd9a99 100644
--- a/src/runtime/mkpreempt.go
+++ b/src/runtime/mkpreempt.go
@@ -713,10 +713,11 @@ func genMIPS(g *gen, _64bit bool) {
 }
 
 func genLoong64(g *gen) {
-	p := g.p
+	const xReg = "R4" // *xRegState
+
+	p, label := g.p, g.label
 	mov := "MOVV"
-	movf := "MOVD"
 	add := "ADDV"
 	sub := "SUBV"
 	regsize := 8
 
@@ -732,12 +733,6 @@ func genLoong64(g *gen) {
 		l.add(mov, reg, regsize)
 	}
 
-	// Add floating point registers F0-F31.
-	for i := 0; i <= 31; i++ {
-		reg := fmt.Sprintf("F%d", i)
-		l.add(movf, reg, regsize)
-	}
-
 	// Add condition flag register fcc0-fcc7
 	sv := ""
 	rs := ""
@@ -764,12 +759,80 @@ func genLoong64(g *gen) {
 		mov+" %d(R3), R5\n"+rs,
 		regsize)
 
+	// Create layouts for lasx, lsx and fp registers.
+	lasxRegs := layout{sp: xReg}
+	lsxRegs := lasxRegs
+	fpRegs := lasxRegs
+	for i := 0; i <= 31; i++ {
+		lasxRegs.add("XVMOVQ", fmt.Sprintf("X%d", i), 256/8)
+		lsxRegs.add("VMOVQ", fmt.Sprintf("V%d", i), 128/8)
+		fpRegs.add("MOVD", fmt.Sprintf("F%d", i), 64/8)
+	}
+
+	for i := range lsxRegs.regs {
+		for j := range lsxRegs.regs[i].regs {
+			lsxRegs.regs[i].regs[j].pos = lasxRegs.regs[i].regs[j].pos
+			fpRegs.regs[i].regs[j].pos = lasxRegs.regs[i].regs[j].pos
+		}
+	}
+	writeXRegs(g.goarch, &lasxRegs)
+
 	// allocate frame, save PC of interrupted instruction (in LR)
 	p(mov+" R1, -%d(R3)", l.stack)
 	p(sub+" $%d, R3", l.stack)
 
+	p("// Save GPs")
 	l.save(g)
+
+	p("// Save extended register state to p.xRegs.scratch")
+	p("MOVV g_m(g), %s", xReg)
+	p("MOVV m_p(%s), %s", xReg, xReg)
+	p("ADDV $(p_xRegs+xRegPerP_scratch), %s, %s", xReg, xReg)
+
+	p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5")
+	p("BNE R5, saveLASX")
+
+	p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5")
+	p("BNE R5, saveLSX")
+
+	label("saveFP:")
+	fpRegs.save(g)
+	p("JMP preempt")
+
+	label("saveLSX:")
+	lsxRegs.save(g)
+	p("JMP preempt")
+
+	label("saveLASX:")
+	lasxRegs.save(g)
+
+	label("preempt:")
 	p("CALL ·asyncPreempt2(SB)")
+
+	p("// Restore non-GPs from *p.xRegs.cache")
+	p("MOVV g_m(g), %s", xReg)
+	p("MOVV m_p(%s), %s", xReg, xReg)
+	p("MOVV (p_xRegs+xRegPerP_cache)(%s), %s", xReg, xReg)
+
+	p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5")
+	p("BNE R5, restoreLASX")
+
+	p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5")
+	p("BNE R5, restoreLSX")
+
+	label("restoreFP:")
+	fpRegs.restore(g)
+	p("JMP restoreGPs")
+
+	label("restoreLSX:")
+	lsxRegs.restore(g)
+	p("JMP restoreGPs")
+
+	label("restoreLASX:")
+	lasxRegs.restore(g)
+
+	p("// Restore GPs")
+	label("restoreGPs:")
 	l.restore(g)
 	p(mov+" %d(R3), R1", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
 	p(mov+" (R3), R30") // load PC to REGTMP
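Note on the generator change above: mkpreempt now builds three parallel layouts over one buffer (LASX at 256 bits, LSX at 128, scalar FP at 64) and then copies the LASX offsets onto the two narrower layouts, so every save path addresses the same 32-byte slot per register and a single xRegs shape serves all three. A minimal runnable sketch of that offset-sharing idea follows; the flat regPos type and the loop here are illustrative stand-ins, not the generator's actual structures:

```go
package main

import "fmt"

// regPos is a simplified, flat stand-in for mkpreempt's layout
// bookkeeping: one save/restore op, one register name, one offset.
type regPos struct {
	op   string
	reg  string
	pos  int
	size int
}

func main() {
	var lasx, lsx, fp []regPos
	for i, off := 0, 0; i < 32; i++ {
		// Offsets advance at the 256-bit LASX stride.
		lasx = append(lasx, regPos{"XVMOVQ", fmt.Sprintf("X%d", i), off, 256 / 8})
		lsx = append(lsx, regPos{"VMOVQ", fmt.Sprintf("V%d", i), 0, 128 / 8})
		fp = append(fp, regPos{"MOVD", fmt.Sprintf("F%d", i), 0, 64 / 8})
		off += 256 / 8
	}
	// Copy the LASX offsets onto the narrower layouts, as the patch
	// does, so all three save paths address the same 32-byte slots.
	for i := range lsx {
		lsx[i].pos = lasx[i].pos
		fp[i].pos = lasx[i].pos
	}
	fmt.Printf("%s %s, %d(R4)\n", fp[1].op, fp[1].reg, fp[1].pos) // MOVD F1, 32(R4)
}
```

Printing the second FP entry confirms the point: the scalar path still writes F1 at byte 32, not byte 8, leaving room for the wider views of the same register.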
diff --git a/src/runtime/preempt_loong64.go b/src/runtime/preempt_loong64.go
new file mode 100644
index 00000000000..c7fec338f26
--- /dev/null
+++ b/src/runtime/preempt_loong64.go
@@ -0,0 +1,38 @@
+// Code generated by mkpreempt.go; DO NOT EDIT.
+
+package runtime
+
+type xRegs struct {
+	X0  [32]byte
+	X1  [32]byte
+	X2  [32]byte
+	X3  [32]byte
+	X4  [32]byte
+	X5  [32]byte
+	X6  [32]byte
+	X7  [32]byte
+	X8  [32]byte
+	X9  [32]byte
+	X10 [32]byte
+	X11 [32]byte
+	X12 [32]byte
+	X13 [32]byte
+	X14 [32]byte
+	X15 [32]byte
+	X16 [32]byte
+	X17 [32]byte
+	X18 [32]byte
+	X19 [32]byte
+	X20 [32]byte
+	X21 [32]byte
+	X22 [32]byte
+	X23 [32]byte
+	X24 [32]byte
+	X25 [32]byte
+	X26 [32]byte
+	X27 [32]byte
+	X28 [32]byte
+	X29 [32]byte
+	X30 [32]byte
+	X31 [32]byte
+}
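The generated struct gives each of the 32 extended registers a 32-byte slot, 1024 bytes in total, sized for the widest (LASX) view; the FP and LSX paths fill only the low 8 or 16 bytes of a slot at the same offsets. A small sketch to check the arithmetic; the [32][32]byte array is a condensed stand-in for the generated X0 through X31 fields, not the runtime's actual declaration:

```go
package main

import (
	"fmt"
	"unsafe"
)

// xRegs here condenses the generated struct: 32 slots of 32 bytes,
// one per extended register, sized for 256-bit LASX state.
type xRegs struct {
	X [32][32]byte
}

func main() {
	fmt.Println(unsafe.Sizeof(xRegs{})) // 1024
	// Slot i sits at offset i*32, matching the generated assembly:
	// MOVD Fi, (i*32)(R4); VMOVQ Vi, (i*32)(R4); XVMOVQ Xi, (i*32)(R4).
	for _, i := range []int{0, 1, 31} {
		fmt.Printf("slot %d at offset %d\n", i, i*32) // 0, 32, 992
	}
}
```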
diff --git a/src/runtime/preempt_loong64.s b/src/runtime/preempt_loong64.s
index 626dc4b6f67..4bc7ea3947b 100644
--- a/src/runtime/preempt_loong64.s
+++ b/src/runtime/preempt_loong64.s
@@ -4,8 +4,9 @@
 #include "textflag.h"
 
 TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
-	MOVV R1, -480(R3)
-	SUBV $480, R3
+	MOVV R1, -224(R3)
+	SUBV $224, R3
+	// Save GPs
 	MOVV R4, 8(R3)
 	MOVV R5, 16(R3)
 	MOVV R6, 24(R3)
@@ -32,38 +33,6 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
 	MOVV R28, 192(R3)
 	MOVV R29, 200(R3)
 	MOVV R31, 208(R3)
-	MOVD F0, 216(R3)
-	MOVD F1, 224(R3)
-	MOVD F2, 232(R3)
-	MOVD F3, 240(R3)
-	MOVD F4, 248(R3)
-	MOVD F5, 256(R3)
-	MOVD F6, 264(R3)
-	MOVD F7, 272(R3)
-	MOVD F8, 280(R3)
-	MOVD F9, 288(R3)
-	MOVD F10, 296(R3)
-	MOVD F11, 304(R3)
-	MOVD F12, 312(R3)
-	MOVD F13, 320(R3)
-	MOVD F14, 328(R3)
-	MOVD F15, 336(R3)
-	MOVD F16, 344(R3)
-	MOVD F17, 352(R3)
-	MOVD F18, 360(R3)
-	MOVD F19, 368(R3)
-	MOVD F20, 376(R3)
-	MOVD F21, 384(R3)
-	MOVD F22, 392(R3)
-	MOVD F23, 400(R3)
-	MOVD F24, 408(R3)
-	MOVD F25, 416(R3)
-	MOVD F26, 424(R3)
-	MOVD F27, 432(R3)
-	MOVD F28, 440(R3)
-	MOVD F29, 448(R3)
-	MOVD F30, 456(R3)
-	MOVD F31, 464(R3)
 	MOVV FCC0, R4
 	BSTRINSV $7, R4, $0, R5
 	MOVV FCC1, R4
@@ -80,9 +49,230 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
 	BSTRINSV $55, R4, $48, R5
 	MOVV FCC7, R4
 	BSTRINSV $63, R4, $56, R5
-	MOVV R5, 472(R3)
+	MOVV R5, 216(R3)
+	// Save extended register state to p.xRegs.scratch
+	MOVV g_m(g), R4
+	MOVV m_p(R4), R4
+	ADDV $(p_xRegs+xRegPerP_scratch), R4, R4
+	MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5
+	BNE R5, saveLASX
+	MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5
+	BNE R5, saveLSX
+saveFP:
+	MOVD F0, 0(R4)
+	MOVD F1, 32(R4)
+	MOVD F2, 64(R4)
+	MOVD F3, 96(R4)
+	MOVD F4, 128(R4)
+	MOVD F5, 160(R4)
+	MOVD F6, 192(R4)
+	MOVD F7, 224(R4)
+	MOVD F8, 256(R4)
+	MOVD F9, 288(R4)
+	MOVD F10, 320(R4)
+	MOVD F11, 352(R4)
+	MOVD F12, 384(R4)
+	MOVD F13, 416(R4)
+	MOVD F14, 448(R4)
+	MOVD F15, 480(R4)
+	MOVD F16, 512(R4)
+	MOVD F17, 544(R4)
+	MOVD F18, 576(R4)
+	MOVD F19, 608(R4)
+	MOVD F20, 640(R4)
+	MOVD F21, 672(R4)
+	MOVD F22, 704(R4)
+	MOVD F23, 736(R4)
+	MOVD F24, 768(R4)
+	MOVD F25, 800(R4)
+	MOVD F26, 832(R4)
+	MOVD F27, 864(R4)
+	MOVD F28, 896(R4)
+	MOVD F29, 928(R4)
+	MOVD F30, 960(R4)
+	MOVD F31, 992(R4)
+	JMP preempt
+saveLSX:
+	VMOVQ V0, 0(R4)
+	VMOVQ V1, 32(R4)
+	VMOVQ V2, 64(R4)
+	VMOVQ V3, 96(R4)
+	VMOVQ V4, 128(R4)
+	VMOVQ V5, 160(R4)
+	VMOVQ V6, 192(R4)
+	VMOVQ V7, 224(R4)
+	VMOVQ V8, 256(R4)
+	VMOVQ V9, 288(R4)
+	VMOVQ V10, 320(R4)
+	VMOVQ V11, 352(R4)
+	VMOVQ V12, 384(R4)
+	VMOVQ V13, 416(R4)
+	VMOVQ V14, 448(R4)
+	VMOVQ V15, 480(R4)
+	VMOVQ V16, 512(R4)
+	VMOVQ V17, 544(R4)
+	VMOVQ V18, 576(R4)
+	VMOVQ V19, 608(R4)
+	VMOVQ V20, 640(R4)
+	VMOVQ V21, 672(R4)
+	VMOVQ V22, 704(R4)
+	VMOVQ V23, 736(R4)
+	VMOVQ V24, 768(R4)
+	VMOVQ V25, 800(R4)
+	VMOVQ V26, 832(R4)
+	VMOVQ V27, 864(R4)
+	VMOVQ V28, 896(R4)
+	VMOVQ V29, 928(R4)
+	VMOVQ V30, 960(R4)
+	VMOVQ V31, 992(R4)
+	JMP preempt
+saveLASX:
+	XVMOVQ X0, 0(R4)
+	XVMOVQ X1, 32(R4)
+	XVMOVQ X2, 64(R4)
+	XVMOVQ X3, 96(R4)
+	XVMOVQ X4, 128(R4)
+	XVMOVQ X5, 160(R4)
+	XVMOVQ X6, 192(R4)
+	XVMOVQ X7, 224(R4)
+	XVMOVQ X8, 256(R4)
+	XVMOVQ X9, 288(R4)
+	XVMOVQ X10, 320(R4)
+	XVMOVQ X11, 352(R4)
+	XVMOVQ X12, 384(R4)
+	XVMOVQ X13, 416(R4)
+	XVMOVQ X14, 448(R4)
+	XVMOVQ X15, 480(R4)
+	XVMOVQ X16, 512(R4)
+	XVMOVQ X17, 544(R4)
+	XVMOVQ X18, 576(R4)
+	XVMOVQ X19, 608(R4)
+	XVMOVQ X20, 640(R4)
+	XVMOVQ X21, 672(R4)
+	XVMOVQ X22, 704(R4)
+	XVMOVQ X23, 736(R4)
+	XVMOVQ X24, 768(R4)
+	XVMOVQ X25, 800(R4)
+	XVMOVQ X26, 832(R4)
+	XVMOVQ X27, 864(R4)
+	XVMOVQ X28, 896(R4)
+	XVMOVQ X29, 928(R4)
+	XVMOVQ X30, 960(R4)
+	XVMOVQ X31, 992(R4)
+preempt:
 	CALL ·asyncPreempt2(SB)
-	MOVV 472(R3), R5
+	// Restore non-GPs from *p.xRegs.cache
+	MOVV g_m(g), R4
+	MOVV m_p(R4), R4
+	MOVV (p_xRegs+xRegPerP_cache)(R4), R4
+	MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5
+	BNE R5, restoreLASX
+	MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5
+	BNE R5, restoreLSX
+restoreFP:
+	MOVD 992(R4), F31
+	MOVD 960(R4), F30
+	MOVD 928(R4), F29
+	MOVD 896(R4), F28
+	MOVD 864(R4), F27
+	MOVD 832(R4), F26
+	MOVD 800(R4), F25
+	MOVD 768(R4), F24
+	MOVD 736(R4), F23
+	MOVD 704(R4), F22
+	MOVD 672(R4), F21
+	MOVD 640(R4), F20
+	MOVD 608(R4), F19
+	MOVD 576(R4), F18
+	MOVD 544(R4), F17
+	MOVD 512(R4), F16
+	MOVD 480(R4), F15
+	MOVD 448(R4), F14
+	MOVD 416(R4), F13
+	MOVD 384(R4), F12
+	MOVD 352(R4), F11
+	MOVD 320(R4), F10
+	MOVD 288(R4), F9
+	MOVD 256(R4), F8
+	MOVD 224(R4), F7
+	MOVD 192(R4), F6
+	MOVD 160(R4), F5
+	MOVD 128(R4), F4
+	MOVD 96(R4), F3
+	MOVD 64(R4), F2
+	MOVD 32(R4), F1
+	MOVD 0(R4), F0
+	JMP restoreGPs
+restoreLSX:
+	VMOVQ 992(R4), V31
+	VMOVQ 960(R4), V30
+	VMOVQ 928(R4), V29
+	VMOVQ 896(R4), V28
+	VMOVQ 864(R4), V27
+	VMOVQ 832(R4), V26
+	VMOVQ 800(R4), V25
+	VMOVQ 768(R4), V24
+	VMOVQ 736(R4), V23
+	VMOVQ 704(R4), V22
+	VMOVQ 672(R4), V21
+	VMOVQ 640(R4), V20
+	VMOVQ 608(R4), V19
+	VMOVQ 576(R4), V18
+	VMOVQ 544(R4), V17
+	VMOVQ 512(R4), V16
+	VMOVQ 480(R4), V15
+	VMOVQ 448(R4), V14
+	VMOVQ 416(R4), V13
+	VMOVQ 384(R4), V12
+	VMOVQ 352(R4), V11
+	VMOVQ 320(R4), V10
+	VMOVQ 288(R4), V9
+	VMOVQ 256(R4), V8
+	VMOVQ 224(R4), V7
+	VMOVQ 192(R4), V6
+	VMOVQ 160(R4), V5
+	VMOVQ 128(R4), V4
+	VMOVQ 96(R4), V3
+	VMOVQ 64(R4), V2
+	VMOVQ 32(R4), V1
+	VMOVQ 0(R4), V0
+	JMP restoreGPs
+restoreLASX:
+	XVMOVQ 992(R4), X31
+	XVMOVQ 960(R4), X30
+	XVMOVQ 928(R4), X29
+	XVMOVQ 896(R4), X28
+	XVMOVQ 864(R4), X27
+	XVMOVQ 832(R4), X26
+	XVMOVQ 800(R4), X25
+	XVMOVQ 768(R4), X24
+	XVMOVQ 736(R4), X23
+	XVMOVQ 704(R4), X22
+	XVMOVQ 672(R4), X21
+	XVMOVQ 640(R4), X20
+	XVMOVQ 608(R4), X19
+	XVMOVQ 576(R4), X18
+	XVMOVQ 544(R4), X17
+	XVMOVQ 512(R4), X16
+	XVMOVQ 480(R4), X15
+	XVMOVQ 448(R4), X14
+	XVMOVQ 416(R4), X13
+	XVMOVQ 384(R4), X12
+	XVMOVQ 352(R4), X11
+	XVMOVQ 320(R4), X10
+	XVMOVQ 288(R4), X9
+	XVMOVQ 256(R4), X8
+	XVMOVQ 224(R4), X7
+	XVMOVQ 192(R4), X6
+	XVMOVQ 160(R4), X5
+	XVMOVQ 128(R4), X4
+	XVMOVQ 96(R4), X3
+	XVMOVQ 64(R4), X2
+	XVMOVQ 32(R4), X1
+	XVMOVQ 0(R4), X0
+	// Restore GPs
+restoreGPs:
+	MOVV 216(R3), R5
 	BSTRPICKV $7, R5, $0, R4
 	MOVV R4, FCC0
 	BSTRPICKV $15, R5, $8, R4
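The FCC handling in the context lines around this hunk boundary predates the patch and is left in place: the eight 1-bit condition-flag registers are packed into a single 64-bit word, one byte lane per flag, so one MOVV spills all of them to the 216(R3) slot. A Go analogue of the BSTRINSV/BSTRPICKV packing, for illustration only:

```go
package main

import "fmt"

// packFCC mirrors the save side: each 1-bit flag lands in its own
// byte lane of the word that MOVV then stores in one instruction.
func packFCC(fcc [8]bool) (w uint64) {
	for i, set := range fcc {
		if set {
			w |= 1 << (8 * uint(i)) // BSTRINSV $(8i+7), R4, $(8i), R5
		}
	}
	return w
}

// unpackFCC mirrors the restore side, extracting each byte lane.
func unpackFCC(w uint64) (fcc [8]bool) {
	for i := range fcc {
		fcc[i] = (w>>(8*uint(i)))&0xff != 0 // BSTRPICKV $(8i+7), R5, $(8i), R4
	}
	return fcc
}

func main() {
	w := packFCC([8]bool{true, false, true})
	fmt.Printf("%#x\n", w) // 0x10001
	fmt.Println(unpackFCC(w)[2]) // true
}
```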
@@ -99,38 +289,6 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
 	MOVV R4, FCC6
 	BSTRPICKV $63, R5, $56, R4
 	MOVV R4, FCC7
-	MOVD 464(R3), F31
-	MOVD 456(R3), F30
-	MOVD 448(R3), F29
-	MOVD 440(R3), F28
-	MOVD 432(R3), F27
-	MOVD 424(R3), F26
-	MOVD 416(R3), F25
-	MOVD 408(R3), F24
-	MOVD 400(R3), F23
-	MOVD 392(R3), F22
-	MOVD 384(R3), F21
-	MOVD 376(R3), F20
-	MOVD 368(R3), F19
-	MOVD 360(R3), F18
-	MOVD 352(R3), F17
-	MOVD 344(R3), F16
-	MOVD 336(R3), F15
-	MOVD 328(R3), F14
-	MOVD 320(R3), F13
-	MOVD 312(R3), F12
-	MOVD 304(R3), F11
-	MOVD 296(R3), F10
-	MOVD 288(R3), F9
-	MOVD 280(R3), F8
-	MOVD 272(R3), F7
-	MOVD 264(R3), F6
-	MOVD 256(R3), F5
-	MOVD 248(R3), F4
-	MOVD 240(R3), F3
-	MOVD 232(R3), F2
-	MOVD 224(R3), F1
-	MOVD 216(R3), F0
 	MOVV 208(R3), R31
 	MOVV 200(R3), R29
 	MOVV 192(R3), R28
@@ -157,7 +315,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
 	MOVV 24(R3), R6
 	MOVV 16(R3), R5
 	MOVV 8(R3), R4
-	MOVV 480(R3), R1
+	MOVV 224(R3), R1
 	MOVV (R3), R30
-	ADDV $488, R3
+	ADDV $232, R3
 	JMP (R30)
diff --git a/src/runtime/preempt_noxreg.go b/src/runtime/preempt_noxreg.go
index 9f03b2b3334..977bf0bcec7 100644
--- a/src/runtime/preempt_noxreg.go
+++ b/src/runtime/preempt_noxreg.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-//go:build !amd64 && !arm64
+//go:build !amd64 && !arm64 && !loong64
 
 // This provides common support for architectures that DO NOT use extended
 // register state in asynchronous preemption.
diff --git a/src/runtime/preempt_xreg.go b/src/runtime/preempt_xreg.go
index f4578a4d76d..cc52c5f3c4e 100644
--- a/src/runtime/preempt_xreg.go
+++ b/src/runtime/preempt_xreg.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-//go:build amd64 || arm64
+//go:build amd64 || arm64 || loong64
 
 // This provides common support for architectures that use extended register
 // state in asynchronous preemption.
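Taken together, the save path spills into per-P scratch space, since asyncPreempt runs at an arbitrary instruction and cannot allocate, while the restore path reads back through a pointer that asyncPreempt2 is expected to have filled in. A hedged sketch of that per-P bookkeeping; the field names are inferred from the p_xRegs+xRegPerP_scratch and xRegPerP_cache symbols in the assembly, and the authoritative definitions live in preempt_xreg.go, which this patch now builds on loong64:

```go
// Package sketch mirrors (approximately) the runtime bookkeeping the
// new loong64 assembly addresses; it is illustrative, not the
// runtime's actual code.
package sketch

// xRegs condenses the generated 32-slot, 32-bytes-per-slot layout.
type xRegs struct{ X [32][32]byte }

// xRegState is assumed to be the long-lived home for one goroutine's
// saved extended register state.
type xRegState struct{ regs xRegs }

type xRegPerP struct {
	// scratch: the save path always spills here first; the assembly
	// computes its address with ADDV $(p_xRegs+xRegPerP_scratch).
	scratch xRegs
	// cache: the restore path loads this pointer, per
	// MOVV (p_xRegs+xRegPerP_cache)(R4), R4, and reads the registers
	// back after asyncPreempt2 has moved the state off scratch.
	cache *xRegState
}
```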