mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
runtime: save lasx and lsx registers in loong64 async preemption
This is a port of CL 669195 and CL 695916, adjusted to save the loong64 LASX and LSX registers off-stack (in p.xRegs) rather than in the asyncPreempt stack frame.

Change-Id: Ie56787c76259a9545f5a8adcb09f588c8451bbd6
Reviewed-on: https://go-review.googlesource.com/c/go/+/711180
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
This commit is contained in:
parent
79ae97fe9b
commit
99cf4d671c
5 changed files with 339 additions and 80 deletions
|
|
@ -713,10 +713,11 @@ func genMIPS(g *gen, _64bit bool) {
|
|||
}
|
||||
|
||||
func genLoong64(g *gen) {
|
||||
p := g.p
|
||||
const xReg = "R4" // *xRegState
|
||||
|
||||
p, label := g.p, g.label
|
||||
|
||||
mov := "MOVV"
|
||||
movf := "MOVD"
|
||||
add := "ADDV"
|
||||
sub := "SUBV"
|
||||
regsize := 8
|
||||
|
|
@ -732,12 +733,6 @@ func genLoong64(g *gen) {
|
|||
l.add(mov, reg, regsize)
|
||||
}
|
||||
|
||||
// Add floating point registers F0-F31.
|
||||
for i := 0; i <= 31; i++ {
|
||||
reg := fmt.Sprintf("F%d", i)
|
||||
l.add(movf, reg, regsize)
|
||||
}
|
||||
|
||||
// Add condition flag register fcc0-fcc7
|
||||
sv := ""
|
||||
rs := ""
|
||||
|
|
@ -764,12 +759,80 @@ func genLoong64(g *gen) {
|
|||
mov+" %d(R3), R5\n"+rs,
|
||||
regsize)
|
||||
|
||||
// Create layouts for lasx, lsx and fp registers.
|
||||
lasxRegs := layout{sp: xReg}
|
||||
lsxRegs := lasxRegs
|
||||
fpRegs := lasxRegs
|
||||
for i := 0; i <= 31; i++ {
|
||||
lasxRegs.add("XVMOVQ", fmt.Sprintf("X%d", i), 256/8)
|
||||
lsxRegs.add("VMOVQ", fmt.Sprintf("V%d", i), 128/8)
|
||||
fpRegs.add("MOVD", fmt.Sprintf("F%d", i), 64/8)
|
||||
}
|
||||
|
||||
for i := range lsxRegs.regs {
|
||||
for j := range lsxRegs.regs[i].regs {
|
||||
lsxRegs.regs[i].regs[j].pos = lasxRegs.regs[i].regs[j].pos
|
||||
fpRegs.regs[i].regs[j].pos = lasxRegs.regs[i].regs[j].pos
|
||||
}
|
||||
}
|
||||
writeXRegs(g.goarch, &lasxRegs)
|
||||
|
||||
// allocate frame, save PC of interrupted instruction (in LR)
|
||||
p(mov+" R1, -%d(R3)", l.stack)
|
||||
p(sub+" $%d, R3", l.stack)
|
||||
|
||||
p("// Save GPs")
|
||||
l.save(g)
|
||||
|
||||
p("// Save extended register state to p.xRegs.scratch")
|
||||
p("MOVV g_m(g), %s", xReg)
|
||||
p("MOVV m_p(%s), %s", xReg, xReg)
|
||||
p("ADDV $(p_xRegs+xRegPerP_scratch), %s, %s", xReg, xReg)
|
||||
|
||||
p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5")
|
||||
p("BNE R5, saveLASX")
|
||||
|
||||
p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5")
|
||||
p("BNE R5, saveLSX")
|
||||
|
||||
label("saveFP:")
|
||||
fpRegs.save(g)
|
||||
p("JMP preempt")
|
||||
|
||||
label("saveLSX:")
|
||||
lsxRegs.save(g)
|
||||
p("JMP preempt")
|
||||
|
||||
label("saveLASX:")
|
||||
lasxRegs.save(g)
|
||||
|
||||
label("preempt:")
|
||||
p("CALL ·asyncPreempt2(SB)")
|
||||
|
||||
p("// Restore non-GPs from *p.xRegs.cache")
|
||||
p("MOVV g_m(g), %s", xReg)
|
||||
p("MOVV m_p(%s), %s", xReg, xReg)
|
||||
p("MOVV (p_xRegs+xRegPerP_cache)(%s), %s", xReg, xReg)
|
||||
|
||||
p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5")
|
||||
p("BNE R5, restoreLASX")
|
||||
|
||||
p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5")
|
||||
p("BNE R5, restoreLSX")
|
||||
|
||||
label("restoreFP:")
|
||||
fpRegs.restore(g)
|
||||
p("JMP restoreGPs")
|
||||
|
||||
label("restoreLSX:")
|
||||
lsxRegs.restore(g)
|
||||
p("JMP restoreGPs")
|
||||
|
||||
label("restoreLASX:")
|
||||
lasxRegs.restore(g)
|
||||
|
||||
p("// Restore GPs")
|
||||
label("restoreGPs:")
|
||||
l.restore(g)
|
||||
|
||||
p(mov+" %d(R3), R1", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
|
||||
|
|
|
|||
38
src/runtime/preempt_loong64.go
Normal file
38
src/runtime/preempt_loong64.go
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
// Code generated by mkpreempt.go; DO NOT EDIT.
|
||||
|
||||
package runtime
|
||||
|
||||
type xRegs struct {
|
||||
X0 [32]byte
|
||||
X1 [32]byte
|
||||
X2 [32]byte
|
||||
X3 [32]byte
|
||||
X4 [32]byte
|
||||
X5 [32]byte
|
||||
X6 [32]byte
|
||||
X7 [32]byte
|
||||
X8 [32]byte
|
||||
X9 [32]byte
|
||||
X10 [32]byte
|
||||
X11 [32]byte
|
||||
X12 [32]byte
|
||||
X13 [32]byte
|
||||
X14 [32]byte
|
||||
X15 [32]byte
|
||||
X16 [32]byte
|
||||
X17 [32]byte
|
||||
X18 [32]byte
|
||||
X19 [32]byte
|
||||
X20 [32]byte
|
||||
X21 [32]byte
|
||||
X22 [32]byte
|
||||
X23 [32]byte
|
||||
X24 [32]byte
|
||||
X25 [32]byte
|
||||
X26 [32]byte
|
||||
X27 [32]byte
|
||||
X28 [32]byte
|
||||
X29 [32]byte
|
||||
X30 [32]byte
|
||||
X31 [32]byte
|
||||
}
|
||||
|
|
@ -4,8 +4,9 @@
|
|||
#include "textflag.h"
|
||||
|
||||
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
|
||||
MOVV R1, -480(R3)
|
||||
SUBV $480, R3
|
||||
MOVV R1, -224(R3)
|
||||
SUBV $224, R3
|
||||
// Save GPs
|
||||
MOVV R4, 8(R3)
|
||||
MOVV R5, 16(R3)
|
||||
MOVV R6, 24(R3)
|
||||
|
|
@ -32,38 +33,6 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
|
|||
MOVV R28, 192(R3)
|
||||
MOVV R29, 200(R3)
|
||||
MOVV R31, 208(R3)
|
||||
MOVD F0, 216(R3)
|
||||
MOVD F1, 224(R3)
|
||||
MOVD F2, 232(R3)
|
||||
MOVD F3, 240(R3)
|
||||
MOVD F4, 248(R3)
|
||||
MOVD F5, 256(R3)
|
||||
MOVD F6, 264(R3)
|
||||
MOVD F7, 272(R3)
|
||||
MOVD F8, 280(R3)
|
||||
MOVD F9, 288(R3)
|
||||
MOVD F10, 296(R3)
|
||||
MOVD F11, 304(R3)
|
||||
MOVD F12, 312(R3)
|
||||
MOVD F13, 320(R3)
|
||||
MOVD F14, 328(R3)
|
||||
MOVD F15, 336(R3)
|
||||
MOVD F16, 344(R3)
|
||||
MOVD F17, 352(R3)
|
||||
MOVD F18, 360(R3)
|
||||
MOVD F19, 368(R3)
|
||||
MOVD F20, 376(R3)
|
||||
MOVD F21, 384(R3)
|
||||
MOVD F22, 392(R3)
|
||||
MOVD F23, 400(R3)
|
||||
MOVD F24, 408(R3)
|
||||
MOVD F25, 416(R3)
|
||||
MOVD F26, 424(R3)
|
||||
MOVD F27, 432(R3)
|
||||
MOVD F28, 440(R3)
|
||||
MOVD F29, 448(R3)
|
||||
MOVD F30, 456(R3)
|
||||
MOVD F31, 464(R3)
|
||||
MOVV FCC0, R4
|
||||
BSTRINSV $7, R4, $0, R5
|
||||
MOVV FCC1, R4
|
||||
|
|
@ -80,9 +49,230 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
|
|||
BSTRINSV $55, R4, $48, R5
|
||||
MOVV FCC7, R4
|
||||
BSTRINSV $63, R4, $56, R5
|
||||
MOVV R5, 472(R3)
|
||||
MOVV R5, 216(R3)
|
||||
// Save extended register state to p.xRegs.scratch
|
||||
MOVV g_m(g), R4
|
||||
MOVV m_p(R4), R4
|
||||
ADDV $(p_xRegs+xRegPerP_scratch), R4, R4
|
||||
MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5
|
||||
BNE R5, saveLASX
|
||||
MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5
|
||||
BNE R5, saveLSX
|
||||
saveFP:
|
||||
MOVD F0, 0(R4)
|
||||
MOVD F1, 32(R4)
|
||||
MOVD F2, 64(R4)
|
||||
MOVD F3, 96(R4)
|
||||
MOVD F4, 128(R4)
|
||||
MOVD F5, 160(R4)
|
||||
MOVD F6, 192(R4)
|
||||
MOVD F7, 224(R4)
|
||||
MOVD F8, 256(R4)
|
||||
MOVD F9, 288(R4)
|
||||
MOVD F10, 320(R4)
|
||||
MOVD F11, 352(R4)
|
||||
MOVD F12, 384(R4)
|
||||
MOVD F13, 416(R4)
|
||||
MOVD F14, 448(R4)
|
||||
MOVD F15, 480(R4)
|
||||
MOVD F16, 512(R4)
|
||||
MOVD F17, 544(R4)
|
||||
MOVD F18, 576(R4)
|
||||
MOVD F19, 608(R4)
|
||||
MOVD F20, 640(R4)
|
||||
MOVD F21, 672(R4)
|
||||
MOVD F22, 704(R4)
|
||||
MOVD F23, 736(R4)
|
||||
MOVD F24, 768(R4)
|
||||
MOVD F25, 800(R4)
|
||||
MOVD F26, 832(R4)
|
||||
MOVD F27, 864(R4)
|
||||
MOVD F28, 896(R4)
|
||||
MOVD F29, 928(R4)
|
||||
MOVD F30, 960(R4)
|
||||
MOVD F31, 992(R4)
|
||||
JMP preempt
|
||||
saveLSX:
|
||||
VMOVQ V0, 0(R4)
|
||||
VMOVQ V1, 32(R4)
|
||||
VMOVQ V2, 64(R4)
|
||||
VMOVQ V3, 96(R4)
|
||||
VMOVQ V4, 128(R4)
|
||||
VMOVQ V5, 160(R4)
|
||||
VMOVQ V6, 192(R4)
|
||||
VMOVQ V7, 224(R4)
|
||||
VMOVQ V8, 256(R4)
|
||||
VMOVQ V9, 288(R4)
|
||||
VMOVQ V10, 320(R4)
|
||||
VMOVQ V11, 352(R4)
|
||||
VMOVQ V12, 384(R4)
|
||||
VMOVQ V13, 416(R4)
|
||||
VMOVQ V14, 448(R4)
|
||||
VMOVQ V15, 480(R4)
|
||||
VMOVQ V16, 512(R4)
|
||||
VMOVQ V17, 544(R4)
|
||||
VMOVQ V18, 576(R4)
|
||||
VMOVQ V19, 608(R4)
|
||||
VMOVQ V20, 640(R4)
|
||||
VMOVQ V21, 672(R4)
|
||||
VMOVQ V22, 704(R4)
|
||||
VMOVQ V23, 736(R4)
|
||||
VMOVQ V24, 768(R4)
|
||||
VMOVQ V25, 800(R4)
|
||||
VMOVQ V26, 832(R4)
|
||||
VMOVQ V27, 864(R4)
|
||||
VMOVQ V28, 896(R4)
|
||||
VMOVQ V29, 928(R4)
|
||||
VMOVQ V30, 960(R4)
|
||||
VMOVQ V31, 992(R4)
|
||||
JMP preempt
|
||||
saveLASX:
|
||||
XVMOVQ X0, 0(R4)
|
||||
XVMOVQ X1, 32(R4)
|
||||
XVMOVQ X2, 64(R4)
|
||||
XVMOVQ X3, 96(R4)
|
||||
XVMOVQ X4, 128(R4)
|
||||
XVMOVQ X5, 160(R4)
|
||||
XVMOVQ X6, 192(R4)
|
||||
XVMOVQ X7, 224(R4)
|
||||
XVMOVQ X8, 256(R4)
|
||||
XVMOVQ X9, 288(R4)
|
||||
XVMOVQ X10, 320(R4)
|
||||
XVMOVQ X11, 352(R4)
|
||||
XVMOVQ X12, 384(R4)
|
||||
XVMOVQ X13, 416(R4)
|
||||
XVMOVQ X14, 448(R4)
|
||||
XVMOVQ X15, 480(R4)
|
||||
XVMOVQ X16, 512(R4)
|
||||
XVMOVQ X17, 544(R4)
|
||||
XVMOVQ X18, 576(R4)
|
||||
XVMOVQ X19, 608(R4)
|
||||
XVMOVQ X20, 640(R4)
|
||||
XVMOVQ X21, 672(R4)
|
||||
XVMOVQ X22, 704(R4)
|
||||
XVMOVQ X23, 736(R4)
|
||||
XVMOVQ X24, 768(R4)
|
||||
XVMOVQ X25, 800(R4)
|
||||
XVMOVQ X26, 832(R4)
|
||||
XVMOVQ X27, 864(R4)
|
||||
XVMOVQ X28, 896(R4)
|
||||
XVMOVQ X29, 928(R4)
|
||||
XVMOVQ X30, 960(R4)
|
||||
XVMOVQ X31, 992(R4)
|
||||
preempt:
|
||||
CALL ·asyncPreempt2(SB)
|
||||
MOVV 472(R3), R5
|
||||
// Restore non-GPs from *p.xRegs.cache
|
||||
MOVV g_m(g), R4
|
||||
MOVV m_p(R4), R4
|
||||
MOVV (p_xRegs+xRegPerP_cache)(R4), R4
|
||||
MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5
|
||||
BNE R5, restoreLASX
|
||||
MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5
|
||||
BNE R5, restoreLSX
|
||||
restoreFP:
|
||||
MOVD 992(R4), F31
|
||||
MOVD 960(R4), F30
|
||||
MOVD 928(R4), F29
|
||||
MOVD 896(R4), F28
|
||||
MOVD 864(R4), F27
|
||||
MOVD 832(R4), F26
|
||||
MOVD 800(R4), F25
|
||||
MOVD 768(R4), F24
|
||||
MOVD 736(R4), F23
|
||||
MOVD 704(R4), F22
|
||||
MOVD 672(R4), F21
|
||||
MOVD 640(R4), F20
|
||||
MOVD 608(R4), F19
|
||||
MOVD 576(R4), F18
|
||||
MOVD 544(R4), F17
|
||||
MOVD 512(R4), F16
|
||||
MOVD 480(R4), F15
|
||||
MOVD 448(R4), F14
|
||||
MOVD 416(R4), F13
|
||||
MOVD 384(R4), F12
|
||||
MOVD 352(R4), F11
|
||||
MOVD 320(R4), F10
|
||||
MOVD 288(R4), F9
|
||||
MOVD 256(R4), F8
|
||||
MOVD 224(R4), F7
|
||||
MOVD 192(R4), F6
|
||||
MOVD 160(R4), F5
|
||||
MOVD 128(R4), F4
|
||||
MOVD 96(R4), F3
|
||||
MOVD 64(R4), F2
|
||||
MOVD 32(R4), F1
|
||||
MOVD 0(R4), F0
|
||||
JMP restoreGPs
|
||||
restoreLSX:
|
||||
VMOVQ 992(R4), V31
|
||||
VMOVQ 960(R4), V30
|
||||
VMOVQ 928(R4), V29
|
||||
VMOVQ 896(R4), V28
|
||||
VMOVQ 864(R4), V27
|
||||
VMOVQ 832(R4), V26
|
||||
VMOVQ 800(R4), V25
|
||||
VMOVQ 768(R4), V24
|
||||
VMOVQ 736(R4), V23
|
||||
VMOVQ 704(R4), V22
|
||||
VMOVQ 672(R4), V21
|
||||
VMOVQ 640(R4), V20
|
||||
VMOVQ 608(R4), V19
|
||||
VMOVQ 576(R4), V18
|
||||
VMOVQ 544(R4), V17
|
||||
VMOVQ 512(R4), V16
|
||||
VMOVQ 480(R4), V15
|
||||
VMOVQ 448(R4), V14
|
||||
VMOVQ 416(R4), V13
|
||||
VMOVQ 384(R4), V12
|
||||
VMOVQ 352(R4), V11
|
||||
VMOVQ 320(R4), V10
|
||||
VMOVQ 288(R4), V9
|
||||
VMOVQ 256(R4), V8
|
||||
VMOVQ 224(R4), V7
|
||||
VMOVQ 192(R4), V6
|
||||
VMOVQ 160(R4), V5
|
||||
VMOVQ 128(R4), V4
|
||||
VMOVQ 96(R4), V3
|
||||
VMOVQ 64(R4), V2
|
||||
VMOVQ 32(R4), V1
|
||||
VMOVQ 0(R4), V0
|
||||
JMP restoreGPs
|
||||
restoreLASX:
|
||||
XVMOVQ 992(R4), X31
|
||||
XVMOVQ 960(R4), X30
|
||||
XVMOVQ 928(R4), X29
|
||||
XVMOVQ 896(R4), X28
|
||||
XVMOVQ 864(R4), X27
|
||||
XVMOVQ 832(R4), X26
|
||||
XVMOVQ 800(R4), X25
|
||||
XVMOVQ 768(R4), X24
|
||||
XVMOVQ 736(R4), X23
|
||||
XVMOVQ 704(R4), X22
|
||||
XVMOVQ 672(R4), X21
|
||||
XVMOVQ 640(R4), X20
|
||||
XVMOVQ 608(R4), X19
|
||||
XVMOVQ 576(R4), X18
|
||||
XVMOVQ 544(R4), X17
|
||||
XVMOVQ 512(R4), X16
|
||||
XVMOVQ 480(R4), X15
|
||||
XVMOVQ 448(R4), X14
|
||||
XVMOVQ 416(R4), X13
|
||||
XVMOVQ 384(R4), X12
|
||||
XVMOVQ 352(R4), X11
|
||||
XVMOVQ 320(R4), X10
|
||||
XVMOVQ 288(R4), X9
|
||||
XVMOVQ 256(R4), X8
|
||||
XVMOVQ 224(R4), X7
|
||||
XVMOVQ 192(R4), X6
|
||||
XVMOVQ 160(R4), X5
|
||||
XVMOVQ 128(R4), X4
|
||||
XVMOVQ 96(R4), X3
|
||||
XVMOVQ 64(R4), X2
|
||||
XVMOVQ 32(R4), X1
|
||||
XVMOVQ 0(R4), X0
|
||||
// Restore GPs
|
||||
restoreGPs:
|
||||
MOVV 216(R3), R5
|
||||
BSTRPICKV $7, R5, $0, R4
|
||||
MOVV R4, FCC0
|
||||
BSTRPICKV $15, R5, $8, R4
|
||||
|
|
@ -99,38 +289,6 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
|
|||
MOVV R4, FCC6
|
||||
BSTRPICKV $63, R5, $56, R4
|
||||
MOVV R4, FCC7
|
||||
MOVD 464(R3), F31
|
||||
MOVD 456(R3), F30
|
||||
MOVD 448(R3), F29
|
||||
MOVD 440(R3), F28
|
||||
MOVD 432(R3), F27
|
||||
MOVD 424(R3), F26
|
||||
MOVD 416(R3), F25
|
||||
MOVD 408(R3), F24
|
||||
MOVD 400(R3), F23
|
||||
MOVD 392(R3), F22
|
||||
MOVD 384(R3), F21
|
||||
MOVD 376(R3), F20
|
||||
MOVD 368(R3), F19
|
||||
MOVD 360(R3), F18
|
||||
MOVD 352(R3), F17
|
||||
MOVD 344(R3), F16
|
||||
MOVD 336(R3), F15
|
||||
MOVD 328(R3), F14
|
||||
MOVD 320(R3), F13
|
||||
MOVD 312(R3), F12
|
||||
MOVD 304(R3), F11
|
||||
MOVD 296(R3), F10
|
||||
MOVD 288(R3), F9
|
||||
MOVD 280(R3), F8
|
||||
MOVD 272(R3), F7
|
||||
MOVD 264(R3), F6
|
||||
MOVD 256(R3), F5
|
||||
MOVD 248(R3), F4
|
||||
MOVD 240(R3), F3
|
||||
MOVD 232(R3), F2
|
||||
MOVD 224(R3), F1
|
||||
MOVD 216(R3), F0
|
||||
MOVV 208(R3), R31
|
||||
MOVV 200(R3), R29
|
||||
MOVV 192(R3), R28
|
||||
|
|
@ -157,7 +315,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
|
|||
MOVV 24(R3), R6
|
||||
MOVV 16(R3), R5
|
||||
MOVV 8(R3), R4
|
||||
MOVV 480(R3), R1
|
||||
MOVV 224(R3), R1
|
||||
MOVV (R3), R30
|
||||
ADDV $488, R3
|
||||
ADDV $232, R3
|
||||
JMP (R30)
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !amd64 && !arm64
|
||||
//go:build !amd64 && !arm64 && !loong64
|
||||
|
||||
// This provides common support for architectures that DO NOT use extended
|
||||
// register state in asynchronous preemption.
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build amd64 || arm64
|
||||
//go:build amd64 || arm64 || loong64
|
||||
|
||||
// This provides common support for architectures that use extended register
|
||||
// state in asynchronous preemption.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue