[dev.simd] runtime: save Z16-Z31 registers in async preempt

The register allocator will soon start using the upper vector registers
(Z16-Z31). This CL prepares for that by saving and restoring them in
async preemption.

Change-Id: I4d7285e08b95f4e6ebee72594dfbe8d1199f09ed
Reviewed-on: https://go-review.googlesource.com/c/go/+/686498
TryBot-Bypass: David Chase <drchase@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Commit-Queue: David Chase <drchase@google.com>
This commit is contained in:
Junyang Shao 2025-07-08 19:24:30 +00:00
parent 5429328b0c
commit 574854fd86
3 changed files with 65 additions and 17 deletions

View file

@ -300,7 +300,7 @@ func genAMD64(g *gen) {
// Create layouts for X, Y, and Z registers.
const (
numXRegs = 16
numZRegs = 16 // TODO: If we start using upper registers, change to 32
numZRegs = 32
numKRegs = 8
)
lZRegs := layout{sp: xReg} // Non-GP registers

View file

@ -19,6 +19,22 @@ type xRegs struct {
Z13 [64]byte
Z14 [64]byte
Z15 [64]byte
Z16 [64]byte
Z17 [64]byte
Z18 [64]byte
Z19 [64]byte
Z20 [64]byte
Z21 [64]byte
Z22 [64]byte
Z23 [64]byte
Z24 [64]byte
Z25 [64]byte
Z26 [64]byte
Z27 [64]byte
Z28 [64]byte
Z29 [64]byte
Z30 [64]byte
Z31 [64]byte
K0 uint64
K1 uint64
K2 uint64

View file

@ -95,14 +95,30 @@ saveAVX512:
VMOVDQU64 Z13, 832(AX)
VMOVDQU64 Z14, 896(AX)
VMOVDQU64 Z15, 960(AX)
KMOVQ K0, 1024(AX)
KMOVQ K1, 1032(AX)
KMOVQ K2, 1040(AX)
KMOVQ K3, 1048(AX)
KMOVQ K4, 1056(AX)
KMOVQ K5, 1064(AX)
KMOVQ K6, 1072(AX)
KMOVQ K7, 1080(AX)
VMOVDQU64 Z16, 1024(AX)
VMOVDQU64 Z17, 1088(AX)
VMOVDQU64 Z18, 1152(AX)
VMOVDQU64 Z19, 1216(AX)
VMOVDQU64 Z20, 1280(AX)
VMOVDQU64 Z21, 1344(AX)
VMOVDQU64 Z22, 1408(AX)
VMOVDQU64 Z23, 1472(AX)
VMOVDQU64 Z24, 1536(AX)
VMOVDQU64 Z25, 1600(AX)
VMOVDQU64 Z26, 1664(AX)
VMOVDQU64 Z27, 1728(AX)
VMOVDQU64 Z28, 1792(AX)
VMOVDQU64 Z29, 1856(AX)
VMOVDQU64 Z30, 1920(AX)
VMOVDQU64 Z31, 1984(AX)
KMOVQ K0, 2048(AX)
KMOVQ K1, 2056(AX)
KMOVQ K2, 2064(AX)
KMOVQ K3, 2072(AX)
KMOVQ K4, 2080(AX)
KMOVQ K5, 2088(AX)
KMOVQ K6, 2096(AX)
KMOVQ K7, 2104(AX)
JMP preempt
preempt:
CALL ·asyncPreempt2(SB)
@ -153,14 +169,30 @@ restoreAVX2:
VMOVDQU 0(AX), Y0
JMP restoreGPs
restoreAVX512:
KMOVQ 1080(AX), K7
KMOVQ 1072(AX), K6
KMOVQ 1064(AX), K5
KMOVQ 1056(AX), K4
KMOVQ 1048(AX), K3
KMOVQ 1040(AX), K2
KMOVQ 1032(AX), K1
KMOVQ 1024(AX), K0
KMOVQ 2104(AX), K7
KMOVQ 2096(AX), K6
KMOVQ 2088(AX), K5
KMOVQ 2080(AX), K4
KMOVQ 2072(AX), K3
KMOVQ 2064(AX), K2
KMOVQ 2056(AX), K1
KMOVQ 2048(AX), K0
VMOVDQU64 1984(AX), Z31
VMOVDQU64 1920(AX), Z30
VMOVDQU64 1856(AX), Z29
VMOVDQU64 1792(AX), Z28
VMOVDQU64 1728(AX), Z27
VMOVDQU64 1664(AX), Z26
VMOVDQU64 1600(AX), Z25
VMOVDQU64 1536(AX), Z24
VMOVDQU64 1472(AX), Z23
VMOVDQU64 1408(AX), Z22
VMOVDQU64 1344(AX), Z21
VMOVDQU64 1280(AX), Z20
VMOVDQU64 1216(AX), Z19
VMOVDQU64 1152(AX), Z18
VMOVDQU64 1088(AX), Z17
VMOVDQU64 1024(AX), Z16
VMOVDQU64 960(AX), Z15
VMOVDQU64 896(AX), Z14
VMOVDQU64 832(AX), Z13