internal/bytealg: port bytealg functions to reg ABI on s390x

This adds register ABI (regabi) support to the byte/string assembly
functions for s390x. The register-based paths are initially guarded by
the GOEXPERIMENT_regabiargs macro until all regabi changes are in.

Updates #40724

Change-Id: Ia3532523fe3a839cc0370d6fe1544972327be514
Reviewed-on: https://go-review.googlesource.com/c/go/+/719481
Reviewed-by: Vishwanatha HD <vishwanatha.hd@ibm.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Srinivas Pokala 2025-11-11 05:26:42 +01:00 committed by Keith Randall
parent 4529c8fba6
commit 2fe0ba8d52
3 changed files with 225 additions and 155 deletions

View file

@ -5,65 +5,93 @@
#include "go_asm.h" #include "go_asm.h"
#include "textflag.h" #include "textflag.h"
// Compare entry point. Each row below pairs the removed line (left) with the
// added line (right); rows with a single instruction exist on one side only.
// Added version: when GOEXPERIMENT_regabiargs is not defined, a/b base and
// length are loaded from the frame into R2-R5 and the address of ret into R6.
// When it is defined, the arguments are already in registers and only
// b_base/b_len are moved down into R4/R5. Both paths then branch to cmpbody.
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
MOVD a_base+0(FP), R3 #ifndef GOEXPERIMENT_regabiargs
MOVD a_len+8(FP), R4 MOVD a_base+0(FP), R2
MOVD b_base+24(FP), R5 MOVD a_len+8(FP), R3
MOVD b_len+32(FP), R6 MOVD b_base+24(FP), R4
LA ret+48(FP), R7 MOVD b_len+32(FP), R5
LA ret+48(FP), R6
#else
// R2 = a_base
// R3 = a_len
// R4 = a_cap (unused)
// R5 = b_base (want in R4)
// R6 = b_len (want in R5)
// R7 = b_cap (unused)
// Overwriting R4 (a_cap) is safe because it is unused; R5 must be copied
// before it is overwritten with b_len.
MOVD R5, R4
MOVD R6, R5
#endif
BR cmpbody<>(SB) BR cmpbody<>(SB)
// runtime·cmpstring entry point (left: removed line, right: added line).
// Added version: the frame loads into R2-R5 (and the address of ret into R6)
// are only assembled when GOEXPERIMENT_regabiargs is not defined. Otherwise no
// moves are needed: the register layout listed below already matches the
// inputs documented for cmpbody.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
MOVD a_base+0(FP), R3 #ifndef GOEXPERIMENT_regabiargs
MOVD a_len+8(FP), R4 MOVD a_base+0(FP), R2
MOVD b_base+16(FP), R5 MOVD a_len+8(FP), R3
MOVD b_len+24(FP), R6 MOVD b_base+16(FP), R4
LA ret+32(FP), R7 MOVD b_len+24(FP), R5
LA ret+32(FP), R6
#endif
// R2 = a_base
// R3 = a_len
// R4 = b_base
// R5 = b_len
BR cmpbody<>(SB) BR cmpbody<>(SB)
// cmpbody: three-way comparison shared by Compare and cmpstring
// (left: removed line, right: added line). The first min(alen, blen) bytes are
// compared; if they match, the result is decided by comparing the lengths.
// input: // input:
// R3 = a // R2 = a
// R4 = alen // R3 = alen
// R5 = b // R4 = b
// R6 = blen // R5 = blen
// R7 = address of output word (stores -1/0/1 here) // For regabiargs output value( -1/0/1 ) stored in R2
// For !regabiargs address of output word( stores -1/0/1 ) stored in R6
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0 TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
// Same base pointer: the contents cannot differ, so only the lengths matter.
CMPBEQ R3, R5, cmplengths CMPBEQ R2, R4, cmplengths
// Added version: R7 = min(alen, blen) (R8 in the removed version).
MOVD R4, R8 MOVD R3, R7
CMPBLE R4, R6, amin CMPBLE R3, R5, amin
MOVD R6, R8 MOVD R5, R7
amin: amin:
CMPBEQ R8, $0, cmplengths CMPBEQ R7, $0, cmplengths
CMP R8, $256 CMP R7, $256
BLE tail BLE tail
// Compare 256 bytes per iteration while more than 256 bytes remain.
loop: loop:
CLC $256, 0(R3), 0(R5) CLC $256, 0(R2), 0(R4)
BGT gt BGT gt
BLT lt BLT lt
SUB $256, R8 SUB $256, R7
MOVD $256(R3), R3 MOVD $256(R2), R2
MOVD $256(R5), R5 MOVD $256(R4), R4
CMP R8, $256 CMP R7, $256
BGT loop BGT loop
// Between 1 and 256 bytes remain. EXRL executes the CLC template in
// cmpbodyclc with (remaining - 1) supplied from the register as its length code.
tail: tail:
SUB $1, R8 SUB $1, R7
EXRL $cmpbodyclc<>(SB), R8 EXRL $cmpbodyclc<>(SB), R7
BGT gt BGT gt
BLT lt BLT lt
// Common prefix is equal: the shorter operand sorts first.
cmplengths: cmplengths:
CMP R4, R6 CMP R3, R5
BEQ eq BEQ eq
BLT lt BLT lt
// For gt/lt/eq in the added version the result is always placed in R2; the
// store through R6 is only assembled when GOEXPERIMENT_regabiargs is not defined.
gt: gt:
MOVD $1, 0(R7) MOVD $1, R2
#ifndef GOEXPERIMENT_regabiargs
MOVD R2, 0(R6)
#endif
RET RET
lt: lt:
MOVD $-1, 0(R7) MOVD $-1, R2
#ifndef GOEXPERIMENT_regabiargs
MOVD R2, 0(R6)
#endif
RET RET
eq: eq:
MOVD $0, 0(R7) MOVD $0, R2
#ifndef GOEXPERIMENT_regabiargs
MOVD R2, 0(R6)
#endif
RET RET
// cmpbodyclc: CLC template referenced by the EXRL in cmpbody's tail path.
// Its operand registers must match the ones cmpbody uses for a and b
// (R3/R5 in the removed version, R2/R4 in the added version).
TEXT cmpbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0 TEXT cmpbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0
CLC $1, 0(R3), 0(R5) CLC $1, 0(R2), 0(R4)
RET RET

View file

@ -6,80 +6,92 @@
#include "textflag.h" #include "textflag.h"
// memequal(a, b unsafe.Pointer, size uintptr) bool // memequal(a, b unsafe.Pointer, size uintptr) bool
// Left: removed line, right: added line.
// Added version: when GOEXPERIMENT_regabiargs is not defined, a, b and size are
// loaded from the frame into R2, R3, R4 and the address of ret into R5.
// Otherwise nothing is loaded here and control goes straight to memeqbody,
// whose documented inputs are R2 = a, R3 = b, R4 = len.
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
MOVD a+0(FP), R3 #ifndef GOEXPERIMENT_regabiargs
MOVD b+8(FP), R5 MOVD a+0(FP), R2
MOVD size+16(FP), R6 MOVD b+8(FP), R3
LA ret+24(FP), R7 MOVD size+16(FP), R4
LA ret+24(FP), R5
#endif
BR memeqbody<>(SB) BR memeqbody<>(SB)
// memequal_varlen(a, b unsafe.Pointer) bool // memequal_varlen(a, b unsafe.Pointer) bool
// Left: removed line, right: added line.
// Added version: the frame loads of a, b and the address of ret are guarded by
// #ifndef GOEXPERIMENT_regabiargs, while the size load from the closure at
// 8(R12) sits after the #endif and therefore runs in both configurations.
TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
MOVD a+0(FP), R3 #ifndef GOEXPERIMENT_regabiargs
MOVD b+8(FP), R5 MOVD a+0(FP), R2
MOVD 8(R12), R6 // compiler stores size at offset 8 in the closure MOVD b+8(FP), R3
LA ret+16(FP), R7 LA ret+16(FP), R5
#endif
MOVD 8(R12), R4 // compiler stores size at offset 8 in the closure
BR memeqbody<>(SB) BR memeqbody<>(SB)
// memeqbody: equality test shared by memequal and memequal_varlen
// (left: removed line, right: added line). Sizes of 256 bytes or more are
// compared in 256-byte CLC chunks, sizes from 32 to 255 with one EXRL-executed
// CLC, and sizes below 32 with scalar loads in the "tiny" path.
// input: // input:
// R3 = a // R2 = a
// R5 = b // R3 = b
// R6 = len // R4 = len
// R7 = address of output byte (stores 0 or 1 here) // For regabiargs output value( 0/1 ) stored in R2
// For !regabiargs address of output byte( stores 0/1 ) stored in R5
// a and b have the same length // a and b have the same length
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0 TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
// Identical pointers: equal without reading memory.
CMPBEQ R3, R5, equal CMPBEQ R2, R3, equal
loop: loop:
CMPBEQ R6, $0, equal CMPBEQ R4, $0, equal
CMPBLT R6, $32, tiny CMPBLT R4, $32, tiny
CMP R6, $256 CMP R4, $256
BLT tail BLT tail
CLC $256, 0(R3), 0(R5) CLC $256, 0(R2), 0(R3)
BNE notequal BNE notequal
SUB $256, R6 SUB $256, R4
LA 256(R2), R2
LA 256(R3), R3 LA 256(R3), R3
LA 256(R5), R5
BR loop BR loop
// 32..255 bytes remain: R8 = remaining - 1 is supplied to the CLC template in
// memeqbodyclc through EXRL.
tail: tail:
SUB $1, R6, R8 SUB $1, R4, R8
EXRL $memeqbodyclc<>(SB), R8 EXRL $memeqbodyclc<>(SB), R8
BEQ equal BEQ equal
// In the added version the result is placed in R2; the byte store through R5
// is only assembled when GOEXPERIMENT_regabiargs is not defined.
notequal: notequal:
MOVB $0, 0(R7) MOVD $0, R2
#ifndef GOEXPERIMENT_regabiargs
MOVB R2, 0(R5)
#endif
RET RET
equal: equal:
MOVB $1, 0(R7) MOVD $1, R2
#ifndef GOEXPERIMENT_regabiargs
MOVB R2, 0(R5)
#endif
RET RET
// Fewer than 32 bytes. The running offset lives in R1 in the added version
// (R2 in the removed version; R2 now holds a, so it can no longer be the index).
tiny: tiny:
MOVD $0, R2 MOVD $0, R1
CMPBLT R6, $16, lt16 CMPBLT R4, $16, lt16
MOVD 0(R3), R8 MOVD 0(R2), R8
MOVD 0(R5), R9 MOVD 0(R3), R9
CMPBNE R8, R9, notequal CMPBNE R8, R9, notequal
MOVD 8(R3), R8 MOVD 8(R2), R8
MOVD 8(R5), R9 MOVD 8(R3), R9
CMPBNE R8, R9, notequal CMPBNE R8, R9, notequal
LA 16(R2), R2 LA 16(R1), R1
SUB $16, R6 SUB $16, R4
lt16: lt16:
CMPBLT R6, $8, lt8 CMPBLT R4, $8, lt8
MOVD 0(R3)(R2*1), R8 MOVD 0(R2)(R1*1), R8
MOVD 0(R5)(R2*1), R9 MOVD 0(R3)(R1*1), R9
CMPBNE R8, R9, notequal CMPBNE R8, R9, notequal
LA 8(R2), R2 LA 8(R1), R1
SUB $8, R6 SUB $8, R4
lt8: lt8:
CMPBLT R6, $4, lt4 CMPBLT R4, $4, lt4
MOVWZ 0(R3)(R2*1), R8 MOVWZ 0(R2)(R1*1), R8
MOVWZ 0(R5)(R2*1), R9 MOVWZ 0(R3)(R1*1), R9
CMPBNE R8, R9, notequal CMPBNE R8, R9, notequal
LA 4(R2), R2 LA 4(R1), R1
SUB $4, R6 SUB $4, R4
lt4: lt4:
// CHECK(n): finish as equal if exactly n bytes remain, otherwise compare the
// byte at offset n and bail out on mismatch.
#define CHECK(n) \ #define CHECK(n) \
CMPBEQ R6, $n, equal \ CMPBEQ R4, $n, equal \
MOVB n(R3)(R2*1), R8 \ MOVB n(R2)(R1*1), R8 \
MOVB n(R5)(R2*1), R9 \ MOVB n(R3)(R1*1), R9 \
CMPBNE R8, R9, notequal CMPBNE R8, R9, notequal
CHECK(0) CHECK(0)
CHECK(1) CHECK(1)
// The hunk header below marks unchanged rows that this diff view does not show.
@ -88,5 +100,5 @@ lt4:
BR equal BR equal
// memeqbodyclc: CLC template referenced by the EXRL in memeqbody's tail path.
// Its operand registers must match the ones memeqbody uses for a and b
// (R3/R5 in the removed version, R2/R3 in the added version).
TEXT memeqbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0 TEXT memeqbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0
CLC $1, 0(R3), 0(R5) CLC $1, 0(R2), 0(R3)
RET RET

View file

@ -5,104 +5,134 @@
#include "go_asm.h" #include "go_asm.h"
#include "textflag.h" #include "textflag.h"
// IndexByte entry point. The first six rows are the removed stack-ABI version
// only. From the next TEXT row on, the columns are misaligned: the left column
// is the removed IndexByteString and the right column is the added IndexByte.
// NOTE: in the removed code the trailing comment says "&ret => R9", but the
// instruction loads the address of ret into R2.
TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
MOVD b_base+0(FP), R3// b_base => R3
MOVD b_len+8(FP), R4 // b_len => R4
MOVBZ c+24(FP), R5 // c => R5
MOVD $ret+32(FP), R2 // &ret => R9
BR indexbytebody<>(SB)
// Added IndexByte (right column): when GOEXPERIMENT_regabiargs is not defined,
// b_base, b_len and c are loaded into R2, R3, R4 and the address of ret into R5.
// Otherwise c is taken from R5, moved to R4 and masked to its low 8 bits.
// NOTE(review): R4 presumably carries b_cap on entry in the register ABI, by
// analogy with the slice layout; confirm against the Go internal ABI.
TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32 TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
MOVD s_base+0(FP), R3// s_base => R3 #ifndef GOEXPERIMENT_regabiargs
MOVD s_len+8(FP), R4 // s_len => R4 MOVD b_base+0(FP), R2// b_base => R2
MOVBZ c+16(FP), R5 // c => R5 MOVD b_len+8(FP), R3 // b_len => R3
MOVD $ret+24(FP), R2 // &ret => R9 MOVBZ c+24(FP), R4 // c => R4
BR indexbytebody<>(SB) MOVD $ret+32(FP), R5 // &ret => R5
#else
MOVD R5, R4
AND $0xff, R4
#endif
BR indexbytebody<>(SB)
// IndexByteString entry point (added version only; the removed version appears
// in the left column of the preceding rows).
// When GOEXPERIMENT_regabiargs is not defined, s_base, s_len and c are loaded
// from the frame into R2, R3, R4 and the address of ret into R5. Otherwise the
// only work is masking R4 to its low 8 bits, which mirrors the zero-extending
// MOVBZ load used on the stack path. Both paths branch to indexbytebody.
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
#ifndef GOEXPERIMENT_regabiargs
MOVD s_base+0(FP), R2 // s_base => R2
MOVD s_len+8(FP), R3 // s_len => R3
MOVBZ c+16(FP), R4 // c => R4
MOVD $ret+24(FP), R5 // &ret => R5
#else
AND $0xff, R4
#endif
BR indexbytebody<>(SB)
// indexbytebody: byte search shared by IndexByte and IndexByteString
// (left: removed line, right: added line). Inputs shorter than 16 bytes use a
// byte loop; longer inputs use the vector facility when HasVX is set and the
// SRST instruction otherwise. The result is the index of the first match,
// or -1 if there is none.
// input: // input:
// R3: s // R2: s
// R4: s_len // R3: s_len
// R5: c -- byte sought // R4: c -- byte sought
// R2: &ret -- address to put index into // For regabiargs output value(index) stored in R2
// For !regabiargs address of output value(index) stored in R5
TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0 TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0
CMPBEQ R4, $0, notfound CMPBEQ R3, $0, notfound
MOVD R3, R6 // store base for later MOVD R2, R6 // store base for later
ADD R3, R4, R8 // the address after the end of the string ADD R2, R3, R8 // the address after the end of the string
//if the length is small, use loop; otherwise, use vector or srst search //if the length is small, use loop; otherwise, use vector or srst search
CMPBGE R4, $16, large CMPBGE R3, $16, large
// Byte-at-a-time scan up to the end address in R8. Also used by the vector
// path for the bytes left after the last full 16-byte block.
residual: residual:
CMPBEQ R3, R8, notfound CMPBEQ R2, R8, notfound
MOVBZ 0(R3), R7 MOVBZ 0(R2), R7
LA 1(R3), R3 LA 1(R2), R2
CMPBNE R7, R5, residual CMPBNE R7, R4, residual
// The cursor was advanced past the matching byte, so index = cursor - base - 1.
found: found:
SUB R6, R3 SUB R6, R2
SUB $1, R3 SUB $1, R2
MOVD R3, 0(R2) #ifndef GOEXPERIMENT_regabiargs
RET MOVD R2, 0(R5)
#endif
RET
notfound: notfound:
MOVD $-1, 0(R2) #ifndef GOEXPERIMENT_regabiargs
RET MOVD $-1, 0(R5)
#else
MOVD $-1, R2
#endif
RET
large: large:
MOVBZ internalcpu·S390X+const_offsetS390xHasVX(SB), R1 MOVBZ internalcpu·S390X+const_offsetS390xHasVX(SB), R1
CMPBNE R1, $0, vectorimpl CMPBNE R1, $0, vectorimpl
srstimpl: // no vector facility srstimpl: // no vector facility
MOVBZ R5, R0 // c needs to be in R0, leave until last minute as currently R0 is expected to be 0 MOVBZ R4, R0 // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
srstloop: srstloop:
// The last hex digit of the hand-encoded SRST selects the start-address
// register, so it changes from 3 to 2 together with the register renaming.
WORD $0xB25E0083 // srst %r8, %r3 (search the range [R3, R8)) WORD $0xB25E0082 // srst %r8, %r2 (search the range [R2, R8))
BVS srstloop // interrupted - continue BVS srstloop // interrupted - continue
BGT notfoundr0 BGT notfoundr0
// On a match R8 is used as the address of the found byte; subtracting the
// base gives the index.
foundr0: foundr0:
XOR R0, R0 // reset R0 XOR R0, R0 // reset R0
SUB R6, R8 // remove base SUB R6, R8 // remove base
MOVD R8, 0(R2) #ifndef GOEXPERIMENT_regabiargs
RET MOVD R8, 0(R5)
#else
MOVD R8, R2
#endif
RET
notfoundr0: notfoundr0:
XOR R0, R0 // reset R0 XOR R0, R0 // reset R0
MOVD $-1, 0(R2) #ifndef GOEXPERIMENT_regabiargs
RET MOVD $-1, 0(R5)
#else
MOVD $-1, R2
#endif
RET
vectorimpl: vectorimpl:
//if the address is not 16byte aligned, use loop for the header //if the address is not 16byte aligned, use loop for the header
MOVD R3, R8 MOVD R2, R8
AND $15, R8 AND $15, R8
CMPBGT R8, $0, notaligned CMPBGT R8, $0, notaligned
// R8 is recomputed as the end address (base + length) because both the
// alignment test above and the notaligned path overwrite it.
// R7 = end address rounded down to a multiple of 16.
aligned: aligned:
ADD R6, R4, R8 ADD R6, R3, R8
MOVD R8, R7 MOVD R8, R7
AND $-16, R7 AND $-16, R7
// replicate c across V17 // replicate c across V17
VLVGB $0, R5, V19 VLVGB $0, R4, V19
VREPB $0, V19, V17 VREPB $0, V19, V17
vectorloop: vectorloop:
CMPBGE R3, R7, residual CMPBGE R2, R7, residual
VL 0(R3), V16 // load string to be searched into V16 VL 0(R2), V16 // load string to be searched into V16
ADD $16, R3 ADD $16, R2
VFEEBS V16, V17, V18 // search V17 in V16 and set conditional code accordingly VFEEBS V16, V17, V18 // search V17 in V16 and set conditional code accordingly
BVS vectorloop BVS vectorloop
// when vector search found c in the string // when vector search found c in the string
VLGVB $7, V18, R7 // load 7th element of V18 containing index into R7 VLGVB $7, V18, R7 // load 7th element of V18 containing index into R7
// Undo the 16-byte advance, convert the block address to an offset from the
// base, and add the in-block index held in R7.
SUB $16, R3 SUB $16, R2
SUB R6, R3 SUB R6, R2
ADD R3, R7 ADD R2, R7
MOVD R7, 0(R2) #ifndef GOEXPERIMENT_regabiargs
RET MOVD R7, 0(R5)
#else
MOVD R7, R2
#endif
RET
// R8 = next 16-byte boundary above the cursor; scan byte by byte until it is
// reached, then continue with the aligned vector loop.
notaligned: notaligned:
MOVD R3, R8 MOVD R2, R8
AND $-16, R8 AND $-16, R8
ADD $16, R8 ADD $16, R8
notalignedloop: notalignedloop:
CMPBEQ R3, R8, aligned CMPBEQ R2, R8, aligned
MOVBZ 0(R3), R7 MOVBZ 0(R2), R7
LA 1(R3), R3 LA 1(R2), R2
CMPBNE R7, R5, notalignedloop CMPBNE R7, R4, notalignedloop
BR found BR found