internal/bytealg: port bytealg functions to reg ABI on s390x

This adds support for the register-based ABI (regabi) to the byte/string
functions for s390x. The new code paths are initially guarded by the
GOEXPERIMENT_regabiargs macro until all of the regabi changes are in.

Updates #40724

Change-Id: Ia3532523fe3a839cc0370d6fe1544972327be514
Reviewed-on: https://go-review.googlesource.com/c/go/+/719481
Reviewed-by: Vishwanatha HD <vishwanatha.hd@ibm.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Srinivas Pokala 2025-11-11 05:26:42 +01:00 committed by Keith Randall
parent 4529c8fba6
commit 2fe0ba8d52
3 changed files with 225 additions and 155 deletions

View file

@ -5,65 +5,93 @@
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD a_base+0(FP), R3
MOVD a_len+8(FP), R4
MOVD b_base+24(FP), R5
MOVD b_len+32(FP), R6
LA ret+48(FP), R7
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
#ifndef GOEXPERIMENT_regabiargs
MOVD a_base+0(FP), R2
MOVD a_len+8(FP), R3
MOVD b_base+24(FP), R4
MOVD b_len+32(FP), R5
LA ret+48(FP), R6
#else
// R2 = a_base
// R3 = a_len
// R4 = a_cap (unused)
// R5 = b_base (want in R4)
// R6 = b_len (want in R5)
// R7 = b_cap (unused)
MOVD R5, R4
MOVD R6, R5
#endif
BR cmpbody<>(SB)
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOVD a_base+0(FP), R3
MOVD a_len+8(FP), R4
MOVD b_base+16(FP), R5
MOVD b_len+24(FP), R6
LA ret+32(FP), R7
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
#ifndef GOEXPERIMENT_regabiargs
MOVD a_base+0(FP), R2
MOVD a_len+8(FP), R3
MOVD b_base+16(FP), R4
MOVD b_len+24(FP), R5
LA ret+32(FP), R6
#endif
// R2 = a_base
// R3 = a_len
// R4 = b_base
// R5 = b_len
BR cmpbody<>(SB)
// input:
// R3 = a
// R4 = alen
// R5 = b
// R6 = blen
// R7 = address of output word (stores -1/0/1 here)
// R2 = a
// R3 = alen
// R4 = b
// R5 = blen
// With regabiargs, the result (-1/0/1) is returned in R2.
// Without regabiargs, R6 holds the address of the output word (-1/0/1 is stored there).
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
CMPBEQ R3, R5, cmplengths
MOVD R4, R8
CMPBLE R4, R6, amin
MOVD R6, R8
CMPBEQ R2, R4, cmplengths
MOVD R3, R7
CMPBLE R3, R5, amin
MOVD R5, R7
amin:
CMPBEQ R8, $0, cmplengths
CMP R8, $256
CMPBEQ R7, $0, cmplengths
CMP R7, $256
BLE tail
loop:
CLC $256, 0(R3), 0(R5)
CLC $256, 0(R2), 0(R4)
BGT gt
BLT lt
SUB $256, R8
MOVD $256(R3), R3
MOVD $256(R5), R5
CMP R8, $256
SUB $256, R7
MOVD $256(R2), R2
MOVD $256(R4), R4
CMP R7, $256
BGT loop
tail:
SUB $1, R8
EXRL $cmpbodyclc<>(SB), R8
SUB $1, R7
EXRL $cmpbodyclc<>(SB), R7
BGT gt
BLT lt
cmplengths:
CMP R4, R6
CMP R3, R5
BEQ eq
BLT lt
gt:
MOVD $1, 0(R7)
MOVD $1, R2
#ifndef GOEXPERIMENT_regabiargs
MOVD R2, 0(R6)
#endif
RET
lt:
MOVD $-1, 0(R7)
MOVD $-1, R2
#ifndef GOEXPERIMENT_regabiargs
MOVD R2, 0(R6)
#endif
RET
eq:
MOVD $0, 0(R7)
MOVD $0, R2
#ifndef GOEXPERIMENT_regabiargs
MOVD R2, 0(R6)
#endif
RET
TEXT cmpbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0
CLC $1, 0(R3), 0(R5)
CLC $1, 0(R2), 0(R4)
RET

View file

@ -6,80 +6,92 @@
#include "textflag.h"
// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
MOVD a+0(FP), R3
MOVD b+8(FP), R5
MOVD size+16(FP), R6
LA ret+24(FP), R7
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
#ifndef GOEXPERIMENT_regabiargs
MOVD a+0(FP), R2
MOVD b+8(FP), R3
MOVD size+16(FP), R4
LA ret+24(FP), R5
#endif
BR memeqbody<>(SB)
// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
MOVD a+0(FP), R3
MOVD b+8(FP), R5
MOVD 8(R12), R6 // compiler stores size at offset 8 in the closure
LA ret+16(FP), R7
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
#ifndef GOEXPERIMENT_regabiargs
MOVD a+0(FP), R2
MOVD b+8(FP), R3
LA ret+16(FP), R5
#endif
MOVD 8(R12), R4 // compiler stores size at offset 8 in the closure
BR memeqbody<>(SB)
// input:
// R3 = a
// R5 = b
// R6 = len
// R7 = address of output byte (stores 0 or 1 here)
// R2 = a
// R3 = b
// R4 = len
// With regabiargs, the result (0/1) is returned in R2.
// Without regabiargs, R5 holds the address of the output byte (0/1 is stored there).
// a and b have the same length
TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
CMPBEQ R3, R5, equal
CMPBEQ R2, R3, equal
loop:
CMPBEQ R6, $0, equal
CMPBLT R6, $32, tiny
CMP R6, $256
CMPBEQ R4, $0, equal
CMPBLT R4, $32, tiny
CMP R4, $256
BLT tail
CLC $256, 0(R3), 0(R5)
CLC $256, 0(R2), 0(R3)
BNE notequal
SUB $256, R6
SUB $256, R4
LA 256(R2), R2
LA 256(R3), R3
LA 256(R5), R5
BR loop
tail:
SUB $1, R6, R8
SUB $1, R4, R8
EXRL $memeqbodyclc<>(SB), R8
BEQ equal
notequal:
MOVB $0, 0(R7)
MOVD $0, R2
#ifndef GOEXPERIMENT_regabiargs
MOVB R2, 0(R5)
#endif
RET
equal:
MOVB $1, 0(R7)
MOVD $1, R2
#ifndef GOEXPERIMENT_regabiargs
MOVB R2, 0(R5)
#endif
RET
tiny:
MOVD $0, R2
CMPBLT R6, $16, lt16
MOVD 0(R3), R8
MOVD 0(R5), R9
MOVD $0, R1
CMPBLT R4, $16, lt16
MOVD 0(R2), R8
MOVD 0(R3), R9
CMPBNE R8, R9, notequal
MOVD 8(R3), R8
MOVD 8(R5), R9
MOVD 8(R2), R8
MOVD 8(R3), R9
CMPBNE R8, R9, notequal
LA 16(R2), R2
SUB $16, R6
LA 16(R1), R1
SUB $16, R4
lt16:
CMPBLT R6, $8, lt8
MOVD 0(R3)(R2*1), R8
MOVD 0(R5)(R2*1), R9
CMPBLT R4, $8, lt8
MOVD 0(R2)(R1*1), R8
MOVD 0(R3)(R1*1), R9
CMPBNE R8, R9, notequal
LA 8(R2), R2
SUB $8, R6
LA 8(R1), R1
SUB $8, R4
lt8:
CMPBLT R6, $4, lt4
MOVWZ 0(R3)(R2*1), R8
MOVWZ 0(R5)(R2*1), R9
CMPBLT R4, $4, lt4
MOVWZ 0(R2)(R1*1), R8
MOVWZ 0(R3)(R1*1), R9
CMPBNE R8, R9, notequal
LA 4(R2), R2
SUB $4, R6
LA 4(R1), R1
SUB $4, R4
lt4:
#define CHECK(n) \
CMPBEQ R6, $n, equal \
MOVB n(R3)(R2*1), R8 \
MOVB n(R5)(R2*1), R9 \
CMPBEQ R4, $n, equal \
MOVB n(R2)(R1*1), R8 \
MOVB n(R3)(R1*1), R9 \
CMPBNE R8, R9, notequal
CHECK(0)
CHECK(1)
@ -88,5 +100,5 @@ lt4:
BR equal
TEXT memeqbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0
CLC $1, 0(R3), 0(R5)
CLC $1, 0(R2), 0(R3)
RET

View file

@ -5,46 +5,63 @@
#include "go_asm.h"
#include "textflag.h"
TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
MOVD b_base+0(FP), R3// b_base => R3
MOVD b_len+8(FP), R4 // b_len => R4
MOVBZ c+24(FP), R5 // c => R5
MOVD $ret+32(FP), R2 // &ret => R2
TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
#ifndef GOEXPERIMENT_regabiargs
MOVD b_base+0(FP), R2// b_base => R2
MOVD b_len+8(FP), R3 // b_len => R3
MOVBZ c+24(FP), R4 // c => R4
MOVD $ret+32(FP), R5 // &ret => R5
#else
MOVD R5, R4
AND $0xff, R4
#endif
BR indexbytebody<>(SB)
TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32
MOVD s_base+0(FP), R3// s_base => R3
MOVD s_len+8(FP), R4 // s_len => R4
MOVBZ c+16(FP), R5 // c => R5
MOVD $ret+24(FP), R2 // &ret => R2
TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
#ifndef GOEXPERIMENT_regabiargs
MOVD s_base+0(FP), R2 // s_base => R2
MOVD s_len+8(FP), R3 // s_len => R3
MOVBZ c+16(FP), R4 // c => R4
MOVD $ret+24(FP), R5 // &ret => R5
#else
AND $0xff, R4
#endif
BR indexbytebody<>(SB)
// input:
// R3: s
// R4: s_len
// R5: c -- byte sought
// R2: &ret -- address to put index into
// R2: s
// R3: s_len
// R4: c -- byte sought
// With regabiargs, the result (index, or -1 if not found) is returned in R2.
// Without regabiargs, R5 holds the address of the output word (the index is stored there).
TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0
CMPBEQ R4, $0, notfound
MOVD R3, R6 // store base for later
ADD R3, R4, R8 // the address after the end of the string
CMPBEQ R3, $0, notfound
MOVD R2, R6 // store base for later
ADD R2, R3, R8 // the address after the end of the string
// If the length is small (< 16), use a byte loop; otherwise, use the vector or SRST search.
CMPBGE R4, $16, large
CMPBGE R3, $16, large
residual:
CMPBEQ R3, R8, notfound
MOVBZ 0(R3), R7
LA 1(R3), R3
CMPBNE R7, R5, residual
CMPBEQ R2, R8, notfound
MOVBZ 0(R2), R7
LA 1(R2), R2
CMPBNE R7, R4, residual
found:
SUB R6, R3
SUB $1, R3
MOVD R3, 0(R2)
SUB R6, R2
SUB $1, R2
#ifndef GOEXPERIMENT_regabiargs
MOVD R2, 0(R5)
#endif
RET
notfound:
MOVD $-1, 0(R2)
#ifndef GOEXPERIMENT_regabiargs
MOVD $-1, 0(R5)
#else
MOVD $-1, R2
#endif
RET
large:
@ -52,57 +69,70 @@ large:
CMPBNE R1, $0, vectorimpl
srstimpl: // no vector facility
MOVBZ R5, R0 // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
MOVBZ R4, R0 // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
srstloop:
WORD $0xB25E0083 // srst %r8, %r3 (search the range [R3, R8))
WORD $0xB25E0082 // srst %r8, %r2 (search the range [R2, R8))
BVS srstloop // interrupted - continue
BGT notfoundr0
foundr0:
XOR R0, R0 // reset R0
SUB R6, R8 // remove base
MOVD R8, 0(R2)
#ifndef GOEXPERIMENT_regabiargs
MOVD R8, 0(R5)
#else
MOVD R8, R2
#endif
RET
notfoundr0:
XOR R0, R0 // reset R0
MOVD $-1, 0(R2)
#ifndef GOEXPERIMENT_regabiargs
MOVD $-1, 0(R5)
#else
MOVD $-1, R2
#endif
RET
vectorimpl:
// If the address is not 16-byte aligned, use a byte loop for the unaligned head.
MOVD R3, R8
MOVD R2, R8
AND $15, R8
CMPBGT R8, $0, notaligned
aligned:
ADD R6, R4, R8
ADD R6, R3, R8
MOVD R8, R7
AND $-16, R7
// replicate c across V17
VLVGB $0, R5, V19
VLVGB $0, R4, V19
VREPB $0, V19, V17
vectorloop:
CMPBGE R3, R7, residual
VL 0(R3), V16 // load string to be searched into V16
ADD $16, R3
CMPBGE R2, R7, residual
VL 0(R2), V16 // load string to be searched into V16
ADD $16, R2
VFEEBS V16, V17, V18 // search V17 in V16 and set conditional code accordingly
BVS vectorloop
// when vector search found c in the string
VLGVB $7, V18, R7 // load 7th element of V18 containing index into R7
SUB $16, R3
SUB R6, R3
ADD R3, R7
MOVD R7, 0(R2)
SUB $16, R2
SUB R6, R2
ADD R2, R7
#ifndef GOEXPERIMENT_regabiargs
MOVD R7, 0(R5)
#else
MOVD R7, R2
#endif
RET
notaligned:
MOVD R3, R8
MOVD R2, R8
AND $-16, R8
ADD $16, R8
notalignedloop:
CMPBEQ R3, R8, aligned
MOVBZ 0(R3), R7
LA 1(R3), R3
CMPBNE R7, R5, notalignedloop
CMPBEQ R2, R8, aligned
MOVBZ 0(R2), R7
LA 1(R2), R2
CMPBNE R7, R4, notalignedloop
BR found