// Patch to the s390x assembly in src/internal/bytealg (compare, equal, indexbyte): when GOEXPERIMENT_regabiargs is defined the routines take their arguments and return their result in registers; otherwise they load arguments from, and store the result to, FP-relative slots.
diff --git a/src/internal/bytealg/compare_s390x.s b/src/internal/bytealg/compare_s390x.s index 539454870d3..64f537be99e 100644 --- a/src/internal/bytealg/compare_s390x.s +++ b/src/internal/bytealg/compare_s390x.s @@ -5,65 +5,93 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 - MOVD a_base+0(FP), R3 - MOVD a_len+8(FP), R4 - MOVD b_base+24(FP), R5 - MOVD b_len+32(FP), R6 - LA ret+48(FP), R7 +TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 +#ifndef GOEXPERIMENT_regabiargs + MOVD a_base+0(FP), R2 + MOVD a_len+8(FP), R3 + MOVD b_base+24(FP), R4 + MOVD b_len+32(FP), R5 + LA ret+48(FP), R6 +#else + // R2 = a_base + // R3 = a_len + // R4 = a_cap (unused) + // R5 = b_base (want in R4) + // R6 = b_len (want in R5) + // R7 = b_cap (unused) + MOVD R5, R4 + MOVD R6, R5 +#endif BR cmpbody<>(SB) -TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 - MOVD a_base+0(FP), R3 - MOVD a_len+8(FP), R4 - MOVD b_base+16(FP), R5 - MOVD b_len+24(FP), R6 - LA ret+32(FP), R7 +TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOVD a_base+0(FP), R2 + MOVD a_len+8(FP), R3 + MOVD b_base+16(FP), R4 + MOVD b_len+24(FP), R5 + LA ret+32(FP), R6 +#endif + // R2 = a_base + // R3 = a_len + // R4 = b_base + // R5 = b_len + BR cmpbody<>(SB) // input: -// R3 = a -// R4 = alen -// R5 = b -// R6 = blen -// R7 = address of output word (stores -1/0/1 here) +// R2 = a +// R3 = alen +// R4 = b +// R5 = blen +// For regabiargs the output value (-1/0/1) is returned in R2 +// For !regabiargs R6 holds the address of the output word (-1/0/1 is stored there) TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0 - CMPBEQ R3, R5, cmplengths - MOVD R4, R8 - CMPBLE R4, R6, amin - MOVD R6, R8 + CMPBEQ R2, R4, cmplengths + MOVD R3, R7 // R7 will hold min(alen, blen), the number of bytes to compare + CMPBLE R3, R5, amin + MOVD R5, R7 amin: - CMPBEQ R8, $0, cmplengths - CMP R8, $256 + CMPBEQ R7, $0, cmplengths + CMP R7, $256 BLE tail loop: - CLC $256, 0(R3), 0(R5) + CLC $256, 0(R2), 0(R4) BGT gt BLT lt - SUB $256, R8 - MOVD $256(R3), R3 - MOVD $256(R5), R5 - CMP 
R8, $256 + SUB $256, R7 + MOVD $256(R2), R2 + MOVD $256(R4), R4 + CMP R7, $256 BGT loop tail: - SUB $1, R8 - EXRL $cmpbodyclc<>(SB), R8 + SUB $1, R7 + EXRL $cmpbodyclc<>(SB), R7 // NOTE(review): presumably executes the CLC at cmpbodyclc with its length taken from R7 - confirm against the ISA reference BGT gt BLT lt cmplengths: - CMP R4, R6 + CMP R3, R5 BEQ eq BLT lt gt: - MOVD $1, 0(R7) + MOVD $1, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVD R2, 0(R6) +#endif RET lt: - MOVD $-1, 0(R7) + MOVD $-1, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVD R2, 0(R6) +#endif RET eq: - MOVD $0, 0(R7) + MOVD $0, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVD R2, 0(R6) +#endif RET TEXT cmpbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0 - CLC $1, 0(R3), 0(R5) + CLC $1, 0(R2), 0(R4) RET diff --git a/src/internal/bytealg/equal_s390x.s b/src/internal/bytealg/equal_s390x.s index 67f814dfc1c..48e8d0f1547 100644 --- a/src/internal/bytealg/equal_s390x.s +++ b/src/internal/bytealg/equal_s390x.s @@ -6,80 +6,92 @@ #include "textflag.h" // memequal(a, b unsafe.Pointer, size uintptr) bool -TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 - MOVD a+0(FP), R3 - MOVD b+8(FP), R5 - MOVD size+16(FP), R6 - LA ret+24(FP), R7 +TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 +#ifndef GOEXPERIMENT_regabiargs + MOVD a+0(FP), R2 + MOVD b+8(FP), R3 + MOVD size+16(FP), R4 + LA ret+24(FP), R5 +#endif BR memeqbody<>(SB) // memequal_varlen(a, b unsafe.Pointer) bool -TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 - MOVD a+0(FP), R3 - MOVD b+8(FP), R5 - MOVD 8(R12), R6 // compiler stores size at offset 8 in the closure - LA ret+16(FP), R7 +TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 +#ifndef GOEXPERIMENT_regabiargs + MOVD a+0(FP), R2 + MOVD b+8(FP), R3 + LA ret+16(FP), R5 +#endif + + MOVD 8(R12), R4 // compiler stores size at offset 8 in the closure BR memeqbody<>(SB) // input: -// R3 = a -// R5 = b -// R6 = len -// R7 = address of output byte (stores 0 or 1 here) +// R2 = a +// R3 = b +// R4 = len +// For regabiargs the output value (0/1) is returned in R2 +// For !regabiargs R5 holds the address of the output byte (0/1 is stored there) // a and b have the same 
length TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0 - CMPBEQ R3, R5, equal + CMPBEQ R2, R3, equal loop: - CMPBEQ R6, $0, equal - CMPBLT R6, $32, tiny - CMP R6, $256 + CMPBEQ R4, $0, equal + CMPBLT R4, $32, tiny + CMP R4, $256 BLT tail - CLC $256, 0(R3), 0(R5) + CLC $256, 0(R2), 0(R3) BNE notequal - SUB $256, R6 + SUB $256, R4 + LA 256(R2), R2 LA 256(R3), R3 - LA 256(R5), R5 BR loop tail: - SUB $1, R6, R8 + SUB $1, R4, R8 EXRL $memeqbodyclc<>(SB), R8 BEQ equal notequal: - MOVB $0, 0(R7) + MOVD $0, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVB R2, 0(R5) +#endif RET equal: - MOVB $1, 0(R7) + MOVD $1, R2 +#ifndef GOEXPERIMENT_regabiargs + MOVB R2, 0(R5) +#endif RET tiny: - MOVD $0, R2 - CMPBLT R6, $16, lt16 - MOVD 0(R3), R8 - MOVD 0(R5), R9 + MOVD $0, R1 // R1 = offset of the next bytes to compare + CMPBLT R4, $16, lt16 + MOVD 0(R2), R8 + MOVD 0(R3), R9 CMPBNE R8, R9, notequal - MOVD 8(R3), R8 - MOVD 8(R5), R9 + MOVD 8(R2), R8 + MOVD 8(R3), R9 CMPBNE R8, R9, notequal - LA 16(R2), R2 - SUB $16, R6 + LA 16(R1), R1 + SUB $16, R4 lt16: - CMPBLT R6, $8, lt8 - MOVD 0(R3)(R2*1), R8 - MOVD 0(R5)(R2*1), R9 + CMPBLT R4, $8, lt8 + MOVD 0(R2)(R1*1), R8 + MOVD 0(R3)(R1*1), R9 CMPBNE R8, R9, notequal - LA 8(R2), R2 - SUB $8, R6 + LA 8(R1), R1 + SUB $8, R4 lt8: - CMPBLT R6, $4, lt4 - MOVWZ 0(R3)(R2*1), R8 - MOVWZ 0(R5)(R2*1), R9 + CMPBLT R4, $4, lt4 + MOVWZ 0(R2)(R1*1), R8 + MOVWZ 0(R3)(R1*1), R9 CMPBNE R8, R9, notequal - LA 4(R2), R2 - SUB $4, R6 + LA 4(R1), R1 + SUB $4, R4 lt4: #define CHECK(n) \ - CMPBEQ R6, $n, equal \ - MOVB n(R3)(R2*1), R8 \ - MOVB n(R5)(R2*1), R9 \ + CMPBEQ R4, $n, equal \ + MOVB n(R2)(R1*1), R8 \ + MOVB n(R3)(R1*1), R9 \ CMPBNE R8, R9, notequal CHECK(0) CHECK(1) @@ -88,5 +100,5 @@ lt4: BR equal TEXT memeqbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0 - CLC $1, 0(R3), 0(R5) + CLC $1, 0(R2), 0(R3) RET diff --git a/src/internal/bytealg/indexbyte_s390x.s b/src/internal/bytealg/indexbyte_s390x.s index cf88d92a24b..343ed672f76 100644 --- a/src/internal/bytealg/indexbyte_s390x.s +++ b/src/internal/bytealg/indexbyte_s390x.s @@ 
-5,104 +5,134 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40 - MOVD b_base+0(FP), R3// b_base => R3 - MOVD b_len+8(FP), R4 // b_len => R4 - MOVBZ c+24(FP), R5 // c => R5 - MOVD $ret+32(FP), R2 // &ret => R9 - BR indexbytebody<>(SB) -TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32 - MOVD s_base+0(FP), R3// s_base => R3 - MOVD s_len+8(FP), R4 // s_len => R4 - MOVBZ c+16(FP), R5 // c => R5 - MOVD $ret+24(FP), R2 // &ret => R9 - BR indexbytebody<>(SB) +TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOVD b_base+0(FP), R2 // b_base => R2 + MOVD b_len+8(FP), R3 // b_len => R3 + MOVBZ c+24(FP), R4 // c => R4 + MOVD $ret+32(FP), R5 // &ret => R5 +#else + MOVD R5, R4 // presumably c arrives in R5, after b's base/len/cap in R2-R4 (TODO confirm against the ABI); indexbytebody wants it in R4 + AND $0xff, R4 // keep only the low byte of c +#endif + BR indexbytebody<>(SB) + +TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32 +#ifndef GOEXPERIMENT_regabiargs + MOVD s_base+0(FP), R2 // s_base => R2 + MOVD s_len+8(FP), R3 // s_len => R3 + MOVBZ c+16(FP), R4 // c => R4 + MOVD $ret+24(FP), R5 // &ret => R5 +#else + AND $0xff, R4 // keep only the low byte of c +#endif + BR indexbytebody<>(SB) // input: -// R3: s -// R4: s_len -// R5: c -- byte sought -// R2: &ret -- address to put index into +// R2: s +// R3: s_len +// R4: c -- byte sought +// For regabiargs the output value (index) is returned in R2 +// For !regabiargs R5 holds the address where the index is stored TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0 - CMPBEQ R4, $0, notfound - MOVD R3, R6 // store base for later - ADD R3, R4, R8 // the address after the end of the string - //if the length is small, use loop; otherwise, use vector or srst search - CMPBGE R4, $16, large + CMPBEQ R3, $0, notfound + MOVD R2, R6 // store base for later + ADD R2, R3, R8 // the address after the end of the string + // if the length is less than 16, use the byte loop; otherwise, use vector or srst search + CMPBGE R3, $16, large residual: - CMPBEQ R3, R8, notfound - MOVBZ 0(R3), R7 - LA 1(R3), R3 - CMPBNE R7, R5, residual + CMPBEQ R2, R8, notfound + MOVBZ 0(R2), R7 + LA 1(R2), R2 + CMPBNE R7, R4, 
residual found: - SUB R6, R3 - SUB $1, R3 - MOVD R3, 0(R2) - RET + SUB R6, R2 + SUB $1, R2 // R2 was already advanced past the matching byte +#ifndef GOEXPERIMENT_regabiargs + MOVD R2, 0(R5) +#endif + RET notfound: - MOVD $-1, 0(R2) - RET +#ifndef GOEXPERIMENT_regabiargs + MOVD $-1, 0(R5) +#else + MOVD $-1, R2 +#endif + RET large: - MOVBZ internal∕cpu·S390X+const_offsetS390xHasVX(SB), R1 - CMPBNE R1, $0, vectorimpl + MOVBZ internal∕cpu·S390X+const_offsetS390xHasVX(SB), R1 + CMPBNE R1, $0, vectorimpl srstimpl: // no vector facility - MOVBZ R5, R0 // c needs to be in R0, leave until last minute as currently R0 is expected to be 0 + MOVBZ R4, R0 // c needs to be in R0, leave until last minute as currently R0 is expected to be 0 srstloop: - WORD $0xB25E0083 // srst %r8, %r3 (search the range [R3, R8)) - BVS srstloop // interrupted - continue - BGT notfoundr0 + WORD $0xB25E0082 // srst %r8, %r2 (search the range [R2, R8)) + BVS srstloop // interrupted - continue + BGT notfoundr0 foundr0: - XOR R0, R0 // reset R0 - SUB R6, R8 // remove base - MOVD R8, 0(R2) - RET + XOR R0, R0 // reset R0 + SUB R6, R8 // remove base +#ifndef GOEXPERIMENT_regabiargs + MOVD R8, 0(R5) +#else + MOVD R8, R2 +#endif + RET notfoundr0: - XOR R0, R0 // reset R0 - MOVD $-1, 0(R2) - RET + XOR R0, R0 // reset R0 +#ifndef GOEXPERIMENT_regabiargs + MOVD $-1, 0(R5) +#else + MOVD $-1, R2 +#endif + RET vectorimpl: - //if the address is not 16byte aligned, use loop for the header - MOVD R3, R8 - AND $15, R8 - CMPBGT R8, $0, notaligned + // if the address is not 16-byte aligned, use a byte loop for the unaligned head + MOVD R2, R8 + AND $15, R8 + CMPBGT R8, $0, notaligned aligned: - ADD R6, R4, R8 - MOVD R8, R7 - AND $-16, R7 - // replicate c across V17 - VLVGB $0, R5, V19 - VREPB $0, V19, V17 + ADD R6, R3, R8 // R8 = base + len, the address after the end + MOVD R8, R7 + AND $-16, R7 // R7 = end address rounded down to a 16-byte boundary + // replicate c across V17 + VLVGB $0, R4, V19 + VREPB $0, V19, V17 vectorloop: - CMPBGE R3, R7, residual - VL 0(R3), V16 // load string to be searched into V16 - ADD $16, R3 - VFEEBS V16, V17, V18 // search V17 in V16 and set 
conditional code accordingly - BVS vectorloop + CMPBGE R2, R7, residual + VL 0(R2), V16 // load string to be searched into V16 + ADD $16, R2 + VFEEBS V16, V17, V18 // search V16 for the byte replicated in V17 and set the condition code accordingly + BVS vectorloop - // when vector search found c in the string - VLGVB $7, V18, R7 // load 7th element of V18 containing index into R7 - SUB $16, R3 - SUB R6, R3 - ADD R3, R7 - MOVD R7, 0(R2) - RET + // when vector search found c in the string + VLGVB $7, V18, R7 // load 7th element of V18 containing index into R7 + SUB $16, R2 // undo the ADD $16 done after the load + SUB R6, R2 + ADD R2, R7 +#ifndef GOEXPERIMENT_regabiargs + MOVD R7, 0(R5) +#else + MOVD R7, R2 +#endif + RET notaligned: - MOVD R3, R8 - AND $-16, R8 - ADD $16, R8 + MOVD R2, R8 + AND $-16, R8 + ADD $16, R8 // R8 = next 16-byte aligned address after R2 notalignedloop: - CMPBEQ R3, R8, aligned - MOVBZ 0(R3), R7 - LA 1(R3), R3 - CMPBNE R7, R5, notalignedloop - BR found + CMPBEQ R2, R8, aligned + MOVBZ 0(R2), R7 + LA 1(R2), R2 + CMPBNE R7, R4, notalignedloop + BR found +