mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
bytes: improve Compare function on amd64 for large byte arrays
This patch contains only a loop-unrolling change for sizes > 63B.
Following are the performance numbers for various sizes on a Haswell-based system: Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz.

benchcmp go.head.8.25.15.txt go.head.8.25.15.opt.txt
benchmark old ns/op new ns/op delta
BenchmarkBytesCompare1-4 5.37 5.37 +0.00%
BenchmarkBytesCompare2-4 5.37 5.38 +0.19%
BenchmarkBytesCompare4-4 5.37 5.37 +0.00%
BenchmarkBytesCompare8-4 4.42 4.38 -0.90%
BenchmarkBytesCompare16-4 4.27 4.45 +4.22%
BenchmarkBytesCompare32-4 5.30 5.36 +1.13%
BenchmarkBytesCompare64-4 6.93 6.78 -2.16%
BenchmarkBytesCompare128-4 10.3 9.50 -7.77%
BenchmarkBytesCompare256-4 17.1 13.8 -19.30%
BenchmarkBytesCompare512-4 31.3 22.1 -29.39%
BenchmarkBytesCompare1024-4 62.5 39.0 -37.60%
BenchmarkBytesCompare2048-4 112 73.2 -34.64%

Change-Id: I4eeb1c22732fd62cbac97ba757b0d29f648d4ef1
Reviewed-on: https://go-review.googlesource.com/11871
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
abab21b1d1
commit
32add8d7c8
2 changed files with 81 additions and 0 deletions
|
|
@ -1255,3 +1255,34 @@ func BenchmarkRepeat(b *testing.B) {
|
||||||
Repeat([]byte("-"), 80)
|
Repeat([]byte("-"), 80)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func benchmarkBytesCompare(b *testing.B, n int) {
|
||||||
|
var x = make([]byte, n)
|
||||||
|
var y = make([]byte, n)
|
||||||
|
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
x[i] = 'a'
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
y[i] = 'a'
|
||||||
|
}
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
Compare(x, y)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkBytesCompare1(b *testing.B) { benchmarkBytesCompare(b, 1) }
|
||||||
|
func BenchmarkBytesCompare2(b *testing.B) { benchmarkBytesCompare(b, 2) }
|
||||||
|
func BenchmarkBytesCompare4(b *testing.B) { benchmarkBytesCompare(b, 4) }
|
||||||
|
func BenchmarkBytesCompare8(b *testing.B) { benchmarkBytesCompare(b, 8) }
|
||||||
|
func BenchmarkBytesCompare16(b *testing.B) { benchmarkBytesCompare(b, 16) }
|
||||||
|
func BenchmarkBytesCompare32(b *testing.B) { benchmarkBytesCompare(b, 32) }
|
||||||
|
func BenchmarkBytesCompare64(b *testing.B) { benchmarkBytesCompare(b, 64) }
|
||||||
|
func BenchmarkBytesCompare128(b *testing.B) { benchmarkBytesCompare(b, 128) }
|
||||||
|
func BenchmarkBytesCompare256(b *testing.B) { benchmarkBytesCompare(b, 256) }
|
||||||
|
func BenchmarkBytesCompare512(b *testing.B) { benchmarkBytesCompare(b, 512) }
|
||||||
|
func BenchmarkBytesCompare1024(b *testing.B) { benchmarkBytesCompare(b, 1024) }
|
||||||
|
func BenchmarkBytesCompare2048(b *testing.B) { benchmarkBytesCompare(b, 2048) }
|
||||||
|
|
|
||||||
|
|
@ -1445,6 +1445,8 @@ TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
|
||||||
CMPQ R8, $8
|
CMPQ R8, $8
|
||||||
JB small
|
JB small
|
||||||
|
|
||||||
|
CMPQ R8, $63
|
||||||
|
JA big_loop
|
||||||
loop:
|
loop:
|
||||||
CMPQ R8, $16
|
CMPQ R8, $16
|
||||||
JBE _0through16
|
JBE _0through16
|
||||||
|
|
@ -1459,6 +1461,17 @@ loop:
|
||||||
SUBQ $16, R8
|
SUBQ $16, R8
|
||||||
JMP loop
|
JMP loop
|
||||||
|
|
||||||
|
diff64:
|
||||||
|
ADDQ $48, SI
|
||||||
|
ADDQ $48, DI
|
||||||
|
JMP diff16
|
||||||
|
diff48:
|
||||||
|
ADDQ $32, SI
|
||||||
|
ADDQ $32, DI
|
||||||
|
JMP diff16
|
||||||
|
diff32:
|
||||||
|
ADDQ $16, SI
|
||||||
|
ADDQ $16, DI
|
||||||
// AX = bit mask of differences
|
// AX = bit mask of differences
|
||||||
diff16:
|
diff16:
|
||||||
BSFQ AX, BX // index of first byte that differs
|
BSFQ AX, BX // index of first byte that differs
|
||||||
|
|
@ -1545,6 +1558,43 @@ allsame:
|
||||||
MOVQ AX, (R9)
|
MOVQ AX, (R9)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
// this works for >= 64 bytes of data.
|
||||||
|
big_loop:
|
||||||
|
MOVOU (SI), X0
|
||||||
|
MOVOU (DI), X1
|
||||||
|
PCMPEQB X0, X1
|
||||||
|
PMOVMSKB X1, AX
|
||||||
|
XORQ $0xffff, AX
|
||||||
|
JNE diff16
|
||||||
|
|
||||||
|
MOVOU 16(SI), X0
|
||||||
|
MOVOU 16(DI), X1
|
||||||
|
PCMPEQB X0, X1
|
||||||
|
PMOVMSKB X1, AX
|
||||||
|
XORQ $0xffff, AX
|
||||||
|
JNE diff32
|
||||||
|
|
||||||
|
MOVOU 32(SI), X0
|
||||||
|
MOVOU 32(DI), X1
|
||||||
|
PCMPEQB X0, X1
|
||||||
|
PMOVMSKB X1, AX
|
||||||
|
XORQ $0xffff, AX
|
||||||
|
JNE diff48
|
||||||
|
|
||||||
|
MOVOU 48(SI), X0
|
||||||
|
MOVOU 48(DI), X1
|
||||||
|
PCMPEQB X0, X1
|
||||||
|
PMOVMSKB X1, AX
|
||||||
|
XORQ $0xffff, AX
|
||||||
|
JNE diff64
|
||||||
|
|
||||||
|
ADDQ $64, SI
|
||||||
|
ADDQ $64, DI
|
||||||
|
SUBQ $64, R8
|
||||||
|
CMPQ R8, $64
|
||||||
|
JBE loop
|
||||||
|
JMP big_loop
|
||||||
|
|
||||||
TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
|
TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
|
||||||
MOVQ s+0(FP), SI
|
MOVQ s+0(FP), SI
|
||||||
MOVQ s_len+8(FP), BX
|
MOVQ s_len+8(FP), BX
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue