mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: wire up math/bits.Len intrinsics for loong64
For the SubFromLen64 codegen test case to work as intended, we need
to fold c-(-(x-d)) into x+(c-d).
Still, some instances of LeadingZeros are not yet optimized into a single
CLZ instruction (in fact, the LeadingZeros micro-benchmarks are currently
still compiled with redundant adds/subs of 64, because loop optimizations
that run before lowering interfere), but the performance numbers indicate
that the result is acceptable nonetheless.
Micro-benchmark results on Loongson 3A5000 and 3A6000:
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000 @ 2500.00MHz
| bench.old | bench.new |
| sec/op | sec/op vs base |
LeadingZeros 3.660n ± 0% 1.348n ± 0% -63.17% (p=0.000 n=20)
LeadingZeros8 1.777n ± 0% 1.767n ± 0% -0.56% (p=0.000 n=20)
LeadingZeros16 2.816n ± 0% 1.770n ± 0% -37.14% (p=0.000 n=20)
LeadingZeros32 5.293n ± 1% 1.683n ± 0% -68.21% (p=0.000 n=20)
LeadingZeros64 3.622n ± 0% 1.349n ± 0% -62.76% (p=0.000 n=20)
geomean 3.229n 1.571n -51.35%
goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000 @ 2500.00MHz
| bench.old | bench.new |
| sec/op | sec/op vs base |
LeadingZeros 2.410n ± 0% 1.103n ± 1% -54.23% (p=0.000 n=20)
LeadingZeros8 1.236n ± 0% 1.501n ± 0% +21.44% (p=0.000 n=20)
LeadingZeros16 2.106n ± 0% 1.501n ± 0% -28.73% (p=0.000 n=20)
LeadingZeros32 2.860n ± 0% 1.324n ± 0% -53.72% (p=0.000 n=20)
LeadingZeros64 2.6135n ± 0% 0.9509n ± 0% -63.62% (p=0.000 n=20)
geomean 2.159n 1.256n -41.81%
Updates #59120
This patch is a copy of CL 483356.
Co-authored-by: WANG Xuerui <git@xen0n.name>
Change-Id: Iee81a17f7da06d77a427e73dfcc016f2b15ae556
Reviewed-on: https://go-review.googlesource.com/c/go/+/624575
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
This commit is contained in:
parent
671f2841cb
commit
d98c51809d
8 changed files with 159 additions and 5 deletions
|
|
@ -17,6 +17,7 @@ func LeadingZeros(n uint) int {
|
|||
// amd64/v3:"LZCNTQ", -"BSRQ"
|
||||
// s390x:"FLOGR"
|
||||
// arm:"CLZ" arm64:"CLZ"
|
||||
// loong64:"CLZV",-"SUB"
|
||||
// mips:"CLZ"
|
||||
// wasm:"I64Clz"
|
||||
// ppc64x:"CNTLZD"
|
||||
|
|
@ -28,6 +29,7 @@ func LeadingZeros64(n uint64) int {
|
|||
// amd64/v3:"LZCNTQ", -"BSRQ"
|
||||
// s390x:"FLOGR"
|
||||
// arm:"CLZ" arm64:"CLZ"
|
||||
// loong64:"CLZV",-"SUB"
|
||||
// mips:"CLZ"
|
||||
// wasm:"I64Clz"
|
||||
// ppc64x:"CNTLZD"
|
||||
|
|
@ -39,6 +41,7 @@ func LeadingZeros32(n uint32) int {
|
|||
// amd64/v3: "LZCNTL",- "BSRL"
|
||||
// s390x:"FLOGR"
|
||||
// arm:"CLZ" arm64:"CLZW"
|
||||
// loong64:"CLZW",-"SUB"
|
||||
// mips:"CLZ"
|
||||
// wasm:"I64Clz"
|
||||
// ppc64x:"CNTLZW"
|
||||
|
|
@ -50,6 +53,7 @@ func LeadingZeros16(n uint16) int {
|
|||
// amd64/v3: "LZCNTL",- "BSRL"
|
||||
// s390x:"FLOGR"
|
||||
// arm:"CLZ" arm64:"CLZ"
|
||||
// loong64:"CLZV"
|
||||
// mips:"CLZ"
|
||||
// wasm:"I64Clz"
|
||||
// ppc64x:"CNTLZD"
|
||||
|
|
@ -61,6 +65,7 @@ func LeadingZeros8(n uint8) int {
|
|||
// amd64/v3: "LZCNTL",- "BSRL"
|
||||
// s390x:"FLOGR"
|
||||
// arm:"CLZ" arm64:"CLZ"
|
||||
// loong64:"CLZV"
|
||||
// mips:"CLZ"
|
||||
// wasm:"I64Clz"
|
||||
// ppc64x:"CNTLZD"
|
||||
|
|
@ -76,6 +81,7 @@ func Len(n uint) int {
|
|||
// amd64/v3: "LZCNTQ"
|
||||
// s390x:"FLOGR"
|
||||
// arm:"CLZ" arm64:"CLZ"
|
||||
// loong64:"CLZV"
|
||||
// mips:"CLZ"
|
||||
// wasm:"I64Clz"
|
||||
// ppc64x:"SUBC","CNTLZD"
|
||||
|
|
@ -87,6 +93,7 @@ func Len64(n uint64) int {
|
|||
// amd64/v3: "LZCNTQ"
|
||||
// s390x:"FLOGR"
|
||||
// arm:"CLZ" arm64:"CLZ"
|
||||
// loong64:"CLZV"
|
||||
// mips:"CLZ"
|
||||
// wasm:"I64Clz"
|
||||
// ppc64x:"SUBC","CNTLZD"
|
||||
|
|
@ -94,15 +101,22 @@ func Len64(n uint64) int {
|
|||
}
|
||||
|
||||
// SubFromLen64 returns 64 minus bits.Len64(n).
//
// This is a codegen test case: the arch-prefixed comments inside the body are
// expectations on the generated assembly rather than ordinary comments, so
// they must be kept verbatim.
// NOTE(review): presumably a leading "-" marks an instruction that must not
// appear in the output — confirm against the codegen test documentation.
func SubFromLen64(n uint64) int {
|
||||
// loong64:"CLZV",-"ADD"
|
||||
// ppc64x:"CNTLZD",-"SUBC"
|
||||
return 64 - bits.Len64(n)
|
||||
}
|
||||
|
||||
// CompareWithLen64 reports whether bits.Len64(n) is less than 9.
//
// NOTE(review): the arch-prefixed comment inside the body appears to be a
// codegen expectation on the generated loong64 assembly, not an ordinary
// comment; keep it verbatim and confirm its syntax against the codegen test
// documentation.
func CompareWithLen64(n uint64) bool {
|
||||
// loong64:"CLZV",-"ADD",-"[$]64",-"[$]9"
|
||||
return bits.Len64(n) < 9
|
||||
}
|
||||
|
||||
func Len32(n uint32) int {
|
||||
// amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ"
|
||||
// amd64/v3: "LZCNTL"
|
||||
// s390x:"FLOGR"
|
||||
// arm:"CLZ" arm64:"CLZ"
|
||||
// loong64:"CLZW"
|
||||
// mips:"CLZ"
|
||||
// wasm:"I64Clz"
|
||||
// ppc64x: "CNTLZW"
|
||||
|
|
@ -114,6 +128,7 @@ func Len16(n uint16) int {
|
|||
// amd64/v3: "LZCNTL"
|
||||
// s390x:"FLOGR"
|
||||
// arm:"CLZ" arm64:"CLZ"
|
||||
// loong64:"CLZV"
|
||||
// mips:"CLZ"
|
||||
// wasm:"I64Clz"
|
||||
// ppc64x:"SUBC","CNTLZD"
|
||||
|
|
@ -125,6 +140,7 @@ func Len8(n uint8) int {
|
|||
// amd64/v3: "LZCNTL"
|
||||
// s390x:"FLOGR"
|
||||
// arm:"CLZ" arm64:"CLZ"
|
||||
// loong64:"CLZV"
|
||||
// mips:"CLZ"
|
||||
// wasm:"I64Clz"
|
||||
// ppc64x:"SUBC","CNTLZD"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue