mirror of
				https://github.com/golang/go.git
				synced 2025-11-04 02:30:57 +00:00 
			
		
		
		
	Optimise more branches with zero on riscv64. In particular, BLTU with zero occurs with IsInBounds checks for index zero. This currently results in two instructions and requires an additional register: li t2, 0 bltu t2, t1, 0x174b4 This is equivalent to checking if the bounds is not equal to zero. With this change: bnez t1, 0x174c0 This removes more than 500 instructions from the Go binary on riscv64. Change-Id: I6cd861d853e3ef270bd46dacecdfaa205b1c4644 Reviewed-on: https://go-review.googlesource.com/c/go/+/606715 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com> Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
		
			
				
	
	
		
			254 lines
		
	
	
	
		
			4.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			254 lines
		
	
	
	
		
			4.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
// asmcheck
 | 
						|
 | 
						|
// Copyright 2019 The Go Authors. All rights reserved.
 | 
						|
// Use of this source code is governed by a BSD-style
 | 
						|
// license that can be found in the LICENSE file.
 | 
						|
 | 
						|
package codegen
 | 
						|
 | 
						|
//go:noinline
 | 
						|
func dummy() {}
 | 
						|
 | 
						|
// Signed 64-bit compare-and-branch.
 | 
						|
func si64(x, y chan int64) {
 | 
						|
	// s390x:"CGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
 | 
						|
	for <-x < <-y {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CL?GRJ\t[$]8, R[0-9]+, R[0-9]+, "
 | 
						|
	for <-x == <-y {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Signed 64-bit compare-and-branch with 8-bit immediate.
 | 
						|
func si64x8(doNotOptimize int64) {
 | 
						|
	// take in doNotOptimize as an argument to avoid the loops being rewritten to count down
 | 
						|
	// s390x:"CGIJ\t[$]12, R[0-9]+, [$]127, "
 | 
						|
	for i := doNotOptimize; i < 128; i++ {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CGIJ\t[$]10, R[0-9]+, [$]-128, "
 | 
						|
	for i := doNotOptimize; i > -129; i-- {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CGIJ\t[$]2, R[0-9]+, [$]127, "
 | 
						|
	for i := doNotOptimize; i >= 128; i++ {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CGIJ\t[$]4, R[0-9]+, [$]-128, "
 | 
						|
	for i := doNotOptimize; i <= -129; i-- {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Unsigned 64-bit compare-and-branch.
 | 
						|
func ui64(x, y chan uint64) {
 | 
						|
	// s390x:"CLGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
 | 
						|
	for <-x > <-y {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CL?GRJ\t[$]6, R[0-9]+, R[0-9]+, "
 | 
						|
	for <-x != <-y {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Unsigned 64-bit comparison with 8-bit immediate.
 | 
						|
func ui64x8() {
 | 
						|
	// s390x:"CLGIJ\t[$]4, R[0-9]+, [$]128, "
 | 
						|
	for i := uint64(0); i < 128; i++ {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CLGIJ\t[$]12, R[0-9]+, [$]255, "
 | 
						|
	for i := uint64(0); i < 256; i++ {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]255, "
 | 
						|
	for i := uint64(257); i >= 256; i-- {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]0, "
 | 
						|
	for i := uint64(1024); i > 0; i-- {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Signed 32-bit compare-and-branch.
 | 
						|
func si32(x, y chan int32) {
 | 
						|
	// s390x:"CRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
 | 
						|
	for <-x < <-y {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CL?RJ\t[$]8, R[0-9]+, R[0-9]+, "
 | 
						|
	for <-x == <-y {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Signed 32-bit compare-and-branch with 8-bit immediate.
 | 
						|
func si32x8(doNotOptimize int32) {
 | 
						|
	// take in doNotOptimize as an argument to avoid the loops being rewritten to count down
 | 
						|
	// s390x:"CIJ\t[$]12, R[0-9]+, [$]127, "
 | 
						|
	for i := doNotOptimize; i < 128; i++ {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CIJ\t[$]10, R[0-9]+, [$]-128, "
 | 
						|
	for i := doNotOptimize; i > -129; i-- {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CIJ\t[$]2, R[0-9]+, [$]127, "
 | 
						|
	for i := doNotOptimize; i >= 128; i++ {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CIJ\t[$]4, R[0-9]+, [$]-128, "
 | 
						|
	for i := doNotOptimize; i <= -129; i-- {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Unsigned 32-bit compare-and-branch.
 | 
						|
func ui32(x, y chan uint32) {
 | 
						|
	// s390x:"CLRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
 | 
						|
	for <-x > <-y {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CL?RJ\t[$]6, R[0-9]+, R[0-9]+, "
 | 
						|
	for <-x != <-y {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Unsigned 32-bit comparison with 8-bit immediate.
 | 
						|
func ui32x8() {
 | 
						|
	// s390x:"CLIJ\t[$]4, R[0-9]+, [$]128, "
 | 
						|
	for i := uint32(0); i < 128; i++ {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255, "
 | 
						|
	for i := uint32(0); i < 256; i++ {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255, "
 | 
						|
	for i := uint32(257); i >= 256; i-- {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CLIJ\t[$]2, R[0-9]+, [$]0, "
 | 
						|
	for i := uint32(1024); i > 0; i-- {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Signed 64-bit comparison with unsigned 8-bit immediate.
 | 
						|
func si64xu8(x chan int64) {
 | 
						|
	// s390x:"CLGIJ\t[$]8, R[0-9]+, [$]128, "
 | 
						|
	for <-x == 128 {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CLGIJ\t[$]6, R[0-9]+, [$]255, "
 | 
						|
	for <-x != 255 {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Signed 32-bit comparison with unsigned 8-bit immediate.
 | 
						|
func si32xu8(x chan int32) {
 | 
						|
	// s390x:"CLIJ\t[$]8, R[0-9]+, [$]255, "
 | 
						|
	for <-x == 255 {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CLIJ\t[$]6, R[0-9]+, [$]128, "
 | 
						|
	for <-x != 128 {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Unsigned 64-bit comparison with signed 8-bit immediate.
 | 
						|
func ui64xu8(x chan uint64) {
 | 
						|
	// s390x:"CGIJ\t[$]8, R[0-9]+, [$]-1, "
 | 
						|
	for <-x == ^uint64(0) {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CGIJ\t[$]6, R[0-9]+, [$]-128, "
 | 
						|
	for <-x != ^uint64(127) {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Unsigned 32-bit comparison with signed 8-bit immediate.
 | 
						|
func ui32xu8(x chan uint32) {
 | 
						|
	// s390x:"CIJ\t[$]8, R[0-9]+, [$]-128, "
 | 
						|
	for <-x == ^uint32(127) {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// s390x:"CIJ\t[$]6, R[0-9]+, [$]-1, "
 | 
						|
	for <-x != ^uint32(0) {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Signed 64-bit comparison with 1/-1 to comparison with 0.
 | 
						|
func si64x0(x chan int64) {
 | 
						|
	// riscv64:"BGTZ"
 | 
						|
	for <-x >= 1 {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// riscv64:"BLEZ"
 | 
						|
	for <-x < 1 {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// riscv64:"BLTZ"
 | 
						|
	for <-x <= -1 {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// riscv64:"BGEZ"
 | 
						|
	for <-x > -1 {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Unsigned 64-bit comparison with 1 to comparison with 0.
 | 
						|
func ui64x0(x chan uint64) {
 | 
						|
	// riscv64:"BNEZ"
 | 
						|
	for <-x >= 1 {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// riscv64:"BEQZ"
 | 
						|
	for <-x < 1 {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// riscv64:"BNEZ"
 | 
						|
	for 0 < <-x {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
 | 
						|
	// riscv64:"BEQZ"
 | 
						|
	for 0 >= <-x {
 | 
						|
		dummy()
 | 
						|
	}
 | 
						|
}
 |