// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package codegen
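// Each function below is compiled for the architectures named in its
// comment directives; asmcheck verifies that the quoted instruction
// patterns do (or, with a leading "-", do not) appear in the generated
// assembly for that function.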

func cmovint(c int) int {
	x := c + 4
	if x < 0 {
		x = 182
	}
	// amd64:"CMOVQLT"
	// arm64:"CSEL LT"
	// ppc64x:"ISEL [$]0"
	// wasm:"Select"
	return x
}

func cmovchan(x, y chan int) chan int {
	if x != y {
		x = y
	}
	// amd64:"CMOVQNE"
	// arm64:"CSEL NE"
	// ppc64x:"ISEL [$]2"
	// wasm:"Select"
	return x
}

func cmovuintptr(x, y uintptr) uintptr {
	if x < y {
		x = -y
	}
	// amd64:"CMOVQ(HI|CS)"
	// arm64:"CSNEG LS"
	// ppc64x:"ISEL [$]1"
	// wasm:"Select"
	return x
}

func cmov32bit(x, y uint32) uint32 {
	if x < y {
		x = -y
	}
	// amd64:"CMOVL(HI|CS)"
	// arm64:"CSNEG (LS|HS)"
	// ppc64x:"ISEL [$]1"
	// wasm:"Select"
	return x
}

func cmov16bit(x, y uint16) uint16 {
	if x < y {
		x = -y
	}
	// amd64:"CMOVW(HI|CS)"
	// arm64:"CSNEG (LS|HS)"
	// ppc64x:"ISEL [$][01]"
	// wasm:"Select"
	return x
}

// Floating point comparison. For EQ/NE, we must
// generate special code to handle NaNs.
func cmovfloateq(x, y float64) int {
	a := 128
	if x == y {
		a = 256
}
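	// On amd64 a float == compare sets PF as well as ZF when the
	// operands are unordered (NaN), so selecting on equality takes two
	// conditional moves: one on NE and one on parity-clear.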
	// amd64:"CMOVQNE" "CMOVQPC"
	// arm64:"CSEL EQ"
	// ppc64x:"ISEL [$]2"
	// wasm:"Select"
	return a
}

func cmovfloatne(x, y float64) int {
	a := 128
	if x != y {
		a = 256
	}
	// amd64:"CMOVQNE" "CMOVQPS"
	// arm64:"CSEL NE"
	// ppc64x:"ISEL [$]2"
	// wasm:"Select"
	return a
}
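
// frexp and ldexp are deliberately opaque stand-ins (cf. math.Frexp and
// math.Ldexp); //go:noinline keeps the calls from being inlined so their
// results are not constant-folded away in cmovfloatint2 below.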
//go:noinline
func frexp(f float64) (frac float64, exp int) {
	return 1.0, 4
}

//go:noinline
func ldexp(frac float64, exp int) float64 {
	return 1.0
}

// Generate a CMOV with a floating comparison and integer move.
func cmovfloatint2(x, y float64) float64 {
	yfr, yexp := 4.0, 5

	r := x
	for r >= y {
		rfr, rexp := frexp(r)
		if rfr < yfr {
			rexp = rexp - 42
		}
		// amd64:"CMOVQHI"
		// arm64:"CSEL MI"
		// ppc64x:"ISEL [$]0"
		// wasm:"Select"
		r = r - ldexp(y, rexp-yexp)
	}
	return r
}

func cmovloaded(x [4]int, y int) int {
	if x[2] != 0 {
		y = x[2]
	} else {
		y = y >> 2
	}
	// amd64:"CMOVQNE"
	// arm64:"CSEL NE"
	// ppc64x:"ISEL [$]2"
	// wasm:"Select"
	return y
}

func cmovuintptr2(x, y uintptr) uintptr {
	a := x * 2
	if a == 0 {
		a = 256
	}
	// amd64:"CMOVQEQ"
	// arm64:"CSEL EQ"
	// ppc64x:"ISEL [$]2"
	// wasm:"Select"
	return a
}

// Floating point CMOVs are not supported by amd64/arm64/ppc64x
func cmovfloatmove(x, y int) float64 {
	a := 1.0
	if x <= y {
		a = 2.0
	}
	// amd64:-"CMOV"
	// arm64:-"CSEL"
	// ppc64x:-"ISEL"
	// wasm:-"Select"
	return a
}

// On amd64, the following patterns trigger comparison inversion.
// Test that we correctly invert the CMOV condition.
var gsink int64
var gusink uint64
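
// In each case the compare against the global turns into a CMPQ with a
// memory operand; since the operands end up swapped, the CMOV condition
// checked below is the inverse of the one written in the source.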
func cmovinvert1(x, y int64) int64 {
	if x < gsink {
		y = -y
	}
	// amd64:"CMOVQGT"
	return y
}

func cmovinvert2(x, y int64) int64 {
	if x <= gsink {
		y = -y
	}
	// amd64:"CMOVQGE"
	return y
}

func cmovinvert3(x, y int64) int64 {
	if x == gsink {
		y = -y
	}
	// amd64:"CMOVQEQ"
	return y
}

func cmovinvert4(x, y int64) int64 {
	if x != gsink {
		y = -y
	}
	// amd64:"CMOVQNE"
	return y
}

func cmovinvert5(x, y uint64) uint64 {
	if x > gusink {
		y = -y
	}
	// amd64:"CMOVQCS"
	return y
}

func cmovinvert6(x, y uint64) uint64 {
	if x >= gusink {
		y = -y
	}
	// amd64:"CMOVQLS"
	return y
}

func cmovload(a []int, i int, b bool) int {
	if b {
		i += 42
}
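	// A branchy version lets the a[i] load issue speculatively, while a
	// CMOV-computed index would stall the load on the flags, so no CMOV
	// is wanted here.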
	// See issue 26306
	// amd64:-"CMOVQNE"
	return a[i]
}

func cmovstore(a []int, i int, b bool) {
	if b {
		i += 42
	}
	// amd64:"CMOVQNE"
	a[i] = 7
}

var r0, r1, r2, r3, r4, r5 int
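
// arm64 has conditional forms beyond plain CSEL: CSINC (select or
// increment), CSINV (select or bitwise-invert), CSNEG (select or negate)
// and CSETM (set to all ones or zero). The patterns below should each
// lower to one of those instead of a CSEL.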
func cmovinc(cond bool, a, b, c int) {
	var x0, x1 int

	if cond {
		x0 = a
	} else {
		x0 = b + 1
	}
	// arm64:"CSINC NE", -"CSEL"
	r0 = x0

	if cond {
		x1 = b + 1
	} else {
		x1 = a
	}
	// arm64:"CSINC EQ", -"CSEL"
	r1 = x1

	if cond {
		c++
	}
	// arm64:"CSINC EQ", -"CSEL"
	r2 = c
}

func cmovinv(cond bool, a, b int) {
	var x0, x1 int

	if cond {
		x0 = a
	} else {
		x0 = ^b
	}
	// arm64:"CSINV NE", -"CSEL"
	r0 = x0

	if cond {
		x1 = ^b
	} else {
		x1 = a
	}
	// arm64:"CSINV EQ", -"CSEL"
	r1 = x1
}

func cmovneg(cond bool, a, b, c int) {
	var x0, x1 int

	if cond {
		x0 = a
	} else {
		x0 = -b
	}
	// arm64:"CSNEG NE", -"CSEL"
	r0 = x0

	if cond {
		x1 = -b
	} else {
		x1 = a
	}
	// arm64:"CSNEG EQ", -"CSEL"
	r1 = x1
}

func cmovsetm(cond bool, x int) {
	var x0, x1 int

	if cond {
		x0 = -1
	} else {
		x0 = 0
	}
	// arm64:"CSETM NE", -"CSEL"
	r0 = x0

	if cond {
		x1 = 0
	} else {
		x1 = -1
	}
	// arm64:"CSETM EQ", -"CSEL"
	r1 = x1
}
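
// For floating-point compares the conditions are chosen so that a NaN
// falls through to the else arm: e.g. s < t uses MI and s <= t uses LS,
// with > and >= handled by swapping the operands.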
func cmovFcmp0(s, t float64, a, b int) {
	var x0, x1, x2, x3, x4, x5 int

	if s < t {
		x0 = a
	} else {
		x0 = b + 1
	}
	// arm64:"CSINC MI", -"CSEL"
	r0 = x0

	if s <= t {
		x1 = a
	} else {
		x1 = ^b
	}
	// arm64:"CSINV LS", -"CSEL"
	r1 = x1

	if s > t {
		x2 = a
	} else {
		x2 = -b
	}
	// arm64:"CSNEG MI", -"CSEL"
	r2 = x2

	if s >= t {
		x3 = -1
	} else {
		x3 = 0
	}
	// arm64:"CSETM LS", -"CSEL"
	r3 = x3

	if s == t {
		x4 = a
	} else {
		x4 = b + 1
	}
	// arm64:"CSINC EQ", -"CSEL"
	r4 = x4

	if s != t {
		x5 = a
	} else {
		x5 = b + 1
	}
	// arm64:"CSINC NE", -"CSEL"
	r5 = x5
}

func cmovFcmp1(s, t float64, a, b int) {
	var x0, x1, x2, x3, x4, x5 int

	if s < t {
		x0 = b + 1
	} else {
		x0 = a
	}
	// arm64:"CSINC PL", -"CSEL"
	r0 = x0

	if s <= t {
		x1 = ^b
	} else {
		x1 = a
	}
	// arm64:"CSINV HI", -"CSEL"
	r1 = x1

	if s > t {
		x2 = -b
	} else {
		x2 = a
	}
	// arm64:"CSNEG PL", -"CSEL"
	r2 = x2

	if s >= t {
		x3 = 0
	} else {
		x3 = -1
	}
	// arm64:"CSETM HI", -"CSEL"
	r3 = x3

	if s == t {
		x4 = b + 1
	} else {
		x4 = a
	}
	// arm64:"CSINC NE", -"CSEL"
	r4 = x4

	if s != t {
		x5 = b + 1
	} else {
		x5 = a
	}
	// arm64:"CSINC EQ", -"CSEL"
	r5 = x5
}
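
// On loong64, selecting between a value and zero maps onto MASKEQZ
// (result is zero when the condition register is zero) and MASKNEZ
// (result is zero when it is non-zero), so only one of the pair should
// appear in each function below.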
func cmovzero1(c bool) int {
	var x int
	if c {
		x = 182
	}
	// loong64:"MASKEQZ", -"MASKNEZ"
	return x
}

func cmovzero2(c bool) int {
	var x int
	if !c {
		x = 182
	}
	// loong64:"MASKNEZ", -"MASKEQZ"
	return x
}

// Conditionally selecting between a value or 0 can be done without
// an extra load of 0 to a register on PPC64 by using R0 (which always
// holds the value $0) instead. Verify both cases where either arg1
// or arg2 is zero.
func cmovzeroreg0(a, b int) int {
	x := 0
	if a == b {
		x = a
	}
	// ppc64x:"ISEL [$]2, R[0-9]+, R0, R[0-9]+"
	return x
}

func cmovzeroreg1(a, b int) int {
	x := a
	if a == b {
		x = 0
	}
	// ppc64x:"ISEL [$]2, R0, R[0-9]+, R[0-9]+"
	return x
}
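
// The branchelim pass rewrites a conditional ±1 into a plain add or
// subtract of the zero-extended bool, so no conditional move should be
// emitted at all for the functions below.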
func cmovmathadd(a uint, b bool) uint {
	if b {
		a++
	}
	// amd64:"ADDQ", -"CMOV"
	// arm64:"CSINC", -"CSEL"
	// ppc64x:"ADD", -"ISEL"
	// wasm:"I64Add", -"Select"
	return a
}

func cmovmathsub(a uint, b bool) uint {
	if b {
		a--
	}
	// amd64:"SUBQ", -"CMOV"
	// arm64:"SUB", -"CSEL"
	// ppc64x:"SUB", -"ISEL"
	// wasm:"I64Sub", -"Select"
	return a
}

func cmovmathdouble(a uint, b bool) uint {
	if b {
		a *= 2
	}
	// amd64:"SHL", -"CMOV"
	// amd64/v3:"SHL", -"CMOV", -"MOV"
	// arm64:"LSL", -"CSEL"
	// wasm:"I64Shl", -"Select"
	return a
}

func cmovmathhalvei(a int, b bool) int {
	if b {
		// For some reason the compiler attributes the shift to inside this block rather than where the Phi node is.
		// arm64:"ASR", -"CSEL"
		// wasm:"I64ShrS", -"Select"
		a /= 2
	}
	// arm64:-"CSEL"
	// wasm:-"Select"
	return a
}

func cmovmathhalveu(a uint, b bool) uint {
	if b {
		a /= 2
	}
	// amd64:"SHR", -"CMOV"
	// amd64/v3:"SHR", -"CMOV", -"MOV"
	// arm64:"LSR", -"CSEL"
	// wasm:"I64ShrU", -"Select"
	return a
}

func branchlessBoolToUint8(b bool) (r uint8) {
	if b {
		r = 1
	}
	return
}
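
// Multiplying by a bool-derived 0/1 coming straight from flags should
// lower to a conditional move of x or zero rather than an actual
// multiply or a byte zero-extension.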
func cmovFromMulFromFlags64(x uint64, b bool) uint64 {
	// amd64:-"MOVB.ZX"
	r := uint64(branchlessBoolToUint8(b))
	// amd64:"CMOV",-"MOVB.ZX",-"MUL"
	return x * r
}

func cmovFromMulFromFlags64sext(x int64, b bool) int64 {
	// amd64:-"MOVB.ZX"
	r := int64(int8(branchlessBoolToUint8(b)))
	// amd64:"CMOV",-"MOVB.ZX",-"MUL"
	return x * r
}