mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: use prove pass to detect Ctz of non-zero values
On amd64, Ctz must include special handling of zeros.
But the prove pass has enough information to detect whether the input
is non-zero, allowing a more efficient lowering.
Introduce new CtzNonZero ops to capture and use this information.
Benchmark code:
// BenchmarkVisitBits measures a loop that visits every set bit of a value:
// starting from an all-ones byte, it takes the trailing-zero count of x and
// then clears the lowest set bit, repeating until x reaches zero.
func BenchmarkVisitBits(b *testing.B) {
	b.Run("8", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			x := uint8(0xff)
			// The loop condition guarantees x is non-zero at the call below,
			// which is the fact the prove pass is meant to detect.
			for x != 0 {
				// sink is declared outside this snippet; presumably it keeps
				// the result live so the call is not optimized away.
				sink = bits.TrailingZeros8(x)
				x &= x - 1 // clear the lowest set bit
			}
		}
	})
	// The 16-, 32-, and 64-bit sub-benchmarks follow the same pattern.
}
name old time/op new time/op delta
VisitBits/8-8 7.27ns ± 4% 5.58ns ± 4% -23.35% (p=0.000 n=28+26)
VisitBits/16-8 14.7ns ± 7% 10.5ns ± 4% -28.43% (p=0.000 n=30+28)
VisitBits/32-8 27.6ns ± 8% 19.3ns ± 3% -30.14% (p=0.000 n=30+26)
VisitBits/64-8 44.0ns ±11% 38.0ns ± 5% -13.48% (p=0.000 n=30+30)
Fixes #25077
Change-Id: Ie6e5bd86baf39ee8a4ca7cadcf56d934e047f957
Reviewed-on: https://go-review.googlesource.com/109358
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
adbb6ec903
commit
d9a50a6531
19 changed files with 347 additions and 32 deletions
|
|
@ -365,7 +365,7 @@ var opMax = map[Op]int64{
|
|||
OpAdd32: math.MaxInt32, OpSub32: math.MaxInt32,
|
||||
}
|
||||
|
||||
// isNonNegative returns true if v is known to be non-negative.
|
||||
// isNonNegative reports whether v is known to be non-negative.
|
||||
func (ft *factsTable) isNonNegative(v *Value) bool {
|
||||
if isNonNegative(v) {
|
||||
return true
|
||||
|
|
@ -734,34 +734,48 @@ func addRestrictions(parent *Block, ft *factsTable, t domain, v, w *Value, r rel
|
|||
}
|
||||
}
|
||||
|
||||
var ctzNonZeroOp = map[Op]Op{OpCtz8: OpCtz8NonZero, OpCtz16: OpCtz16NonZero, OpCtz32: OpCtz32NonZero, OpCtz64: OpCtz64NonZero}
|
||||
|
||||
// simplifyBlock simplifies some constant values in b and evaluates
|
||||
// branches to non-uniquely dominated successors of b.
|
||||
func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) {
|
||||
// Replace OpSlicemask operations in b with constants where possible.
|
||||
for _, v := range b.Values {
|
||||
if v.Op != OpSlicemask {
|
||||
continue
|
||||
}
|
||||
x, delta := isConstDelta(v.Args[0])
|
||||
if x == nil {
|
||||
continue
|
||||
}
|
||||
// slicemask(x + y)
|
||||
// if x is larger than -y (y is negative), then slicemask is -1.
|
||||
lim, ok := ft.limits[x.ID]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if lim.umin > uint64(-delta) {
|
||||
if v.Args[0].Op == OpAdd64 {
|
||||
v.reset(OpConst64)
|
||||
} else {
|
||||
v.reset(OpConst32)
|
||||
switch v.Op {
|
||||
case OpSlicemask:
|
||||
// Replace OpSlicemask operations in b with constants where possible.
|
||||
x, delta := isConstDelta(v.Args[0])
|
||||
if x == nil {
|
||||
continue
|
||||
}
|
||||
if b.Func.pass.debug > 0 {
|
||||
b.Func.Warnl(v.Pos, "Proved slicemask not needed")
|
||||
// slicemask(x + y)
|
||||
// if x is larger than -y (y is negative), then slicemask is -1.
|
||||
lim, ok := ft.limits[x.ID]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if lim.umin > uint64(-delta) {
|
||||
if v.Args[0].Op == OpAdd64 {
|
||||
v.reset(OpConst64)
|
||||
} else {
|
||||
v.reset(OpConst32)
|
||||
}
|
||||
if b.Func.pass.debug > 0 {
|
||||
b.Func.Warnl(v.Pos, "Proved slicemask not needed")
|
||||
}
|
||||
v.AuxInt = -1
|
||||
}
|
||||
case OpCtz8, OpCtz16, OpCtz32, OpCtz64:
|
||||
// On some architectures, notably amd64, we can generate much better
|
||||
// code for CtzNN if we know that the argument is non-zero.
|
||||
// Capture that information here for use in arch-specific optimizations.
|
||||
x := v.Args[0]
|
||||
lim, ok := ft.limits[x.ID]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if lim.umin > 0 || lim.min > 0 || lim.max < 0 {
|
||||
v.Op = ctzNonZeroOp[v.Op]
|
||||
}
|
||||
v.AuxInt = -1
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -818,7 +832,7 @@ func removeBranch(b *Block, branch branch) {
|
|||
}
|
||||
}
|
||||
|
||||
// isNonNegative returns true is v is known to be greater or equal to zero.
|
||||
// isNonNegative reports whether v is known to be greater than or equal to zero.
|
||||
func isNonNegative(v *Value) bool {
|
||||
switch v.Op {
|
||||
case OpConst64:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue