mirror of
https://github.com/golang/go.git
synced 2026-06-27 03:11:23 +00:00
cmd/compile: add known bits pass
This pass performs bitwise constant folding. Its main goal is to optimize bitfields like those generated by defer. You might have 3 defers in a function where the middle one is always taken; previously we couldn't remove the branch for it, but this pass is able to do so. This is hit 93 times, uniqued by LOC, when building std. My first thought was to implement this as part of the limits code. However, the way limits would allow setting knownBits tighter, and vice versa, means the code complexity between the two is multiplicative. Thus I have avoided this; someone might change it in the future, but I don't have a good use case now and this simple pass is sufficient. I have tried multiple places for the pass. We need it before any opt (here late opt) since we need the generic rules to optimize any user of a constant-folded value. We also want one run of known bits after prove, since prove removing some never / always taken branches allows known bits to do a better job. This yields real optimizations when you have a defer inside an always-taken branch. I thought prove might do a better job if some branches were removed by running an early known bits first. However, after trying it, this never helped. I am sure you can build an example where this becomes true, but at least in the code I've looked at it didn't help. Thus I decided against running known bits twice (before and after prove). Fixes #78633 Change-Id: I90a46875cc11d5d26367f00ac83c29fed433cb6d Reviewed-on: https://go-review.googlesource.com/c/go/+/765560 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Keith Randall <khr@google.com> Auto-Submit: Jorropo <jorropo.pgm@gmail.com> Reviewed-by: Carlos Amedee <carlos@golang.org> LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
13cab13f78
commit
7a8dcab743
7 changed files with 429 additions and 1 deletions
|
|
@ -127,6 +127,11 @@ func genAllocators() {
|
|||
typ: "[]uint",
|
||||
base: "LimitSlice",
|
||||
},
|
||||
{
|
||||
name: "KnownBitsEntriesSlice",
|
||||
typ: "[]knownBitsEntry",
|
||||
base: "LimitSlice",
|
||||
},
|
||||
}
|
||||
|
||||
w := new(bytes.Buffer)
|
||||
|
|
|
|||
|
|
@ -357,3 +357,29 @@ func (c *Cache) freeUintSlice(s []uint) {
|
|||
}
|
||||
c.freeLimitSlice(*(*[]limit)(unsafe.Pointer(&b)))
|
||||
}
|
||||
func (c *Cache) allocKnownBitsEntriesSlice(n int) []knownBitsEntry {
|
||||
var base limit
|
||||
var derived knownBitsEntry
|
||||
if unsafe.Sizeof(base)%unsafe.Sizeof(derived) != 0 {
|
||||
panic("bad")
|
||||
}
|
||||
scale := unsafe.Sizeof(base) / unsafe.Sizeof(derived)
|
||||
b := c.allocLimitSlice(int((uintptr(n) + scale - 1) / scale))
|
||||
s := unsafeheader.Slice{
|
||||
Data: unsafe.Pointer(&b[0]),
|
||||
Len: n,
|
||||
Cap: cap(b) * int(scale),
|
||||
}
|
||||
return *(*[]knownBitsEntry)(unsafe.Pointer(&s))
|
||||
}
|
||||
func (c *Cache) freeKnownBitsEntriesSlice(s []knownBitsEntry) {
|
||||
var base limit
|
||||
var derived knownBitsEntry
|
||||
scale := unsafe.Sizeof(base) / unsafe.Sizeof(derived)
|
||||
b := unsafeheader.Slice{
|
||||
Data: unsafe.Pointer(&s[0]),
|
||||
Len: int((uintptr(len(s)) + scale - 1) / scale),
|
||||
Cap: int((uintptr(cap(s)) + scale - 1) / scale),
|
||||
}
|
||||
c.freeLimitSlice(*(*[]limit)(unsafe.Pointer(&b)))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -472,6 +472,7 @@ var passes = [...]pass{
|
|||
{name: "divisible", fn: divisible, required: true},
|
||||
{name: "divmod", fn: divmod, required: true},
|
||||
{name: "middle opt", fn: opt, required: true},
|
||||
{name: "known bits", fn: knownBits},
|
||||
{name: "early fuse", fn: fuseEarly},
|
||||
{name: "expand calls", fn: expandCalls, required: true},
|
||||
{name: "decompose builtin", fn: postExpandCallsDecompose, required: true},
|
||||
|
|
@ -602,6 +603,12 @@ var passOrder = [...]constraint{
|
|||
{"branchelim", "lower"},
|
||||
// lower needs cpu feature information (for SIMD)
|
||||
{"cpufeatures", "lower"},
|
||||
// known bits is an arch-independent pass.
|
||||
{"known bits", "lower"},
|
||||
// known bits does very little except some fancy constant folding and we need opt to clean it up.
|
||||
{"known bits", "late opt"},
|
||||
// known bits does a better job once prove cleaned up some always taken and never taken branches.
|
||||
{"prove", "known bits"},
|
||||
}
|
||||
|
||||
func init() {
|
||||
|
|
|
|||
178
src/cmd/compile/internal/ssa/known_bits.go
Normal file
178
src/cmd/compile/internal/ssa/known_bits.go
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package ssa
|
||||
|
||||
func (kb *knownBitsState) fold(v *Value) (value, known int64) {
|
||||
if kb.seenValues.Test(uint32(v.ID)) {
|
||||
return kb.entries[v.ID].value, kb.entries[v.ID].known
|
||||
}
|
||||
defer func() {
|
||||
// maintain the two invariants:
|
||||
// 2. all values are sign-extended to int64 (inspired by RISC-V's xlen=64)
|
||||
switch v.Type.Size() {
|
||||
case 1:
|
||||
value = int64(int8(value))
|
||||
known = int64(int8(known))
|
||||
case 2:
|
||||
value = int64(int16(value))
|
||||
known = int64(int16(known))
|
||||
case 4:
|
||||
value = int64(int32(value))
|
||||
known = int64(int32(known))
|
||||
case 8:
|
||||
default:
|
||||
panic("unreachable; unknown integer size")
|
||||
}
|
||||
|
||||
// 1. unknown bits are always set to 0 inside value
|
||||
value &= known
|
||||
|
||||
if v.Block.Func.pass.debug > 1 {
|
||||
v.Block.Func.Warnl(v.Pos, "known bits state %v: k:%d v:%d", v, known, value)
|
||||
}
|
||||
kb.entries[v.ID].known = known
|
||||
kb.entries[v.ID].value = value
|
||||
}()
|
||||
kb.seenValues.Set(uint32(v.ID)) // set seen early to give up on loops
|
||||
|
||||
switch v.Op {
|
||||
// TODO: Shifts, rotates, extensions, ...
|
||||
case OpConst64, OpConst32, OpConst16, OpConst8:
|
||||
return v.AuxInt, -1
|
||||
case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
|
||||
x, xk := kb.fold(v.Args[0])
|
||||
y, yk := kb.fold(v.Args[1])
|
||||
onesInBoth := x & y
|
||||
zerosInX := ^x & xk
|
||||
zerosInY := ^y & yk
|
||||
return x & y, onesInBoth | zerosInX | zerosInY
|
||||
case OpOr64, OpOr32, OpOr16, OpOr8:
|
||||
x, xk := kb.fold(v.Args[0])
|
||||
y, yk := kb.fold(v.Args[1])
|
||||
zerosInBoth := ^x & ^y & (xk & yk)
|
||||
onesInX := x
|
||||
onesInY := y
|
||||
return x | y, onesInX | onesInY | zerosInBoth
|
||||
case OpXor64, OpXor32, OpXor16, OpXor8:
|
||||
x, xk := kb.fold(v.Args[0])
|
||||
y, yk := kb.fold(v.Args[1])
|
||||
return x ^ y, xk & yk
|
||||
case OpCom64, OpCom32, OpCom16, OpCom8:
|
||||
x, xk := kb.fold(v.Args[0])
|
||||
return ^x, xk
|
||||
case OpPhi:
|
||||
set := false
|
||||
for i, arg := range v.Args {
|
||||
if !kb.isLiveInEdge(v.Block, uint(i)) {
|
||||
continue
|
||||
}
|
||||
a, k := kb.fold(arg)
|
||||
if !set {
|
||||
value, known = a, k
|
||||
set = true
|
||||
} else {
|
||||
known &^= value ^ a
|
||||
known &= k
|
||||
}
|
||||
if known == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return value, known
|
||||
case OpCopy:
|
||||
return kb.fold(v.Args[0])
|
||||
default:
|
||||
return 0, 0
|
||||
}
|
||||
}
|
||||
|
||||
// knownBits does constant folding across bitfields
|
||||
func knownBits(f *Func) {
|
||||
kb := &knownBitsState{
|
||||
entries: f.Cache.allocKnownBitsEntriesSlice(f.NumValues()),
|
||||
seenValues: f.Cache.allocBitset(f.NumValues()),
|
||||
reachableBlocks: f.Cache.allocBitset(f.NumBlocks()),
|
||||
}
|
||||
defer f.Cache.freeKnownBitsEntriesSlice(kb.entries)
|
||||
defer f.Cache.freeBitset(kb.seenValues)
|
||||
defer f.Cache.freeBitset(kb.reachableBlocks)
|
||||
clear(kb.seenValues)
|
||||
clear(kb.entries)
|
||||
clear(kb.reachableBlocks)
|
||||
|
||||
blocks := f.postorder()
|
||||
for _, b := range blocks {
|
||||
kb.reachableBlocks.Set(uint32(b.ID))
|
||||
}
|
||||
|
||||
for _, b := range blocks {
|
||||
for _, v := range b.Values {
|
||||
if v.Uses == 0 || !v.Type.IsInteger() {
|
||||
continue
|
||||
}
|
||||
switch v.Op {
|
||||
case OpConst64, OpConst32, OpConst16, OpConst8:
|
||||
continue
|
||||
}
|
||||
val, k := kb.fold(v)
|
||||
if k != -1 {
|
||||
continue
|
||||
}
|
||||
if f.pass.debug > 0 {
|
||||
f.Warnl(v.Pos, "known value of %v (%v): %d", v, v.Op, val)
|
||||
}
|
||||
var c *Value
|
||||
switch v.Type.Size() {
|
||||
case 1:
|
||||
c = f.ConstInt8(v.Type, int8(val))
|
||||
case 2:
|
||||
c = f.ConstInt16(v.Type, int16(val))
|
||||
case 4:
|
||||
c = f.ConstInt32(v.Type, int32(val))
|
||||
case 8:
|
||||
c = f.ConstInt64(v.Type, val)
|
||||
default:
|
||||
panic("unreachable; unknown integer size")
|
||||
}
|
||||
v.copyOf(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type knownBitsState struct {
|
||||
entries []knownBitsEntry // indexed by Value.ID
|
||||
seenValues bitset // indexed by Value.ID (at the bit level)
|
||||
reachableBlocks bitset // indexed by Block.ID (at the bit level)
|
||||
}
|
||||
|
||||
// knownBitsEntry records what is known about the bits of one value.
// A set bit in known means the matching bit of value is the actual bit.
type knownBitsEntry struct {
	// Two invariants:
	// 1. unknown bits are always set to 0 inside value
	// 2. all values are sign-extended to int64 (inspired by RISC-V's xlen=64)
	// This means let's say you know an 8 bits value is 0b10??????,
	// known = int64(int8(0b11000000))
	// value = int64(int8(0b10000000))
	known, value int64
}
|
||||
|
||||
func (kb *knownBitsState) isLiveInEdge(b *Block, index uint) bool {
|
||||
inEdge := b.Preds[index]
|
||||
return kb.isLiveOutEdge(inEdge.b, uint(inEdge.i))
|
||||
}
|
||||
|
||||
func (kb *knownBitsState) isLiveOutEdge(b *Block, index uint) bool {
|
||||
if !kb.reachableBlocks.Test(uint32(b.ID)) {
|
||||
return false
|
||||
}
|
||||
|
||||
switch b.Kind {
|
||||
case BlockFirst:
|
||||
return index == 0
|
||||
case BlockPlain, BlockIf, BlockDefer, BlockRet, BlockRetJmp, BlockExit, BlockJumpTable:
|
||||
return true
|
||||
default:
|
||||
panic("unreachable; unknown block kind")
|
||||
}
|
||||
}
|
||||
|
|
@ -18,8 +18,20 @@ const uintSize = 32 << (^uint(0) >> 63) // 32 or 64
|
|||
// bitset is a bit array for dense indexes.
|
||||
type bitset []uint
|
||||
|
||||
func computeBitsetSize(n int) int {
|
||||
return (n + uintSize - 1) / uintSize
|
||||
}
|
||||
|
||||
func newBitset(n int) bitset {
|
||||
return make(bitset, (n+uintSize-1)/uintSize)
|
||||
return make(bitset, computeBitsetSize(n))
|
||||
}
|
||||
|
||||
func (c *Cache) allocBitset(n int) bitset {
|
||||
return bitset(c.allocUintSlice(computeBitsetSize(n)))
|
||||
}
|
||||
|
||||
func (c *Cache) freeBitset(bs bitset) {
|
||||
c.freeUintSlice([]uint(bs))
|
||||
}
|
||||
|
||||
func (bs bitset) Reset() {
|
||||
|
|
|
|||
51
test/codegen/known_bits.go
Normal file
51
test/codegen/known_bits.go
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
// asmcheck
|
||||
|
||||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package codegen
|
||||
|
||||
// knownBitsPhiAnd merges the constants 1 and 3 and then masks with 1.
// Bit 0 is set on both paths, so the result is always 1; the directives on
// the return require that no AND instruction is emitted.
func knownBitsPhiAnd(cond bool) int {
	x := 1
	if cond {
		x = 3
	}
	// amd64:-"AND"
	// arm64:-"AND"
	return x & 1
}
|
||||
|
||||
// knownBitsDeferPattern builds a bitfield in which bits 0 and 2 are set
// unconditionally and bits 1 and 3 are set conditionally, then masks bits 0
// and 2. The result is always 5; the directives on the return require that no
// AND instruction is emitted.
func knownBitsDeferPattern(a, b bool) int {
	bits := 0
	bits |= 1 << 0
	if a {
		bits |= 1 << 1
	}
	bits |= 1 << 2
	if b {
		bits |= 1 << 3
	}
	// amd64:-"AND"
	// arm64:-"AND"
	return bits & (1<<2 | 1<<0)
}
|
||||
|
||||
// knownBitsXorToggle toggles bit 0 once and bit 2 twice unconditionally, and
// bits 1, 3 and 4 conditionally, then masks bits 0 and 2. The result is
// always 1; the directives on the return require that no AND instruction is
// emitted.
func knownBitsXorToggle(a, b, c bool) int {
	bits := 0
	bits ^= 1 << 0
	if a {
		bits ^= 1 << 1
	}
	bits ^= 1 << 2
	if b {
		bits ^= 1 << 3
	}
	bits ^= 1 << 2
	if c {
		bits ^= 1 << 4
	}
	// amd64:-"AND"
	// arm64:-"AND"
	return bits & (1<<2 | 1<<0)
}
|
||||
149
test/known_bits.go
Normal file
149
test/known_bits.go
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
// errorcheck -0 -d=ssa/known_bits/debug=1
|
||||
|
||||
//go:build amd64 || arm64 || s390x || ppc64le || riscv64
|
||||
|
||||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package a
|
||||
|
||||
// knownBitsPhiAnd merges the constants 1 and 3 and masks with 1. Bit 0 is set
// on both paths, so the pass is expected to report the And as the constant 1.
func knownBitsPhiAnd(cond bool) int {
	x := 1
	if cond {
		x = 3
	}
	return x & 1 // ERROR "known value of v[0-9]+ \(And64\): 1$"
}
|
||||
|
||||
// knownBitsPhiAndGarbage merges an otherwise unknown x whose bit 0 was
// cleared with the constant 2. Bit 0 is zero on both paths, so the pass is
// expected to report the And as the constant 0.
func knownBitsPhiAndGarbage(cond bool, x int) int {
	x &^= 1
	if cond {
		x = 2
	}
	return x & 1 // ERROR "known value of v[0-9]+ \(And64\): 0$"
}
|
||||
|
||||
// unknownBitsPhiAnd is a negative case: bit 0 differs between the merged
// constants 1 and 2, so no diagnostic is expected on the return.
func unknownBitsPhiAnd(cond bool) int {
	x := 1
	if cond {
		x = 2
	}
	return x & 1
}
|
||||
|
||||
// knownBitsOrGarbage sets the low three bits of x and then ORs in unknown
// data that cannot touch bits 0 and 1. Those two bits stay set, so the pass
// is expected to report the And as the constant 3.
func knownBitsOrGarbage(x, unknown int) int {
	x |= 7
	x |= unknown &^ 3
	return x & 3 // ERROR "known value of v[0-9]+ \(And64\): 3$"
}
|
||||
|
||||
// unknownBitsOrGarbage is a negative case: bit 1 of the result depends on the
// unknown inputs, so no diagnostic is expected on the return.
func unknownBitsOrGarbage(x, unknown int) int {
	x |= 1
	x |= unknown
	return x & 3
}
|
||||
|
||||
// knownBitsDeferPattern builds a bitfield in which bits 0 and 2 are set
// unconditionally and bits 1 and 3 conditionally, then masks bits 0 and 2.
// The pass is expected to report the And as the constant 5.
func knownBitsDeferPattern(a, b bool) int {
	bits := 0
	bits |= 1 << 0
	if a {
		bits |= 1 << 1
	}
	bits |= 1 << 2
	if b {
		bits |= 1 << 3
	}
	return bits & (1<<2 | 1<<0) // ERROR "known value of v[0-9]+ \(And64\): 5$"
}
|
||||
|
||||
// knownBitsDeferPatternGarbage is knownBitsDeferPattern with unknown data
// XORed into every bit except bits 0 and 2. Those two bits stay set, so the
// pass is still expected to report the And as the constant 5.
func knownBitsDeferPatternGarbage(a, b bool, garbage int) int {
	bits := 0
	bits |= 1 << 0
	if a {
		bits |= 1 << 1
	}
	bits |= 1 << 2
	if b {
		bits |= 1 << 3
	}
	bits ^= garbage &^ (1<<2 | 1<<0)
	return bits & (1<<2 | 1<<0) // ERROR "known value of v[0-9]+ \(And64\): 5$"
}
|
||||
|
||||
// knownBitsXorToggle toggles bit 0 once and bit 2 twice unconditionally, and
// bits 1, 3 and 4 conditionally, then masks bits 0 and 2. The pass is
// expected to report the And as the constant 1.
func knownBitsXorToggle(a, b, c bool) int {
	bits := 0
	bits ^= 1 << 0
	if a {
		bits ^= 1 << 1
	}
	bits ^= 1 << 2
	if b {
		bits ^= 1 << 3
	}
	bits ^= 1 << 2
	if c {
		bits ^= 1 << 4
	}
	return bits & (1<<2 | 1<<0) // ERROR "known value of v[0-9]+ \(And64\): 1$"
}
|
||||
|
||||
// knownBitsXorToggleGarbage is knownBitsXorToggle with unknown data XORed
// into every bit except bits 0 and 2. The pass is still expected to report
// the And as the constant 1.
func knownBitsXorToggleGarbage(a, b, c bool, garbage int) int {
	bits := 0
	bits ^= 1 << 0
	if a {
		bits ^= 1 << 1
	}
	bits ^= 1 << 2
	if b {
		bits ^= 1 << 3
	}
	bits ^= 1 << 2
	if c {
		bits ^= 1 << 4
	}
	bits ^= garbage &^ (1<<2 | 1<<0)
	return bits & (1<<2 | 1<<0) // ERROR "known value of v[0-9]+ \(And64\): 1$"
}
|
||||
|
||||
// unknownBitsXorToggle is a negative case: bit 2 is also toggled
// conditionally on b, so its final state depends on b and no diagnostic is
// expected on the return.
func unknownBitsXorToggle(a, b, c bool) int {
	bits := 0
	bits ^= 1 << 0
	if a {
		bits ^= 1 << 1
	}
	bits ^= 1 << 2
	if b {
		bits ^= 1 << 2
	}
	bits ^= 1 << 2
	if c {
		bits ^= 1 << 4
	}
	return bits & (1<<2 | 1<<0)
}
|
||||
|
||||
// knownBitsPhiComAnd merges the constants 1 and 3, complements the result and
// masks with 1. Bit 0 is set on both paths, so after the complement it is
// clear and the pass is expected to report the And as the constant 0.
func knownBitsPhiComAnd(cond bool) int {
	x := 1
	if cond {
		x = 3
	}
	return ^x & 1 // ERROR "known value of v[0-9]+ \(And64\): 0$"
}
|
||||
|
||||
// knownBitsPhiComAndGarbage is knownBitsPhiComAnd with unknown data XORed
// into every bit except bit 0. The pass is still expected to report the And
// as the constant 0.
func knownBitsPhiComAndGarbage(cond bool, garbage int) int {
	x := 1
	if cond {
		x = 3
	}
	x ^= garbage &^ 1
	return ^x & 1 // ERROR "known value of v[0-9]+ \(And64\): 0$"
}
|
||||
|
||||
// unknownBitsPhiComAnd is a negative case: bit 0 differs between the merged
// constants 1 and 2, so no diagnostic is expected on the return.
func unknownBitsPhiComAnd(cond bool) int {
	x := 1
	if cond {
		x = 2
	}
	return ^x & 1
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue