cmd/compile: add known bits pass

This pass performs bitwise constant folding.

Its main goal is to optimize bitfields like those generated by defer.

You might have 3 defers in a function where the middle one is always taken;
previously we couldn't remove the branch for it, but this pass is able to do so.

This triggers on 93 distinct lines of code when building std.

My first thought was to implement this as part of the limits code.
However, letting limits tighten known bits, and vice versa,
means the code complexity between the two is multiplicative.
Thus I have avoided this; someone might change it in the future,
but I don't have a good use case now and this simple pass is sufficient.

I have tried multiple places for the pass.
We need it before an opt pass (here late opt) since we need the generic rules
to optimize any user of a constant folded value.

We also want one run of known bits after prove since prove removing some
never / always taken branches allows known bits to do a better job.

This yields real optimizations when you have a defer inside an always
taken branch.

I thought prove might do a better job if some branches were removed by
running an early known bits pass first.
However, after trying it, this never helped.

I am sure you can build an example where this becomes true, but at least
in the code I've looked at it didn't help.
Thus I decided against running known bits twice (before and after prove).

Fixes #78633

Change-Id: I90a46875cc11d5d26367f00ac83c29fed433cb6d
Reviewed-on: https://go-review.googlesource.com/c/go/+/765560
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Jorropo <jorropo.pgm@gmail.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Jorropo 2026-04-10 23:25:26 +02:00 committed by Gopher Robot
parent 13cab13f78
commit 7a8dcab743
7 changed files with 429 additions and 1 deletions

View file

@ -127,6 +127,11 @@ func genAllocators() {
typ: "[]uint",
base: "LimitSlice",
},
{
name: "KnownBitsEntriesSlice",
typ: "[]knownBitsEntry",
base: "LimitSlice",
},
}
w := new(bytes.Buffer)

View file

@ -357,3 +357,29 @@ func (c *Cache) freeUintSlice(s []uint) {
}
c.freeLimitSlice(*(*[]limit)(unsafe.Pointer(&b)))
}
// allocKnownBitsEntriesSlice returns a []knownBitsEntry of length n whose
// backing storage is obtained from allocLimitSlice and reinterpreted.
// NOTE(review): this matches the KnownBitsEntriesSlice entry (base
// "LimitSlice") in genAllocators and looks generated — presumably it should be
// regenerated rather than edited by hand; confirm.
func (c *Cache) allocKnownBitsEntriesSlice(n int) []knownBitsEntry {
	var base limit
	var derived knownBitsEntry
	// The reinterpretation below needs a whole number of knownBitsEntry
	// values to fit in one limit.
	if unsafe.Sizeof(base)%unsafe.Sizeof(derived) != 0 {
		panic("bad")
	}
	// scale is the number of knownBitsEntry values that fit in one limit.
	scale := unsafe.Sizeof(base) / unsafe.Sizeof(derived)
	// Ask for enough limits to hold n entries, rounding up.
	b := c.allocLimitSlice(int((uintptr(n) + scale - 1) / scale))
	// Build a slice header over the same memory, with the capacity expressed
	// in knownBitsEntry units.
	// NOTE(review): &b[0] requires b to be non-empty — confirm what
	// allocLimitSlice returns when n == 0.
	s := unsafeheader.Slice{
		Data: unsafe.Pointer(&b[0]),
		Len:  n,
		Cap:  cap(b) * int(scale),
	}
	return *(*[]knownBitsEntry)(unsafe.Pointer(&s))
}
// freeKnownBitsEntriesSlice reinterprets s as a []limit and passes it to
// freeLimitSlice.
// NOTE(review): presumably s must come from allocKnownBitsEntriesSlice, which
// builds the inverse view — confirm against callers.
func (c *Cache) freeKnownBitsEntriesSlice(s []knownBitsEntry) {
	var base limit
	var derived knownBitsEntry
	// scale is the number of knownBitsEntry values that fit in one limit.
	scale := unsafe.Sizeof(base) / unsafe.Sizeof(derived)
	// Convert length and capacity from knownBitsEntry units to limit units,
	// rounding up.
	// NOTE(review): &s[0] requires s to be non-empty.
	b := unsafeheader.Slice{
		Data: unsafe.Pointer(&s[0]),
		Len:  int((uintptr(len(s)) + scale - 1) / scale),
		Cap:  int((uintptr(cap(s)) + scale - 1) / scale),
	}
	c.freeLimitSlice(*(*[]limit)(unsafe.Pointer(&b)))
}

View file

@ -472,6 +472,7 @@ var passes = [...]pass{
{name: "divisible", fn: divisible, required: true},
{name: "divmod", fn: divmod, required: true},
{name: "middle opt", fn: opt, required: true},
{name: "known bits", fn: knownBits},
{name: "early fuse", fn: fuseEarly},
{name: "expand calls", fn: expandCalls, required: true},
{name: "decompose builtin", fn: postExpandCallsDecompose, required: true},
@ -602,6 +603,12 @@ var passOrder = [...]constraint{
{"branchelim", "lower"},
// lower needs cpu feature information (for SIMD)
{"cpufeatures", "lower"},
// known bits is an arch-independent pass.
{"known bits", "lower"},
// known bits does very little except some fancy constant folding and we need opt to clean it up.
{"known bits", "late opt"},
// known bits does a better job once prove cleaned up some always taken and never taken branches.
{"prove", "known bits"},
}
func init() {

View file

@ -0,0 +1,178 @@
// Copyright 2026 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// fold reports what is statically known about the bits of v as a
// (value, known) pair: a set bit in known means the matching bit of value is
// the bit v holds.
//
// Results are memoized in kb.entries. v is flagged in kb.seenValues before its
// operands are visited, so reaching v again while it is still being computed
// (a cycle through a Phi) returns whatever entry is currently stored for it
// instead of recursing forever.
func (kb *knownBitsState) fold(v *Value) (value, known int64) {
	id := v.ID
	if kb.seenValues.Test(uint32(id)) {
		cached := kb.entries[id]
		return cached.value, cached.known
	}

	// Runs on every return path below: canonicalize the result, then store it.
	defer func() {
		// Invariant 2: results are sign-extended from the width of v to int64
		// (inspired by RISC-V's xlen=64).
		switch size := v.Type.Size(); size {
		case 1, 2, 4, 8:
			// Shift the top bit of the value's width up to bit 63 and back
			// down arithmetically; for 8-byte values this is a no-op.
			unused := uint(64 - 8*size)
			value = value << unused >> unused
			known = known << unused >> unused
		default:
			panic("unreachable; unknown integer size")
		}

		// Invariant 1: unknown bits are always 0 inside value.
		value &= known

		if fn := v.Block.Func; fn.pass.debug > 1 {
			fn.Warnl(v.Pos, "known bits state %v: k:%d v:%d", v, known, value)
		}
		kb.entries[id] = knownBitsEntry{known: known, value: value}
	}()

	// Mark v as seen before recursing so that loops give up instead of
	// recursing without bound.
	kb.seenValues.Set(uint32(id))

	switch v.Op {
	// TODO: Shifts, rotates, extensions, ...
	case OpConst64, OpConst32, OpConst16, OpConst8:
		// Every bit of a constant is known.
		return v.AuxInt, -1

	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
		a, aKnown := kb.fold(v.Args[0])
		b, bKnown := kb.fold(v.Args[1])
		// A result bit is known when it is a one on both sides, or a known
		// zero on either side.
		ones := a & b
		knownZeros := (^a & aKnown) | (^b & bKnown)
		return ones, ones | knownZeros

	case OpOr64, OpOr32, OpOr16, OpOr8:
		a, aKnown := kb.fold(v.Args[0])
		b, bKnown := kb.fold(v.Args[1])
		// A result bit is known when it is a one on either side (ones are
		// only ever recorded for known bits), or a known zero on both sides.
		ones := a | b
		zerosInBoth := ^ones & aKnown & bKnown
		return ones, ones | zerosInBoth

	case OpXor64, OpXor32, OpXor16, OpXor8:
		a, aKnown := kb.fold(v.Args[0])
		b, bKnown := kb.fold(v.Args[1])
		// Only bits known on both sides stay known.
		return a ^ b, aKnown & bKnown

	case OpCom64, OpCom32, OpCom16, OpCom8:
		a, aKnown := kb.fold(v.Args[0])
		return ^a, aKnown

	case OpPhi:
		first := true
		for i, arg := range v.Args {
			if !kb.isLiveInEdge(v.Block, uint(i)) {
				continue
			}
			argValue, argKnown := kb.fold(arg)
			if first {
				first = false
				value, known = argValue, argKnown
			} else {
				// Keep a bit only if it is known on this edge too and agrees
				// with what has been accumulated so far.
				known &= argKnown &^ (value ^ argValue)
			}
			if known == 0 {
				// Nothing is known any more; further edges cannot help.
				break
			}
		}
		return value, known

	case OpCopy:
		return kb.fold(v.Args[0])

	default:
		// Unhandled op: nothing is known.
		return 0, 0
	}
}
// knownBits does constant folding across bitfields.
//
// It folds every used, non-constant integer value of every block returned by
// f.postorder(); a value whose bits all turn out to be known is rewritten
// into a copy of the matching constant.
func knownBits(f *Func) {
	kb := &knownBitsState{
		entries:         f.Cache.allocKnownBitsEntriesSlice(f.NumValues()),
		seenValues:      f.Cache.allocBitset(f.NumValues()),
		reachableBlocks: f.Cache.allocBitset(f.NumBlocks()),
	}
	defer f.Cache.freeKnownBitsEntriesSlice(kb.entries)
	defer f.Cache.freeBitset(kb.seenValues)
	defer f.Cache.freeBitset(kb.reachableBlocks)

	// Start from an all-zero state.
	clear(kb.seenValues)
	clear(kb.entries)
	clear(kb.reachableBlocks)

	order := f.postorder()

	// Record every block of the postorder before folding anything, since
	// fold consults reachableBlocks through isLiveInEdge.
	for _, blk := range order {
		kb.reachableBlocks.Set(uint32(blk.ID))
	}

	for _, blk := range order {
		for _, v := range blk.Values {
			if v.Uses == 0 {
				continue
			}
			if !v.Type.IsInteger() {
				continue
			}
			if op := v.Op; op == OpConst64 || op == OpConst32 || op == OpConst16 || op == OpConst8 {
				// Already a constant, nothing to gain.
				continue
			}

			folded, mask := kb.fold(v)
			if mask != -1 {
				// At least one bit is unknown.
				continue
			}

			if f.pass.debug > 0 {
				f.Warnl(v.Pos, "known value of %v (%v): %d", v, v.Op, folded)
			}

			var konst *Value
			switch v.Type.Size() {
			case 8:
				konst = f.ConstInt64(v.Type, folded)
			case 4:
				konst = f.ConstInt32(v.Type, int32(folded))
			case 2:
				konst = f.ConstInt16(v.Type, int16(folded))
			case 1:
				konst = f.ConstInt8(v.Type, int8(folded))
			default:
				panic("unreachable; unknown integer size")
			}
			v.copyOf(konst)
		}
	}
}
// knownBitsState bundles the per-value results and the bookkeeping bitsets
// used while folding.
type knownBitsState struct {
	// entries is indexed by Value.ID.
	entries []knownBitsEntry

	// seenValues is indexed by Value.ID (at the bit level).
	seenValues bitset

	// reachableBlocks is indexed by Block.ID (at the bit level).
	reachableBlocks bitset
}
// knownBitsEntry records which bits of a value are known and what they are.
//
// Two invariants hold for a stored entry:
//  1. a bit that is unknown (0 in known) is also 0 in value;
//  2. both fields are sign-extended to int64 from the width of the value they
//     describe (inspired by RISC-V's xlen=64).
//
// For example, an 8 bits value known to be 0b10?????? is stored as
//
//	known = int64(int8(0b11000000))
//	value = int64(int8(0b10000000))
type knownBitsEntry struct {
	// known has a 1 for every bit whose state is known.
	known int64
	// value holds the state of the known bits.
	value int64
}
// isLiveInEdge reports whether the index-th incoming edge of b is live. It
// looks up the predecessor edge and asks isLiveOutEdge about the same edge as
// seen from the predecessor's side.
func (kb *knownBitsState) isLiveInEdge(b *Block, index uint) bool {
	pred := b.Preds[index]
	from, slot := pred.b, uint(pred.i)
	return kb.isLiveOutEdge(from, slot)
}
// isLiveOutEdge reports whether the index-th outgoing edge of b is live.
// An edge out of a block not recorded in reachableBlocks is never live. For a
// BlockFirst only edge 0 is live; for the other listed kinds every edge is.
// It panics on any other block kind.
func (kb *knownBitsState) isLiveOutEdge(b *Block, index uint) bool {
	if kb.reachableBlocks.Test(uint32(b.ID)) {
		switch kind := b.Kind; kind {
		case BlockPlain, BlockIf, BlockDefer, BlockRet, BlockRetJmp, BlockExit, BlockJumpTable:
			return true
		case BlockFirst:
			return index == 0
		}
		panic("unreachable; unknown block kind")
	}
	return false
}

View file

@ -18,8 +18,20 @@ const uintSize = 32 << (^uint(0) >> 63) // 32 or 64
// bitset is a bit array for dense indexes.
type bitset []uint
// computeBitsetSize returns the number of uint words needed to hold n bits,
// i.e. n divided by uintSize, rounded up.
func computeBitsetSize(n int) int {
	const roundUp = uintSize - 1
	return (n + roundUp) / uintSize
}
// newBitset returns a freshly allocated, zeroed bitset with room for at
// least n bits.
func newBitset(n int) bitset {
	return make(bitset, computeBitsetSize(n))
}
// allocBitset returns a bitset with room for at least n bits, backed by a
// uint slice taken from the cache.
// NOTE(review): callers in this change clear the result before use, so the
// contents are presumably not guaranteed to be zero — confirm.
func (c *Cache) allocBitset(n int) bitset {
	words := c.allocUintSlice(computeBitsetSize(n))
	return bitset(words)
}
// freeBitset hands the uint slice backing bs to the cache's freeUintSlice.
func (c *Cache) freeBitset(bs bitset) {
	var words []uint = bs
	c.freeUintSlice(words)
}
func (bs bitset) Reset() {

View file

@ -0,0 +1,51 @@
// asmcheck
// Copyright 2026 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
// knownBitsPhiAnd merges the constants 1 and 3, which both have bit 0 set, so
// x & 1 does not depend on cond. The asmcheck directives before the return
// reject any "AND" match on amd64 and arm64.
func knownBitsPhiAnd(cond bool) int {
	x := 1
	if cond {
		x = 3
	}
	// amd64:-"AND"
	// arm64:-"AND"
	return x & 1
}
// knownBitsDeferPattern sets bits 0 and 2 unconditionally and bits 1 and 3
// only under a and b. The final mask keeps just bits 0 and 2, so the result
// does not depend on either argument. The asmcheck directives before the
// return reject any "AND" match on amd64 and arm64.
func knownBitsDeferPattern(a, b bool) int {
	bits := 0
	bits |= 1 << 0
	if a {
		bits |= 1 << 1
	}
	bits |= 1 << 2
	if b {
		bits |= 1 << 3
	}
	// amd64:-"AND"
	// arm64:-"AND"
	return bits & (1<<2 | 1<<0)
}
// knownBitsXorToggle toggles bit 0 once and bit 2 twice unconditionally; the
// conditional toggles only touch bits 1, 3 and 4. The final mask keeps just
// bits 0 and 2, so the result does not depend on a, b or c. The asmcheck
// directives before the return reject any "AND" match on amd64 and arm64.
func knownBitsXorToggle(a, b, c bool) int {
	bits := 0
	bits ^= 1 << 0
	if a {
		bits ^= 1 << 1
	}
	bits ^= 1 << 2
	if b {
		bits ^= 1 << 3
	}
	bits ^= 1 << 2
	if c {
		bits ^= 1 << 4
	}
	// amd64:-"AND"
	// arm64:-"AND"
	return bits & (1<<2 | 1<<0)
}

149
test/known_bits.go Normal file
View file

@ -0,0 +1,149 @@
// errorcheck -0 -d=ssa/known_bits/debug=1
//go:build amd64 || arm64 || s390x || ppc64le || riscv64
// Copyright 2026 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package a
// knownBitsPhiAnd merges the constants 1 and 3, which both have bit 0 set;
// the ERROR directive expects the And64 to be reported as the known value 1.
func knownBitsPhiAnd(cond bool) int {
	x := 1
	if cond {
		x = 3
	}
	return x & 1 // ERROR "known value of v[0-9]+ \(And64\): 1$"
}
// knownBitsPhiAndGarbage merges an otherwise unknown x whose bit 0 was
// cleared with the constant 2, whose bit 0 is also clear; the ERROR directive
// expects the And64 to be reported as the known value 0.
func knownBitsPhiAndGarbage(cond bool, x int) int {
	x &^= 1
	if cond {
		x = 2
	}
	return x & 1 // ERROR "known value of v[0-9]+ \(And64\): 0$"
}
// unknownBitsPhiAnd is a negative case: 1 and 2 disagree on bit 0, so x & 1
// depends on cond and the return line carries no ERROR directive.
func unknownBitsPhiAnd(cond bool) int {
	x := 1
	if cond {
		x = 2
	}
	return x & 1
}
// knownBitsOrGarbage forces bits 0-2 of x to one, then ORs in an unknown
// value whose bits 0 and 1 were cleared; the ERROR directive expects the
// And64 with 3 to be reported as the known value 3.
func knownBitsOrGarbage(x, unknown int) int {
	x |= 7
	x |= unknown &^ 3
	return x & 3 // ERROR "known value of v[0-9]+ \(And64\): 3$"
}
// unknownBitsOrGarbage is a negative case: only bit 0 is forced to one, so
// bit 1 of x & 3 still depends on the inputs and the return line carries no
// ERROR directive.
func unknownBitsOrGarbage(x, unknown int) int {
	x |= 1
	x |= unknown
	return x & 3
}
// knownBitsDeferPattern sets bits 0 and 2 unconditionally and bits 1 and 3
// only under a and b; the ERROR directive expects the And64 that masks bits 0
// and 2 to be reported as the known value 5.
func knownBitsDeferPattern(a, b bool) int {
	bits := 0
	bits |= 1 << 0
	if a {
		bits |= 1 << 1
	}
	bits |= 1 << 2
	if b {
		bits |= 1 << 3
	}
	return bits & (1<<2 | 1<<0) // ERROR "known value of v[0-9]+ \(And64\): 5$"
}
// knownBitsDeferPatternGarbage repeats knownBitsDeferPattern and then XORs in
// an unknown value whose bits 0 and 2 were cleared, leaving the masked bits
// untouched; the ERROR directive still expects the known value 5.
func knownBitsDeferPatternGarbage(a, b bool, garbage int) int {
	bits := 0
	bits |= 1 << 0
	if a {
		bits |= 1 << 1
	}
	bits |= 1 << 2
	if b {
		bits |= 1 << 3
	}
	bits ^= garbage &^ (1<<2 | 1<<0)
	return bits & (1<<2 | 1<<0) // ERROR "known value of v[0-9]+ \(And64\): 5$"
}
// knownBitsXorToggle toggles bit 0 once and bit 2 twice unconditionally; the
// conditional toggles only touch bits 1, 3 and 4. The ERROR directive expects
// the And64 that masks bits 0 and 2 to be reported as the known value 1.
func knownBitsXorToggle(a, b, c bool) int {
	bits := 0
	bits ^= 1 << 0
	if a {
		bits ^= 1 << 1
	}
	bits ^= 1 << 2
	if b {
		bits ^= 1 << 3
	}
	bits ^= 1 << 2
	if c {
		bits ^= 1 << 4
	}
	return bits & (1<<2 | 1<<0) // ERROR "known value of v[0-9]+ \(And64\): 1$"
}
// knownBitsXorToggleGarbage repeats knownBitsXorToggle and then XORs in an
// unknown value whose bits 0 and 2 were cleared, leaving the masked bits
// untouched; the ERROR directive still expects the known value 1.
func knownBitsXorToggleGarbage(a, b, c bool, garbage int) int {
	bits := 0
	bits ^= 1 << 0
	if a {
		bits ^= 1 << 1
	}
	bits ^= 1 << 2
	if b {
		bits ^= 1 << 3
	}
	bits ^= 1 << 2
	if c {
		bits ^= 1 << 4
	}
	bits ^= garbage &^ (1<<2 | 1<<0)
	return bits & (1<<2 | 1<<0) // ERROR "known value of v[0-9]+ \(And64\): 1$"
}
// unknownBitsXorToggle is a negative case: the toggle under b hits bit 2,
// which is one of the masked bits, so the result depends on b and the return
// line carries no ERROR directive.
func unknownBitsXorToggle(a, b, c bool) int {
	bits := 0
	bits ^= 1 << 0
	if a {
		bits ^= 1 << 1
	}
	bits ^= 1 << 2
	if b {
		bits ^= 1 << 2
	}
	bits ^= 1 << 2
	if c {
		bits ^= 1 << 4
	}
	return bits & (1<<2 | 1<<0)
}
// knownBitsPhiComAnd merges the constants 1 and 3, which both have bit 0 set,
// then complements the result, so bit 0 is clear; the ERROR directive expects
// the And64 to be reported as the known value 0.
func knownBitsPhiComAnd(cond bool) int {
	x := 1
	if cond {
		x = 3
	}
	return ^x & 1 // ERROR "known value of v[0-9]+ \(And64\): 0$"
}
// knownBitsPhiComAndGarbage repeats knownBitsPhiComAnd but first XORs in an
// unknown value whose bit 0 was cleared, leaving bit 0 of x untouched; the
// ERROR directive still expects the known value 0.
func knownBitsPhiComAndGarbage(cond bool, garbage int) int {
	x := 1
	if cond {
		x = 3
	}
	x ^= garbage &^ 1
	return ^x & 1 // ERROR "known value of v[0-9]+ \(And64\): 0$"
}
// unknownBitsPhiComAnd is a negative case: 1 and 2 disagree on bit 0, so
// ^x & 1 depends on cond and the return line carries no ERROR directive.
func unknownBitsPhiComAnd(cond bool) int {
	x := 1
	if cond {
		x = 2
	}
	return ^x & 1
}