cmd/compile: improve loopRotate to handle nested loops

Enhance loop rotation of nested loops. Currently, loops are processed independently,
resulting in unnecessary jumps between outer and inner loops. By processing inner
loops before their parent loop, we ensure nested loop blocks are
properly placed within their parent loop's block sequence.

There is some code size improvement (as measured on amd64) because jumps
to/from inner loops are removed by the updated loopRotate block order:

Executable            Old .text  New .text     Change
-------------------------------------------------------
asm                     2147569    2146481     -0.05%
cgo                     1977457    1975761     -0.09%
compile                10447345   10441905     -0.05%
cover                   2110097    2108977     -0.05%
link                    2930289    2929041     -0.04%
preprofile               927345     926769     -0.06%
vet                     3279057    3277009     -0.06%

Change-Id: I4b9e993c2be07fad735e6bcf32d062d099d9cfb5
Reviewed-on: https://go-review.googlesource.com/c/go/+/684335
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Alexander Musman 2025-05-13 09:44:17 +03:00 committed by Gopher Robot
parent dcb479c2f9
commit 592c2db868
2 changed files with 142 additions and 8 deletions

View file

@ -4,6 +4,10 @@
package ssa
import (
"slices"
)
// loopRotate converts loops with a check-loop-condition-at-beginning
// to loops with a check-loop-condition-at-end.
// This helps loops avoid extra unnecessary jumps.
@ -41,10 +45,65 @@ func loopRotate(f *Func) {
// Map from block ID to the moving blocks that should
// come right after it.
// If a block whose ID is a key of the 'after' map also occurs in
// some other block's 'after' list, it stands for a whole nested
// loop. For example, consider an inner loop I nested in an outer
// loop O. It and Ot are the top blocks chosen by our algorithm for
// these loops, and It is in Ot's 'after' list.
//
//   Before:                    After:
//
//       e                          e
//       │                          │
//       │                          │Ot ◄───┐
//       ▼                          ▼▼      │
//   ┌───Oh ◄────┐              ┌─┬─Oh      │
//   │   │       │              │ │         │
//   │   │       │              │ │ It◄───┐ │
//   │   ▼       │              │ │ ▼     │ │
//   │ ┌─Ih◄───┐ │              │ └►Ih    │ │
//   │ │ │     │ │              │ ┌─┤     │ │
//   │ │ ▼     │ │              │ │ ▼     │ │
//   │ │ Ib    │ │              │ │ Ib    │ │
//   │ │ └─►It─┘ │              │ │ └─────┘ │
//   │ │         │              │ │         │
//   │ └►Ie      │              │ └►Ie      │
//   │   └─►Ot───┘              │   └───────┘
//   │                          │
//   └──►Oe                     └──►Oe
//
// We build the 'after' lists for each of the top blocks Ot and It:
// after[Ot]: Oh, It, Ie
// after[It]: Ih, Ib
after := map[ID][]*Block{}
// Map from loop header ID to the new top block for the loop.
tops := map[ID]*Block{}
// Order loops to rotate any child loop before adding its top block
// to the parent loop's 'after' list.
loopnest.calculateDepths()
loopOrder := f.Cache.allocIntSlice(len(loopnest.loops))
for i := range loopOrder {
loopOrder[i] = i
}
defer f.Cache.freeIntSlice(loopOrder)
slices.SortFunc(loopOrder, func(i, j int) int {
di := loopnest.loops[i].depth
dj := loopnest.loops[j].depth
switch {
case di > dj:
return -1
case di < dj:
return 1
default:
return 0
}
})
// Check each loop header and decide if we want to move it.
for _, loop := range loopnest.loops {
for _, loopIdx := range loopOrder {
loop := loopnest.loops[loopIdx]
b := loop.header
var p *Block // b's in-loop predecessor
for _, e := range b.Preds {
@ -59,6 +118,7 @@ func loopRotate(f *Func) {
if p == nil {
continue
}
tops[loop.header.ID] = p
p.Hotness |= HotInitial
if f.IsPgoHot {
p.Hotness |= HotPgo
@ -80,9 +140,11 @@ func loopRotate(f *Func) {
if nextb == p { // original loop predecessor is next
break
}
if loopnest.b2l[nextb.ID] == loop {
if bloop := loopnest.b2l[nextb.ID]; bloop != nil {
if bloop == loop || bloop.outer == loop && tops[bloop.header.ID] == nextb {
after[p.ID] = append(after[p.ID], nextb)
}
}
b = nextb
}
// Swap b and p so that we'll handle p before b when moving blocks.
@ -90,7 +152,7 @@ func loopRotate(f *Func) {
f.Blocks[idToIdx[p.ID]] = loop.header
idToIdx[loop.header.ID], idToIdx[p.ID] = idToIdx[p.ID], idToIdx[loop.header.ID]
// Place b after p.
// Place loop blocks after p.
for _, b := range after[p.ID] {
move[b.ID] = struct{}{}
}
@ -107,16 +169,23 @@ func loopRotate(f *Func) {
oldOrder := f.Cache.allocBlockSlice(len(f.Blocks))
defer f.Cache.freeBlockSlice(oldOrder)
copy(oldOrder, f.Blocks)
var moveBlocks func(bs []*Block)
moveBlocks = func(blocks []*Block) {
for _, a := range blocks {
f.Blocks[j] = a
j++
if nextBlocks, ok := after[a.ID]; ok {
moveBlocks(nextBlocks)
}
}
}
for _, b := range oldOrder {
if _, ok := move[b.ID]; ok {
continue
}
f.Blocks[j] = b
j++
for _, a := range after[b.ID] {
f.Blocks[j] = a
j++
}
moveBlocks(after[b.ID])
}
if j != len(oldOrder) {
f.Fatalf("bad reordering in looprotate")

View file

@ -0,0 +1,65 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"cmd/compile/internal/types"
"testing"
)
// TestLoopRotateNested builds a function containing an inner loop nested
// inside an outer loop, runs loopRotate on it, and checks the resulting
// block layout.
//
// In the expected layout each loop begins with its "top" block
// (outerTop, innerTop) and the inner loop's blocks sit contiguously
// between outerBody and innerExit.
func TestLoopRotateNested(t *testing.T) {
	cfg := testConfig(t)
	fn := cfg.Fun("entry",
		Bloc("entry",
			Valu("mem", OpInitMem, types.TypeMem, 0, nil),
			Valu("constTrue", OpConstBool, types.Types[types.TBOOL], 1, nil),
			Goto("outerHeader")),
		// Outer loop: outerHeader -> outerBody -> (inner loop) -> outerTop -> outerHeader.
		Bloc("outerHeader",
			If("constTrue", "outerBody", "outerExit")),
		Bloc("outerBody",
			Goto("innerHeader")),
		// Inner loop: innerHeader -> innerBody -> innerTop -> innerHeader.
		Bloc("innerHeader",
			If("constTrue", "innerBody", "innerExit")),
		Bloc("innerBody",
			Goto("innerTop")),
		Bloc("innerTop",
			Goto("innerHeader")),
		Bloc("innerExit",
			Goto("outerTop")),
		Bloc("outerTop",
			Goto("outerHeader")),
		Bloc("outerExit",
			Exit("mem")))

	// Reverse lookup from block ID to the name the block was declared with.
	// The table has one more slot than there are blocks; presumably block
	// IDs start at 1 — TODO confirm.
	names := make([]string, len(fn.f.Blocks)+1)
	for name, blk := range fn.blocks {
		names[blk.ID] = name
	}

	// Validate the function both before and after the pass.
	CheckFunc(fn.f)
	loopRotate(fn.f)
	CheckFunc(fn.f)

	// Block order we expect loopRotate to produce.
	want := []string{
		"entry",
		"outerTop",
		"outerHeader",
		"outerBody",
		"innerTop",
		"innerHeader",
		"innerBody",
		"innerExit",
		"outerExit",
	}

	got := fn.f.Blocks
	if len(want) != len(got) {
		// Bail out early: indexing want below would be unsafe.
		t.Fatalf("expected %d blocks, found %d", len(want), len(got))
	}
	for i := range got {
		if name := names[got[i].ID]; want[i] != name {
			t.Errorf("position %d: expected %s, found %s", i, want[i], name)
		}
	}
}