cmd/compile: check for iteration after range func loop exit

When this happens, panic.

This is a revised version of a check that used #next,
where this one instead uses a per-loop #exit flag,
and catches more problematic iterators.

Updates #56413.
Updates #61405.

Change-Id: I6574f754e475bb67b9236b4f6c25979089f9b629
Reviewed-on: https://go-review.googlesource.com/c/go/+/540263
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
This commit is contained in:
David Chase 2023-11-07 17:20:35 -05:00
parent 6f5aba995f
commit b5e31780b6
10 changed files with 1522 additions and 39 deletions

View file

@ -62,6 +62,7 @@ const (
exprFuncInst
exprRecv
exprReshape
exprRuntimeBuiltin // a reference to a runtime function from transformed syntax. Followed by string name, e.g., "panicrangeexit"
)
type codeAssign int

View file

@ -2442,6 +2442,10 @@ func (r *reader) expr() (res ir.Node) {
n.SetTypecheck(1)
}
return n
case exprRuntimeBuiltin:
builtin := typecheck.LookupRuntime(r.String())
return builtin
}
}

View file

@ -1715,6 +1715,15 @@ func (w *writer) expr(expr syntax.Expr) {
targs := inst.TypeArgs
if tv, ok := w.p.maybeTypeAndValue(expr); ok {
if tv.IsRuntimeHelper() {
if pkg := obj.Pkg(); pkg != nil && pkg.Name() == "runtime" {
objName := obj.Name()
w.Code(exprRuntimeBuiltin)
w.String(objName)
return
}
}
if tv.IsType() {
w.p.fatalf(expr, "unexpected type expression %v", syntax.String(expr))
}

File diff suppressed because it is too large Load diff

View file

@ -141,6 +141,37 @@ TODO: What about:
With this rewrite the "return true" is not visible after yield returns,
but maybe it should be?
# Checking
To permit checking that an iterator is well-behaved -- that is, that
it does not call the loop body again after it has returned false or
after the entire loop has exited (it might retain a copy of the body
function, or pass it to another goroutine) -- each generated loop has
its own #exitK flag that is checked before each iteration, and set both
at any early exit and after the iteration completes.
For example:
for x := range f {
...
if ... { break }
...
}
becomes
{
var #exit1 bool
f(func(x T1) bool {
if #exit1 { runtime.panicrangeexit() }
...
if ... { #exit1 = true ; return false }
...
return true
})
#exit1 = true
}
# Nested Loops
So far we've only considered a single loop. If a function contains a
@ -175,23 +206,30 @@ becomes
#r1 type1
#r2 type2
)
var #exit1 bool
f(func() {
if #exit1 { runtime.panicrangeexit() }
var #exit2 bool
g(func() {
if #exit2 { runtime.panicrangeexit() }
...
{
// return a, b
#r1, #r2 = a, b
#next = -2
#exit1, #exit2 = true, true
return false
}
...
return true
})
#exit2 = true
if #next < 0 {
return false
}
return true
})
#exit1 = true
if #next == -2 {
return #r1, #r2
}
@ -205,7 +243,8 @@ return with a single check.
For a labeled break or continue of an outer range-over-func, we
use positive #next values. Any such labeled break or continue
really means "do N breaks" or "do N breaks and 1 continue".
We encode that as 2*N or 2*N+1 respectively.
We encode that as perLoopStep*N or perLoopStep*N+1 respectively.
Loops that might need to propagate a labeled break or continue
add one or both of these to the #next checks:
@ -239,30 +278,40 @@ becomes
{
var #next int
var #exit1 bool
f(func() {
if #exit1 { runtime.panicrangeexit() }
var #exit2 bool
g(func() {
if #exit2 { runtime.panicrangeexit() }
var #exit3 bool
h(func() {
if #exit3 { runtime.panicrangeexit() }
...
{
// break F
#next = 4
#exit1, #exit2, #exit3 = true, true, true
return false
}
...
{
// continue F
#next = 3
#exit2, #exit3 = true, true
return false
}
...
return true
})
#exit3 = true
if #next >= 2 {
#next -= 2
return false
}
return true
})
#exit2 = true
if #next >= 2 {
#next -= 2
return false
@ -274,6 +323,7 @@ becomes
...
return true
})
#exit1 = true
}
Note that the post-h checks only consider a break,
@ -299,6 +349,7 @@ For example
Top: print("start\n")
for range f {
for range g {
...
for range h {
...
goto Top
@ -312,28 +363,39 @@ becomes
Top: print("start\n")
{
var #next int
var #exit1 bool
f(func() {
if #exit1 { runtime.panicrangeexit() }
var #exit2 bool
g(func() {
if #exit2 { runtime.panicrangeexit() }
...
var #exit3 bool
h(func() {
if #exit3 { runtime.panicrangeexit() }
...
{
// goto Top
#next = -3
#exit1, #exit2, #exit3 = true, true, true
return false
}
...
return true
})
#exit3 = true
if #next < 0 {
return false
}
return true
})
#exit2 = true
if #next < 0 {
return false
}
return true
})
#exit1 = true
if #next == -3 {
#next = 0
goto Top
@ -435,6 +497,7 @@ type rewriter struct {
nextVar types2.Object
retVars []types2.Object
defers types2.Object
exitVarCount int // exitvars are referenced from their respective loops
}
// A branch is a single labeled branch.
@ -446,6 +509,8 @@ type branch struct {
// A forLoop describes a single range-over-func loop being processed.
type forLoop struct {
nfor *syntax.ForStmt // actual syntax
exitFlag *types2.Var // #exit variable for this loop
exitFlagDecl *syntax.VarDecl
checkRet bool // add check for "return" after loop
checkRetArgs bool // add check for "return args" after loop
@ -556,6 +621,7 @@ func (r *rewriter) startLoop(loop *forLoop) {
r.false = types2.Universe.Lookup("false")
r.rewritten = make(map[*syntax.ForStmt]syntax.Stmt)
}
loop.exitFlag, loop.exitFlagDecl = r.exitVar(loop.nfor.Pos())
}
// editStmt returns the replacement for the statement x,
@ -605,6 +671,19 @@ func (r *rewriter) editDefer(x *syntax.CallStmt) syntax.Stmt {
return x
}
func (r *rewriter) exitVar(pos syntax.Pos) (*types2.Var, *syntax.VarDecl) {
r.exitVarCount++
name := fmt.Sprintf("#exit%d", r.exitVarCount)
typ := r.bool.Type()
obj := types2.NewVar(pos, r.pkg, name, typ)
n := syntax.NewName(pos, name)
setValueType(n, typ)
r.info.Defs[n] = obj
return obj, &syntax.VarDecl{NameList: []*syntax.Name{n}}
}
// editReturn returns the replacement for the return statement x.
// See the "Return" section in the package doc comment above for more context.
func (r *rewriter) editReturn(x *syntax.ReturnStmt) syntax.Stmt {
@ -635,6 +714,9 @@ func (r *rewriter) editReturn(x *syntax.ReturnStmt) syntax.Stmt {
bl.List = append(bl.List, &syntax.AssignStmt{Lhs: r.useList(r.retVars), Rhs: x.Results})
}
bl.List = append(bl.List, &syntax.AssignStmt{Lhs: r.next(), Rhs: r.intConst(next)})
for i := 0; i < len(r.forStack); i++ {
bl.List = append(bl.List, r.setExitedAt(i))
}
bl.List = append(bl.List, &syntax.ReturnStmt{Results: r.useVar(r.false)})
setPos(bl, x.Pos())
return bl
@ -667,6 +749,9 @@ func (r *rewriter) editBranch(x *syntax.BranchStmt) syntax.Stmt {
for i >= 0 && r.forStack[i].nfor != targ {
i--
}
// exitFrom is the index of the loop interior to the target of the control flow,
// if such a loop exists (it does not if i == len(r.forStack) - 1)
exitFrom := i + 1
// Compute the value to assign to #next and the specific return to use.
var next int
@ -695,6 +780,7 @@ func (r *rewriter) editBranch(x *syntax.BranchStmt) syntax.Stmt {
for i >= 0 && r.forStack[i].nfor != targ {
i--
}
exitFrom = i + 1
// Mark loop we exit to get to targ to check for that branch.
// When i==-1 that's the outermost func body
@ -714,28 +800,35 @@ func (r *rewriter) editBranch(x *syntax.BranchStmt) syntax.Stmt {
// For continue of innermost loop, use "return true".
// Otherwise we are breaking the innermost loop, so "return false".
retVal := r.false
if depth == 0 && x.Tok == syntax.Continue {
retVal = r.true
}
ret = &syntax.ReturnStmt{Results: r.useVar(retVal)}
// If we're only operating on the innermost loop, the return is all we need.
if depth == 0 {
if depth == 0 && x.Tok == syntax.Continue {
ret = &syntax.ReturnStmt{Results: r.useVar(r.true)}
setPos(ret, x.Pos())
return ret
}
ret = &syntax.ReturnStmt{Results: r.useVar(r.false)}
// If this is a simple break, mark this loop as exited and return false.
// No adjustments to #next.
if depth == 0 {
bl := &syntax.BlockStmt{
List: []syntax.Stmt{r.setExited(), ret},
}
setPos(bl, x.Pos())
return bl
}
// The loop inside the one we are break/continue-ing
// needs to make that happen when we break out of it.
if x.Tok == syntax.Continue {
r.forStack[i+1].checkContinue = true
r.forStack[exitFrom].checkContinue = true
} else {
r.forStack[i+1].checkBreak = true
exitFrom = i
r.forStack[exitFrom].checkBreak = true
}
// The loops along the way just need to break.
for j := i + 2; j < len(r.forStack); j++ {
for j := exitFrom + 1; j < len(r.forStack); j++ {
r.forStack[j].checkBreak = true
}
@ -750,8 +843,15 @@ func (r *rewriter) editBranch(x *syntax.BranchStmt) syntax.Stmt {
// Assign #next = next and do the return.
as := &syntax.AssignStmt{Lhs: r.next(), Rhs: r.intConst(next)}
bl := &syntax.BlockStmt{
List: []syntax.Stmt{as, ret},
List: []syntax.Stmt{as},
}
// Set #exitK for this loop and those exited by the control flow.
for i := exitFrom; i < len(r.forStack); i++ {
bl.List = append(bl.List, r.setExitedAt(i))
}
bl.List = append(bl.List, ret)
setPos(bl, x.Pos())
return bl
}
@ -851,7 +951,14 @@ func (r *rewriter) endLoop(loop *forLoop) {
setPos(r.declStmt, start)
block.List = append(block.List, r.declStmt)
}
// declare the exitFlag here so it has proper scope and zeroing
exitFlagDecl := &syntax.DeclStmt{DeclList: []syntax.Decl{loop.exitFlagDecl}}
block.List = append(block.List, exitFlagDecl)
block.List = append(block.List, call)
block.List = append(block.List, r.setExited())
block.List = append(block.List, checks...)
if len(r.forStack) == 1 { // ending an outermost loop
@ -864,6 +971,18 @@ func (r *rewriter) endLoop(loop *forLoop) {
r.rewritten[nfor] = block
}
func (r *rewriter) setExited() *syntax.AssignStmt {
return r.setExitedAt(len(r.forStack) - 1)
}
func (r *rewriter) setExitedAt(index int) *syntax.AssignStmt {
loop := r.forStack[index]
return &syntax.AssignStmt{
Lhs: r.useVar(loop.exitFlag),
Rhs: r.useVar(r.true),
}
}
// bodyFunc converts the loop body (control flow has already been updated)
// to a func literal that can be passed to the range function.
//
@ -916,6 +1035,10 @@ func (r *rewriter) bodyFunc(body []syntax.Stmt, lhs []syntax.Expr, def bool, fty
tv.SetIsValue()
bodyFunc.SetTypeInfo(tv)
loop := r.forStack[len(r.forStack)-1]
bodyFunc.Body.List = append(bodyFunc.Body.List, r.assertNotExited(start, loop))
// Original loop body (already rewritten by editStmt during inspect).
bodyFunc.Body.List = append(bodyFunc.Body.List, body...)
@ -1010,6 +1133,36 @@ func (r *rewriter) ifNext(op syntax.Operator, c int, then syntax.Stmt) syntax.St
return nif
}
// setValueType marks x as a value with type typ.
func setValueType(x syntax.Expr, typ syntax.Type) {
tv := syntax.TypeAndValue{Type: typ}
tv.SetIsValue()
x.SetTypeInfo(tv)
}
// assertNotExited returns the statement:
//
// if #exitK { runtime.panicrangeexit() }
//
// where #exitK is the exit guard for loop.
func (r *rewriter) assertNotExited(start syntax.Pos, loop *forLoop) syntax.Stmt {
callPanicExpr := &syntax.CallExpr{
Fun: runtimeSym(r.info, "panicrangeexit"),
}
setValueType(callPanicExpr, nil) // no result type
callPanic := &syntax.ExprStmt{X: callPanicExpr}
nif := &syntax.IfStmt{
Cond: r.useVar(loop.exitFlag),
Then: &syntax.BlockStmt{
List: []syntax.Stmt{callPanic},
},
}
setPos(nif, start)
return nif
}
// next returns a reference to the #next variable.
func (r *rewriter) next() *syntax.Name {
if r.nextVar == nil {
@ -1106,7 +1259,11 @@ var runtimePkg = func() *types2.Package {
anyType := types2.Universe.Lookup("any").Type()
// func deferrangefunc() unsafe.Pointer
obj := types2.NewVar(nopos, pkg, "deferrangefunc", types2.NewSignatureType(nil, nil, nil, nil, types2.NewTuple(types2.NewParam(nopos, pkg, "extra", anyType)), false))
obj := types2.NewFunc(nopos, pkg, "deferrangefunc", types2.NewSignatureType(nil, nil, nil, nil, types2.NewTuple(types2.NewParam(nopos, pkg, "extra", anyType)), false))
pkg.Scope().Insert(obj)
// func panicrangeexit()
obj = types2.NewFunc(nopos, pkg, "panicrangeexit", types2.NewSignatureType(nil, nil, nil, nil, nil, false))
pkg.Scope().Insert(obj)
return pkg
@ -1118,6 +1275,7 @@ func runtimeSym(info *types2.Info, name string) *syntax.Name {
n := syntax.NewName(nopos, "runtime."+name)
tv := syntax.TypeAndValue{Type: obj.Type()}
tv.SetIsValue()
tv.SetIsRuntimeHelper()
n.SetTypeInfo(tv)
info.Uses[n] = obj
return n

View file

@ -39,16 +39,17 @@ type TypeAndValue struct {
exprFlags
}
type exprFlags uint8
type exprFlags uint16
func (f exprFlags) IsVoid() bool { return f&1 != 0 }
func (f exprFlags) IsType() bool { return f&2 != 0 }
func (f exprFlags) IsBuiltin() bool { return f&4 != 0 }
func (f exprFlags) IsBuiltin() bool { return f&4 != 0 } // a language builtin that resembles a function call, e.g., "make, append, new"
func (f exprFlags) IsValue() bool { return f&8 != 0 }
func (f exprFlags) IsNil() bool { return f&16 != 0 }
func (f exprFlags) Addressable() bool { return f&32 != 0 }
func (f exprFlags) Assignable() bool { return f&64 != 0 }
func (f exprFlags) HasOk() bool { return f&128 != 0 }
func (f exprFlags) IsRuntimeHelper() bool { return f&256 != 0 } // a runtime function called from transformed syntax
func (f *exprFlags) SetIsVoid() { *f |= 1 }
func (f *exprFlags) SetIsType() { *f |= 2 }
@ -58,6 +59,7 @@ func (f *exprFlags) SetIsNil() { *f |= 16 }
func (f *exprFlags) SetAddressable() { *f |= 32 }
func (f *exprFlags) SetAssignable() { *f |= 64 }
func (f *exprFlags) SetHasOk() { *f |= 128 }
func (f *exprFlags) SetIsRuntimeHelper() { *f |= 256 }
// a typeAndValue contains the results of typechecking an expression.
// It is embedded in expression nodes.

View file

@ -116,6 +116,9 @@ func interfaceSwitch(s *byte, t *byte) (int, *byte)
func ifaceeq(tab *uintptr, x, y unsafe.Pointer) (ret bool)
func efaceeq(typ *uintptr, x, y unsafe.Pointer) (ret bool)
// panic for iteration after exit in range func
func panicrangeexit()
// defer in range over func
func deferrangefunc() interface{}

View file

@ -102,6 +102,7 @@ var runtimeDecls = [...]struct {
{"interfaceSwitch", funcTag, 70},
{"ifaceeq", funcTag, 72},
{"efaceeq", funcTag, 72},
{"panicrangeexit", funcTag, 9},
{"deferrangefunc", funcTag, 73},
{"fastrand", funcTag, 74},
{"makemap64", funcTag, 76},

View file

@ -65,7 +65,6 @@ var builtins = [...]struct {
{"runtime.slicecopy", 1},
{"runtime.decoderune", 1},
{"runtime.countrunes", 1},
{"runtime.convI2I", 1},
{"runtime.convT", 1},
{"runtime.convTnoptr", 1},
{"runtime.convT16", 1},
@ -75,13 +74,15 @@ var builtins = [...]struct {
{"runtime.convTslice", 1},
{"runtime.assertE2I", 1},
{"runtime.assertE2I2", 1},
{"runtime.assertI2I", 1},
{"runtime.assertI2I2", 1},
{"runtime.panicdottypeE", 1},
{"runtime.panicdottypeI", 1},
{"runtime.panicnildottype", 1},
{"runtime.typeAssert", 1},
{"runtime.interfaceSwitch", 1},
{"runtime.ifaceeq", 1},
{"runtime.efaceeq", 1},
{"runtime.panicrangeexit", 1},
{"runtime.deferrangefunc", 1},
{"runtime.fastrand", 1},
{"runtime.makemap64", 1},
{"runtime.makemap", 1},
@ -134,7 +135,6 @@ var builtins = [...]struct {
{"runtime.unsafestringcheckptr", 1},
{"runtime.panicunsafestringlen", 1},
{"runtime.panicunsafestringnilptr", 1},
{"runtime.mulUintptr", 1},
{"runtime.memmove", 1},
{"runtime.memclrNoHeapPointers", 1},
{"runtime.memclrHasPointers", 1},
@ -210,6 +210,7 @@ var builtins = [...]struct {
{"runtime.x86HasFMA", 0},
{"runtime.armHasVFPv4", 0},
{"runtime.arm64HasATOMICS", 0},
{"runtime.asanregisterglobals", 1},
{"runtime.deferproc", 1},
{"runtime.deferprocStack", 1},
{"runtime.deferreturn", 1},

View file

@ -296,6 +296,13 @@ func deferproc(fn func()) {
// been set and must not be clobbered.
}
var rangeExitError = error(errorString("range function continued iteration after exit"))
//go:noinline
func panicrangeexit() {
panic(rangeExitError)
}
// deferrangefunc is called by functions that are about to
// execute a range-over-function loop in which the loop body
// may execute a defer statement. That defer needs to add to