cmd/compile: check for iteration after range func loop exit

When this happens, panic. This is a revised version of a check that used #next, where this one instead uses a per-loop #exit flag, and catches more problematic iterators. Updates #56413. Updates #61405. Change-Id: I6574f754e475bb67b9236b4f6c25979089f9b629 Reviewed-on: https://go-review.googlesource.com/c/go/+/540263 Run-TryBot: David Chase <drchase@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2025-12-08 06:10:04 +00:00 · 2023-11-07 17:20:35 -05:00 · 2023-11-07 17:20:35 -05:00 · b5e31780b6
commit b5e31780b6
parent 6f5aba995f
10 changed files with 1522 additions and 39 deletions
--- a/src/cmd/compile/internal/noder/codes.go
+++ b/src/cmd/compile/internal/noder/codes.go
@ -62,6 +62,7 @@ const (
 	exprFuncInst
 	exprRecv
 	exprReshape
+	exprRuntimeBuiltin // a reference to a runtime function from transformed syntax. Followed by string name, e.g., "panicrangeexit"
 )

 type codeAssign int
--- a/src/cmd/compile/internal/noder/reader.go
+++ b/src/cmd/compile/internal/noder/reader.go
@ -2442,6 +2442,10 @@ func (r *reader) expr() (res ir.Node) {
 			n.SetTypecheck(1)
 		}
 		return n
+
+	case exprRuntimeBuiltin:
+		builtin := typecheck.LookupRuntime(r.String())
+		return builtin
 	}
 }

--- a/src/cmd/compile/internal/noder/writer.go
+++ b/src/cmd/compile/internal/noder/writer.go
@ -1715,6 +1715,15 @@ func (w *writer) expr(expr syntax.Expr) {
 	targs := inst.TypeArgs

 	if tv, ok := w.p.maybeTypeAndValue(expr); ok {
+		if tv.IsRuntimeHelper() {
+			if pkg := obj.Pkg(); pkg != nil && pkg.Name() == "runtime" {
+				objName := obj.Name()
+				w.Code(exprRuntimeBuiltin)
+				w.String(objName)
+				return
+			}
+		}
+
 		if tv.IsType() {
 			w.p.fatalf(expr, "unexpected type expression %v", syntax.String(expr))
 		}
--- a/src/cmd/compile/internal/rangefunc/rangefunc_test.go
+++ b/src/cmd/compile/internal/rangefunc/rangefunc_test.go
--- a/src/cmd/compile/internal/rangefunc/rewrite.go
+++ b/src/cmd/compile/internal/rangefunc/rewrite.go
@ -141,6 +141,37 @@ TODO: What about:
 With this rewrite the "return true" is not visible after yield returns,
 but maybe it should be?

+# Checking
+
+To permit checking that an iterator is well-behaved -- that is, that
+it does not call the loop body again after it has returned false or
+after the entire loop has exited (it might retain a copy of the body
+function, or pass it to another goroutine) -- each generated loop has
+its own #exitK flag that is checked before each iteration, and set both
+at any early exit and after the iteration completes.
+
+For example:
+
+	for x := range f {
+		...
+		if ... { break }
+		...
+	}
+
+becomes
+
+	{
+		var #exit1 bool
+		f(func(x T1) bool {
+			if #exit1 { runtime.panicrangeexit() }
+			...
+			if ... { #exit1 = true ; return false }
+			...
+			return true
+		})
+		#exit1 = true
+	}
+
 # Nested Loops

 So far we've only considered a single loop. If a function contains a
@ -175,23 +206,30 @@ becomes
 			#r1 type1
 			#r2 type2
 		)
+		var #exit1 bool
 		f(func() {
+			if #exit1 { runtime.panicrangeexit() }
+			var #exit2 bool
 			g(func() {
+				if #exit2 { runtime.panicrangeexit() }
 				...
 				{
 					// return a, b
 					#r1, #r2 = a, b
 					#next = -2
+					#exit1, #exit2 = true, true
 					return false
 				}
 				...
 				return true
 			})
+			#exit2 = true
 			if #next < 0 {
 				return false
 			}
 			return true
 		})
+		#exit1 = true
 		if #next == -2 {
 			return #r1, #r2
 		}
@ -205,7 +243,8 @@ return with a single check.
 For a labeled break or continue of an outer range-over-func, we
 use positive #next values. Any such labeled break or continue
 really means "do N breaks" or "do N breaks and 1 continue".
-We encode that as 2*N or 2*N+1 respectively.
+We encode that as perLoopStep*N or perLoopStep*N+1 respectively.
+
 Loops that might need to propagate a labeled break or continue
 add one or both of these to the #next checks:

@ -239,30 +278,40 @@ becomes

 	{
 		var #next int
+		var #exit1 bool
 		f(func() {
+			if #exit1 { runtime.panicrangeexit() }
+			var #exit2 bool
 			g(func() {
+				if #exit2 { runtime.panicrangeexit() }
+				var #exit3 bool
 				h(func() {
+					if #exit3 { runtime.panicrangeexit() }
 					...
 					{
 						// break F
 						#next = 4
+						#exit1, #exit2, #exit3 = true, true, true
 						return false
 					}
 					...
 					{
 						// continue F
 						#next = 3
+						#exit2, #exit3 = true, true
 						return false
 					}
 					...
 					return true
 				})
+				#exit3 = true
 				if #next >= 2 {
 					#next -= 2
 					return false
 				}
 				return true
 			})
+			#exit2 = true
 			if #next >= 2 {
 				#next -= 2
 				return false
@ -274,6 +323,7 @@ becomes
 			...
 			return true
 		})
+		#exit1 = true
 	}

 Note that the post-h checks only consider a break,
@ -299,6 +349,7 @@ For example
 	Top: print("start\n")
 	for range f {
 		for range g {
+			...
 			for range h {
 				...
 				goto Top
@ -312,28 +363,39 @@ becomes
 	Top: print("start\n")
 	{
 		var #next int
+		var #exit1 bool
 		f(func() {
+			if #exit1 { runtime.panicrangeexit() }
+			var #exit2 bool
 			g(func() {
+				if #exit2 { runtime.panicrangeexit() }
+				...
+				var #exit3 bool
 				h(func() {
+				if #exit3 { runtime.panicrangeexit() }
 					...
 					{
 						// goto Top
 						#next = -3
+						#exit1, #exit2, #exit3 = true, true, true
 						return false
 					}
 					...
 					return true
 				})
+				#exit3 = true
 				if #next < 0 {
 					return false
 				}
 				return true
 			})
+			#exit2 = true
 			if #next < 0 {
 				return false
 			}
 			return true
 		})
+		#exit1 = true
 		if #next == -3 {
 			#next = 0
 			goto Top
@ -435,6 +497,7 @@ type rewriter struct {
 	nextVar      types2.Object
 	retVars      []types2.Object
 	defers       types2.Object
+	exitVarCount int // exitvars are referenced from their respective loops
 }

 // A branch is a single labeled branch.
@ -446,6 +509,8 @@ type branch struct {
 // A forLoop describes a single range-over-func loop being processed.
 type forLoop struct {
 	nfor         *syntax.ForStmt // actual syntax
+	exitFlag     *types2.Var     // #exit variable for this loop
+	exitFlagDecl *syntax.VarDecl

 	checkRet      bool     // add check for "return" after loop
 	checkRetArgs  bool     // add check for "return args" after loop
@ -556,6 +621,7 @@ func (r *rewriter) startLoop(loop *forLoop) {
 		r.false = types2.Universe.Lookup("false")
 		r.rewritten = make(map[*syntax.ForStmt]syntax.Stmt)
 	}
+	loop.exitFlag, loop.exitFlagDecl = r.exitVar(loop.nfor.Pos())
 }

 // editStmt returns the replacement for the statement x,
@ -605,6 +671,19 @@ func (r *rewriter) editDefer(x *syntax.CallStmt) syntax.Stmt {
 	return x
 }

+func (r *rewriter) exitVar(pos syntax.Pos) (*types2.Var, *syntax.VarDecl) {
+	r.exitVarCount++
+
+	name := fmt.Sprintf("#exit%d", r.exitVarCount)
+	typ := r.bool.Type()
+	obj := types2.NewVar(pos, r.pkg, name, typ)
+	n := syntax.NewName(pos, name)
+	setValueType(n, typ)
+	r.info.Defs[n] = obj
+
+	return obj, &syntax.VarDecl{NameList: []*syntax.Name{n}}
+}
+
 // editReturn returns the replacement for the return statement x.
 // See the "Return" section in the package doc comment above for more context.
 func (r *rewriter) editReturn(x *syntax.ReturnStmt) syntax.Stmt {
@ -635,6 +714,9 @@ func (r *rewriter) editReturn(x *syntax.ReturnStmt) syntax.Stmt {
 		bl.List = append(bl.List, &syntax.AssignStmt{Lhs: r.useList(r.retVars), Rhs: x.Results})
 	}
 	bl.List = append(bl.List, &syntax.AssignStmt{Lhs: r.next(), Rhs: r.intConst(next)})
+	for i := 0; i < len(r.forStack); i++ {
+		bl.List = append(bl.List, r.setExitedAt(i))
+	}
 	bl.List = append(bl.List, &syntax.ReturnStmt{Results: r.useVar(r.false)})
 	setPos(bl, x.Pos())
 	return bl
@ -667,6 +749,9 @@ func (r *rewriter) editBranch(x *syntax.BranchStmt) syntax.Stmt {
 	for i >= 0 && r.forStack[i].nfor != targ {
 		i--
 	}
+	// exitFrom is the index of the loop interior to the target of the control flow,
+	// if such a loop exists (it does not if i == len(r.forStack) - 1)
+	exitFrom := i + 1

 	// Compute the value to assign to #next and the specific return to use.
 	var next int
@ -695,6 +780,7 @@ func (r *rewriter) editBranch(x *syntax.BranchStmt) syntax.Stmt {
 		for i >= 0 && r.forStack[i].nfor != targ {
 			i--
 		}
+		exitFrom = i + 1

 		// Mark loop we exit to get to targ to check for that branch.
 		// When i==-1 that's the outermost func body
@ -714,28 +800,35 @@ func (r *rewriter) editBranch(x *syntax.BranchStmt) syntax.Stmt {

 		// For continue of innermost loop, use "return true".
 		// Otherwise we are breaking the innermost loop, so "return false".
-		retVal := r.false
-		if depth == 0 && x.Tok == syntax.Continue {
-			retVal = r.true
-		}
-		ret = &syntax.ReturnStmt{Results: r.useVar(retVal)}

-		// If we're only operating on the innermost loop, the return is all we need.
-		if depth == 0 {
+		if depth == 0 && x.Tok == syntax.Continue {
+			ret = &syntax.ReturnStmt{Results: r.useVar(r.true)}
 			setPos(ret, x.Pos())
 			return ret
 		}
+		ret = &syntax.ReturnStmt{Results: r.useVar(r.false)}
+
+		// If this is a simple break, mark this loop as exited and return false.
+		// No adjustments to #next.
+		if depth == 0 {
+			bl := &syntax.BlockStmt{
+				List: []syntax.Stmt{r.setExited(), ret},
+			}
+			setPos(bl, x.Pos())
+			return bl
+		}

 		// The loop inside the one we are break/continue-ing
 		// needs to make that happen when we break out of it.
 		if x.Tok == syntax.Continue {
-			r.forStack[i+1].checkContinue = true
+			r.forStack[exitFrom].checkContinue = true
 		} else {
-			r.forStack[i+1].checkBreak = true
+			exitFrom = i
+			r.forStack[exitFrom].checkBreak = true
 		}

 		// The loops along the way just need to break.
-		for j := i + 2; j < len(r.forStack); j++ {
+		for j := exitFrom + 1; j < len(r.forStack); j++ {
 			r.forStack[j].checkBreak = true
 		}

@ -750,8 +843,15 @@ func (r *rewriter) editBranch(x *syntax.BranchStmt) syntax.Stmt {
 	// Assign #next = next and do the return.
 	as := &syntax.AssignStmt{Lhs: r.next(), Rhs: r.intConst(next)}
 	bl := &syntax.BlockStmt{
-		List: []syntax.Stmt{as, ret},
+		List: []syntax.Stmt{as},
 	}
+
+	// Set #exitK for this loop and those exited by the control flow.
+	for i := exitFrom; i < len(r.forStack); i++ {
+		bl.List = append(bl.List, r.setExitedAt(i))
+	}
+
+	bl.List = append(bl.List, ret)
 	setPos(bl, x.Pos())
 	return bl
 }
@ -851,7 +951,14 @@ func (r *rewriter) endLoop(loop *forLoop) {
 		setPos(r.declStmt, start)
 		block.List = append(block.List, r.declStmt)
 	}
+
+	// declare the exitFlag here so it has proper scope and zeroing
+	exitFlagDecl := &syntax.DeclStmt{DeclList: []syntax.Decl{loop.exitFlagDecl}}
+	block.List = append(block.List, exitFlagDecl)
+
 	block.List = append(block.List, call)
+
+	block.List = append(block.List, r.setExited())
 	block.List = append(block.List, checks...)

 	if len(r.forStack) == 1 { // ending an outermost loop
@ -864,6 +971,18 @@ func (r *rewriter) endLoop(loop *forLoop) {
 	r.rewritten[nfor] = block
 }

+func (r *rewriter) setExited() *syntax.AssignStmt {
+	return r.setExitedAt(len(r.forStack) - 1)
+}
+
+func (r *rewriter) setExitedAt(index int) *syntax.AssignStmt {
+	loop := r.forStack[index]
+	return &syntax.AssignStmt{
+		Lhs: r.useVar(loop.exitFlag),
+		Rhs: r.useVar(r.true),
+	}
+}
+
 // bodyFunc converts the loop body (control flow has already been updated)
 // to a func literal that can be passed to the range function.
 //
@ -916,6 +1035,10 @@ func (r *rewriter) bodyFunc(body []syntax.Stmt, lhs []syntax.Expr, def bool, fty
 	tv.SetIsValue()
 	bodyFunc.SetTypeInfo(tv)

+	loop := r.forStack[len(r.forStack)-1]
+
+	bodyFunc.Body.List = append(bodyFunc.Body.List, r.assertNotExited(start, loop))
+
 	// Original loop body (already rewritten by editStmt during inspect).
 	bodyFunc.Body.List = append(bodyFunc.Body.List, body...)

@ -1010,6 +1133,36 @@ func (r *rewriter) ifNext(op syntax.Operator, c int, then syntax.Stmt) syntax.St
 	return nif
 }

+// setValueType marks x as a value with type typ.
+func setValueType(x syntax.Expr, typ syntax.Type) {
+	tv := syntax.TypeAndValue{Type: typ}
+	tv.SetIsValue()
+	x.SetTypeInfo(tv)
+}
+
+// assertNotExited returns the statement:
+//
+//	if #exitK { runtime.panicrangeexit() }
+//
+// where #exitK is the exit guard for loop.
+func (r *rewriter) assertNotExited(start syntax.Pos, loop *forLoop) syntax.Stmt {
+	callPanicExpr := &syntax.CallExpr{
+		Fun: runtimeSym(r.info, "panicrangeexit"),
+	}
+	setValueType(callPanicExpr, nil) // no result type
+
+	callPanic := &syntax.ExprStmt{X: callPanicExpr}
+
+	nif := &syntax.IfStmt{
+		Cond: r.useVar(loop.exitFlag),
+		Then: &syntax.BlockStmt{
+			List: []syntax.Stmt{callPanic},
+		},
+	}
+	setPos(nif, start)
+	return nif
+}
+
 // next returns a reference to the #next variable.
 func (r *rewriter) next() *syntax.Name {
 	if r.nextVar == nil {
@ -1106,7 +1259,11 @@ var runtimePkg = func() *types2.Package {
 	anyType := types2.Universe.Lookup("any").Type()

 	// func deferrangefunc() unsafe.Pointer
-	obj := types2.NewVar(nopos, pkg, "deferrangefunc", types2.NewSignatureType(nil, nil, nil, nil, types2.NewTuple(types2.NewParam(nopos, pkg, "extra", anyType)), false))
+	obj := types2.NewFunc(nopos, pkg, "deferrangefunc", types2.NewSignatureType(nil, nil, nil, nil, types2.NewTuple(types2.NewParam(nopos, pkg, "extra", anyType)), false))
+	pkg.Scope().Insert(obj)
+
+	// func panicrangeexit()
+	obj = types2.NewFunc(nopos, pkg, "panicrangeexit", types2.NewSignatureType(nil, nil, nil, nil, nil, false))
 	pkg.Scope().Insert(obj)

 	return pkg
@ -1118,6 +1275,7 @@ func runtimeSym(info *types2.Info, name string) *syntax.Name {
 	n := syntax.NewName(nopos, "runtime."+name)
 	tv := syntax.TypeAndValue{Type: obj.Type()}
 	tv.SetIsValue()
+	tv.SetIsRuntimeHelper()
 	n.SetTypeInfo(tv)
 	info.Uses[n] = obj
 	return n
--- a/src/cmd/compile/internal/syntax/type.go
+++ b/src/cmd/compile/internal/syntax/type.go
@ -39,16 +39,17 @@ type TypeAndValue struct {
 	exprFlags
 }

-type exprFlags uint8
+type exprFlags uint16

 func (f exprFlags) IsVoid() bool          { return f&1 != 0 }
 func (f exprFlags) IsType() bool          { return f&2 != 0 }
-func (f exprFlags) IsBuiltin() bool   { return f&4 != 0 }
+func (f exprFlags) IsBuiltin() bool       { return f&4 != 0 } // a language builtin that resembles a function call, e.g., "make, append, new"
 func (f exprFlags) IsValue() bool         { return f&8 != 0 }
 func (f exprFlags) IsNil() bool           { return f&16 != 0 }
 func (f exprFlags) Addressable() bool     { return f&32 != 0 }
 func (f exprFlags) Assignable() bool      { return f&64 != 0 }
 func (f exprFlags) HasOk() bool           { return f&128 != 0 }
+func (f exprFlags) IsRuntimeHelper() bool { return f&256 != 0 } // a runtime function called from transformed syntax

 func (f *exprFlags) SetIsVoid()          { *f |= 1 }
 func (f *exprFlags) SetIsType()          { *f |= 2 }
@ -58,6 +59,7 @@ func (f *exprFlags) SetIsNil()       { *f |= 16 }
 func (f *exprFlags) SetAddressable()     { *f |= 32 }
 func (f *exprFlags) SetAssignable()      { *f |= 64 }
 func (f *exprFlags) SetHasOk()           { *f |= 128 }
+func (f *exprFlags) SetIsRuntimeHelper() { *f |= 256 }

 // a typeAndValue contains the results of typechecking an expression.
 // It is embedded in expression nodes.
--- a/src/cmd/compile/internal/typecheck/_builtin/runtime.go
+++ b/src/cmd/compile/internal/typecheck/_builtin/runtime.go
@ -116,6 +116,9 @@ func interfaceSwitch(s *byte, t *byte) (int, *byte)
 func ifaceeq(tab *uintptr, x, y unsafe.Pointer) (ret bool)
 func efaceeq(typ *uintptr, x, y unsafe.Pointer) (ret bool)

+// panic for iteration after exit in range func
+func panicrangeexit()
+
 // defer in range over func
 func deferrangefunc() interface{}

--- a/src/cmd/compile/internal/typecheck/builtin.go
+++ b/src/cmd/compile/internal/typecheck/builtin.go
@ -102,6 +102,7 @@ var runtimeDecls = [...]struct {
 	{"interfaceSwitch", funcTag, 70},
 	{"ifaceeq", funcTag, 72},
 	{"efaceeq", funcTag, 72},
+	{"panicrangeexit", funcTag, 9},
 	{"deferrangefunc", funcTag, 73},
 	{"fastrand", funcTag, 74},
 	{"makemap64", funcTag, 76},
--- a/src/cmd/internal/goobj/builtinlist.go
+++ b/src/cmd/internal/goobj/builtinlist.go
@ -65,7 +65,6 @@ var builtins = [...]struct {
 	{"runtime.slicecopy", 1},
 	{"runtime.decoderune", 1},
 	{"runtime.countrunes", 1},
-	{"runtime.convI2I", 1},
 	{"runtime.convT", 1},
 	{"runtime.convTnoptr", 1},
 	{"runtime.convT16", 1},
@ -75,13 +74,15 @@ var builtins = [...]struct {
 	{"runtime.convTslice", 1},
 	{"runtime.assertE2I", 1},
 	{"runtime.assertE2I2", 1},
-	{"runtime.assertI2I", 1},
-	{"runtime.assertI2I2", 1},
 	{"runtime.panicdottypeE", 1},
 	{"runtime.panicdottypeI", 1},
 	{"runtime.panicnildottype", 1},
+	{"runtime.typeAssert", 1},
+	{"runtime.interfaceSwitch", 1},
 	{"runtime.ifaceeq", 1},
 	{"runtime.efaceeq", 1},
+	{"runtime.panicrangeexit", 1},
+	{"runtime.deferrangefunc", 1},
 	{"runtime.fastrand", 1},
 	{"runtime.makemap64", 1},
 	{"runtime.makemap", 1},
@ -134,7 +135,6 @@ var builtins = [...]struct {
 	{"runtime.unsafestringcheckptr", 1},
 	{"runtime.panicunsafestringlen", 1},
 	{"runtime.panicunsafestringnilptr", 1},
-	{"runtime.mulUintptr", 1},
 	{"runtime.memmove", 1},
 	{"runtime.memclrNoHeapPointers", 1},
 	{"runtime.memclrHasPointers", 1},
@ -210,6 +210,7 @@ var builtins = [...]struct {
 	{"runtime.x86HasFMA", 0},
 	{"runtime.armHasVFPv4", 0},
 	{"runtime.arm64HasATOMICS", 0},
+	{"runtime.asanregisterglobals", 1},
 	{"runtime.deferproc", 1},
 	{"runtime.deferprocStack", 1},
 	{"runtime.deferreturn", 1},
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@ -296,6 +296,13 @@ func deferproc(fn func()) {
 	// been set and must not be clobbered.
 }

+var rangeExitError = error(errorString("range function continued iteration after exit"))
+
+//go:noinline
+func panicrangeexit() {
+	panic(rangeExitError)
+}
+
 // deferrangefunc is called by functions that are about to
 // execute a range-over-function loop in which the loop body
 // may execute a defer statement. That defer needs to add to