go/scanner: add End method to Scanner

Fixes #74958

Change-Id: I50ef64ae9ae6a762a7aada3d29914bae6a6a6964
Reviewed-on: https://go-review.googlesource.com/c/go/+/738681
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Mateusz Poliwczak 2026-01-23 09:09:49 +01:00
parent 7f0f671951
commit 3c924059e6
4 changed files with 190 additions and 1 deletions

1
api/next/74958.txt Normal file
View file

@ -0,0 +1 @@
pkg go/scanner, method (*Scanner) End() token.Pos #74958

View file

@ -0,0 +1 @@
The scanner now allows retrieving the end position of a token via the new [Scanner.End] method.

View file

@ -44,6 +44,9 @@ type Scanner struct {
nlPos token.Pos // position of newline in preceding comment
stringEnd token.Pos // end position; defined only for STRING tokens
endPosValid bool
endPos token.Pos // overrides the offset as the default end position
// public state - ok to modify
ErrorCount int // number of errors encountered
}
@ -154,7 +157,9 @@ func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode
err: err,
mode: mode,
ch: ' ',
endPosValid: true,
endPos: token.NoPos,
}
s.next()
@ -777,6 +782,21 @@ func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok
return tok0
}
// End returns the position immediately after the last scanned token.
// If [Scanner.Scan] has not been called yet, End returns [token.NoPos].
func (s *Scanner) End() token.Pos {
	if !s.endPosValid {
		// Common case: the current read offset is exactly one past the
		// last token, so it doubles as the token's end position.
		return s.file.Pos(s.offset)
	}
	// Special cases where the offset does not describe the token end:
	//   - before the first Scan (including when [Scanner.Init] consumed
	//     a BOM), endPos holds token.NoPos;
	//   - after a synthetic [token.SEMICOLON] emitted for a newline inside
	//     a /*...*/ comment, endPos holds the newline position + 1.
	return s.endPos
}
// Scan scans the next token and returns the token position, the token,
// and its literal string if applicable. The source end is indicated by
// [token.EOF].
@ -809,10 +829,13 @@ func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok
// and thus relative to the file set.
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
scanAgain:
s.endPosValid = false
if s.nlPos.IsValid() {
// Return artificial ';' token after /*...*/ comment
// containing newline, at position of first newline.
pos, tok, lit = s.nlPos, token.SEMICOLON, "\n"
s.endPos = pos + 1
s.endPosValid = true
s.nlPos = token.NoPos
return
}

View file

@ -1179,3 +1179,167 @@ func TestScanReuseSemiInNewlineComment(t *testing.T) {
t.Fatalf("tok = %v; want = token.IDENT", tok)
}
}
// TestScannerEnd verifies that Scanner.End reports the position immediately
// after each scanned token, covering operators, brackets, literals, implicit
// (EOF-inserted) semicolons, the synthetic semicolon produced by a newline
// inside a /*...*/ comment, and a leading BOM. It also checks that End
// returns token.NoPos before the first Scan.
func TestScannerEnd(t *testing.T) {
	// tok records one expected scan step: the token kind, its start
	// position (from Scan), and its end position (from End).
	type tok struct {
		tok   token.Token
		start token.Pos
		end   token.Pos
	}
	// Positions below are 1-based: AddFile is called with base -1, so the
	// file set assigns base 1 and offset 0 maps to token.Pos(1).
	cases := []struct {
		name string
		src  string
		end  []tok
	}{
		{
			name: "operators",
			src:  "+ - / >> == =",
			end: []tok{
				{token.ADD, 1, 2},
				{token.SUB, 3, 4},
				{token.QUO, 5, 6},
				{token.SHR, 7, 9},
				{token.EQL, 10, 12},
				{token.ASSIGN, 13, 14},
				{token.EOF, 14, 14},
			},
		},
		{
			name: "braces",
			src:  "{([])}",
			end: []tok{
				{token.LBRACE, 1, 2},
				{token.LPAREN, 2, 3},
				{token.LBRACK, 3, 4},
				{token.RBRACK, 4, 5},
				{token.RPAREN, 5, 6},
				{token.RBRACE, 6, 7},
				// Zero-width implicit semicolon inserted at EOF after ')'.
				{token.SEMICOLON, 7, 7},
				{token.EOF, 7, 7},
			},
		},
		{
			name: "literals",
			src:  `"foo" 123 1.23 0b11`,
			end: []tok{
				{token.STRING, 1, 6},
				{token.INT, 7, 10},
				{token.FLOAT, 11, 15},
				{token.INT, 16, 20},
				{token.SEMICOLON, 20, 20},
				{token.EOF, 20, 20},
			},
		},
		{
			// Implicit semicolon at EOF has zero width: start == end.
			name: "missing newline at the end of file",
			src:  "foo",
			end: []tok{
				{token.IDENT, 1, 4},
				{token.SEMICOLON, 4, 4},
				{token.EOF, 4, 4},
			},
		},
		{
			// Semicolon synthesized from the trailing newline spans it.
			name: "newline at the end of file",
			src:  "foo\n",
			end: []tok{
				{token.IDENT, 1, 4},
				{token.SEMICOLON, 4, 5},
				{token.EOF, 5, 5},
			},
		},
		{
			name: "semicolon at the end of file",
			src:  "foo;",
			end: []tok{
				{token.IDENT, 1, 4},
				{token.SEMICOLON, 4, 5},
				{token.EOF, 5, 5},
			},
		},
		{
			name: "semicolon and newline at the end of file",
			src:  "foo;\n",
			end: []tok{
				{token.IDENT, 1, 4},
				{token.SEMICOLON, 4, 5},
				{token.EOF, 6, 6},
			},
		},
		{
			// The synthetic semicolon sits at the newline inside the
			// comment (pos 7) and ends one past it (pos 8), even though
			// the COMMENT token itself has already been returned.
			name: "newline in comment acting as semicolon",
			src:  "foo /*\n*/ bar",
			end: []tok{
				{token.IDENT, 1, 4},
				{token.COMMENT, 5, 10},
				{token.SEMICOLON, 7, 8},
				{token.IDENT, 11, 14},
				{token.SEMICOLON, 14, 14},
				{token.EOF, 14, 14},
			},
		},
		{
			// Init consumes the BOM; the first token starts after it
			// and End must not report a position inside the BOM.
			name: "BOM",
			src:  "\uFEFFfoo",
			end: []tok{
				{token.IDENT, 4, 7},
				{token.SEMICOLON, 7, 7},
				{token.EOF, 7, 7},
			},
		},
	}
	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			fset := token.NewFileSet()
			var s Scanner
			// All inputs are valid token sequences; any scan error is a bug.
			errorHandler := func(_ token.Position, msg string) { t.Fatal(msg) }
			s.Init(fset.AddFile("test.go", -1, len(tt.src)), []byte(tt.src), errorHandler, ScanComments)
			// Before the first Scan, End must report token.NoPos.
			if end := s.End(); end != token.NoPos {
				t.Errorf("after init: s.End() = %v; want token.NoPos", end)
			}
			var got []tok
			for {
				pos, tokTyp, _ := s.Scan()
				got = append(got, tok{tokTyp, pos, s.End()})
				if tokTyp == token.EOF {
					break
				}
			}
			if !slices.Equal(got, tt.end) {
				t.Fatalf("input %q: got = %v; want = %v", tt.src, got, tt.end)
			}
		})
	}
}
// TestScannerEndReuse verifies that re-initializing a Scanner resets the
// end-position state: after the previous scan ended on a synthetic semicolon
// (which records an explicit end position), a fresh Init must make End
// report token.NoPos again.
func TestScannerEndReuse(t *testing.T) {
	const src = "identifier /*a\nb*/ + other"
	fset := token.NewFileSet()
	fatal := func(pos token.Position, msg string) {
		t.Fatal(msg)
	}
	var sc Scanner
	sc.Init(fset.AddFile("test.go", -1, len(src)), []byte(src), fatal, ScanComments)
	sc.Scan() // IDENT "identifier"
	sc.Scan() // COMMENT "/*a\nb*/"
	// The newline inside the comment yields a synthetic semicolon, which
	// stores its end position in the scanner.
	if _, got, _ := sc.Scan(); got != token.SEMICOLON {
		t.Fatalf("tok = %v; want = token.SEMICOLON", got)
	}
	sc.Init(fset.AddFile("test.go", -1, len(src)), []byte(src), fatal, ScanComments)
	if end := sc.End(); end != token.NoPos {
		t.Errorf("s.End() = %v; want token.NoPos", end)
	}
}