go/scanner: add End method to Scanner

Fixes #74958

Change-Id: I50ef64ae9ae6a762a7aada3d29914bae6a6a6964
Reviewed-on: https://go-review.googlesource.com/c/go/+/738681
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Mateusz Poliwczak 2026-01-23 09:09:49 +01:00
parent 7f0f671951
commit 3c924059e6
4 changed files with 190 additions and 1 deletions

1
api/next/74958.txt Normal file
View file

@ -0,0 +1 @@
pkg go/scanner, method (*Scanner) End() token.Pos #74958

View file

@ -0,0 +1 @@
The scanner now allows retrieving the end position of a token via the new [Scanner.End] method.

View file

@ -44,6 +44,9 @@ type Scanner struct {
nlPos token.Pos // position of newline in preceding comment
stringEnd token.Pos // end position; defined only for STRING tokens
endPosValid bool
endPos token.Pos // overrides the offset as the default end position
// public state - ok to modify
ErrorCount int // number of errors encountered
}
@ -154,7 +157,9 @@ func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode
err: err,
mode: mode,
ch: ' ',
endPosValid: true,
endPos: token.NoPos,
}
s.next()
@ -777,6 +782,21 @@ func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok
return tok0
}
// End returns the position immediately after the last scanned token.
// If [Scanner.Scan] has not been called yet, End returns [token.NoPos].
func (s *Scanner) End() token.Pos {
	if !s.endPosValid {
		// Common case: the current read offset is exactly one past the
		// last token, so it doubles as the token's end position.
		return s.file.Pos(s.offset)
	}
	// Special cases where the offset does not describe the token end:
	//   - before the first Scan (including when [Scanner.Init] consumed
	//     a BOM), endPos holds token.NoPos;
	//   - after a synthetic [token.SEMICOLON] emitted for a newline inside
	//     a /*...*/ comment, endPos holds the newline position + 1.
	return s.endPos
}
// Scan scans the next token and returns the token position, the token,
// and its literal string if applicable. The source end is indicated by
// [token.EOF].
@ -809,10 +829,13 @@ func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok
// and thus relative to the file set.
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
scanAgain:
s.endPosValid = false
if s.nlPos.IsValid() {
// Return artificial ';' token after /*...*/ comment
// containing newline, at position of first newline.
pos, tok, lit = s.nlPos, token.SEMICOLON, "\n"
s.endPos = pos + 1
s.endPosValid = true
s.nlPos = token.NoPos
return
}

View file

@ -1179,3 +1179,167 @@ func TestScanReuseSemiInNewlineComment(t *testing.T) {
t.Fatalf("tok = %v; want = token.IDENT", tok)
}
}
// TestScannerEnd verifies that Scanner.End reports the position immediately
// after each scanned token, covering operators, brackets, literals, implicit
// (EOF-inserted) semicolons, the synthetic semicolon produced by a newline
// inside a /*...*/ comment, and a leading BOM. It also checks that End
// returns token.NoPos before the first Scan.
func TestScannerEnd(t *testing.T) {
	// tok records one expected scan step: the token kind, its start
	// position (from Scan), and its end position (from End).
	type tok struct {
		tok   token.Token
		start token.Pos
		end   token.Pos
	}
	// Positions below are 1-based: AddFile is called with base -1, so the
	// file set assigns base 1 and offset 0 maps to token.Pos(1).
	cases := []struct {
		name string
		src  string
		end  []tok
	}{
		{
			name: "operators",
			src:  "+ - / >> == =",
			end: []tok{
				{token.ADD, 1, 2},
				{token.SUB, 3, 4},
				{token.QUO, 5, 6},
				{token.SHR, 7, 9},
				{token.EQL, 10, 12},
				{token.ASSIGN, 13, 14},
				{token.EOF, 14, 14},
			},
		},
		{
			name: "braces",
			src:  "{([])}",
			end: []tok{
				{token.LBRACE, 1, 2},
				{token.LPAREN, 2, 3},
				{token.LBRACK, 3, 4},
				{token.RBRACK, 4, 5},
				{token.RPAREN, 5, 6},
				{token.RBRACE, 6, 7},
				// Zero-width implicit semicolon inserted at EOF after ')'.
				{token.SEMICOLON, 7, 7},
				{token.EOF, 7, 7},
			},
		},
		{
			name: "literals",
			src:  `"foo" 123 1.23 0b11`,
			end: []tok{
				{token.STRING, 1, 6},
				{token.INT, 7, 10},
				{token.FLOAT, 11, 15},
				{token.INT, 16, 20},
				{token.SEMICOLON, 20, 20},
				{token.EOF, 20, 20},
			},
		},
		{
			// Implicit semicolon at EOF has zero width: start == end.
			name: "missing newline at the end of file",
			src:  "foo",
			end: []tok{
				{token.IDENT, 1, 4},
				{token.SEMICOLON, 4, 4},
				{token.EOF, 4, 4},
			},
		},
		{
			// Semicolon synthesized from the trailing newline spans it.
			name: "newline at the end of file",
			src:  "foo\n",
			end: []tok{
				{token.IDENT, 1, 4},
				{token.SEMICOLON, 4, 5},
				{token.EOF, 5, 5},
			},
		},
		{
			name: "semicolon at the end of file",
			src:  "foo;",
			end: []tok{
				{token.IDENT, 1, 4},
				{token.SEMICOLON, 4, 5},
				{token.EOF, 5, 5},
			},
		},
		{
			name: "semicolon and newline at the end of file",
			src:  "foo;\n",
			end: []tok{
				{token.IDENT, 1, 4},
				{token.SEMICOLON, 4, 5},
				{token.EOF, 6, 6},
			},
		},
		{
			// The synthetic semicolon sits at the newline inside the
			// comment (pos 7) and ends one past it (pos 8), even though
			// the COMMENT token itself has already been returned.
			name: "newline in comment acting as semicolon",
			src:  "foo /*\n*/ bar",
			end: []tok{
				{token.IDENT, 1, 4},
				{token.COMMENT, 5, 10},
				{token.SEMICOLON, 7, 8},
				{token.IDENT, 11, 14},
				{token.SEMICOLON, 14, 14},
				{token.EOF, 14, 14},
			},
		},
		{
			// Init consumes the BOM; the first token starts after it
			// and End must not report a position inside the BOM.
			name: "BOM",
			src:  "\uFEFFfoo",
			end: []tok{
				{token.IDENT, 4, 7},
				{token.SEMICOLON, 7, 7},
				{token.EOF, 7, 7},
			},
		},
	}
	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			fset := token.NewFileSet()
			var s Scanner
			// All inputs are valid token sequences; any scan error is a bug.
			errorHandler := func(_ token.Position, msg string) { t.Fatal(msg) }
			s.Init(fset.AddFile("test.go", -1, len(tt.src)), []byte(tt.src), errorHandler, ScanComments)
			// Before the first Scan, End must report token.NoPos.
			if end := s.End(); end != token.NoPos {
				t.Errorf("after init: s.End() = %v; want token.NoPos", end)
			}
			var got []tok
			for {
				pos, tokTyp, _ := s.Scan()
				got = append(got, tok{tokTyp, pos, s.End()})
				if tokTyp == token.EOF {
					break
				}
			}
			if !slices.Equal(got, tt.end) {
				t.Fatalf("input %q: got = %v; want = %v", tt.src, got, tt.end)
			}
		})
	}
}
// TestScannerEndReuse verifies that re-initializing a Scanner resets the
// end-position state: after the previous scan ended on a synthetic semicolon
// (which records an explicit end position), a fresh Init must make End
// report token.NoPos again.
func TestScannerEndReuse(t *testing.T) {
	const src = "identifier /*a\nb*/ + other"
	fset := token.NewFileSet()
	fatal := func(pos token.Position, msg string) {
		t.Fatal(msg)
	}
	var sc Scanner
	sc.Init(fset.AddFile("test.go", -1, len(src)), []byte(src), fatal, ScanComments)
	sc.Scan() // IDENT "identifier"
	sc.Scan() // COMMENT "/*a\nb*/"
	// The newline inside the comment yields a synthetic semicolon, which
	// stores its end position in the scanner.
	if _, got, _ := sc.Scan(); got != token.SEMICOLON {
		t.Fatalf("tok = %v; want = token.SEMICOLON", got)
	}
	sc.Init(fset.AddFile("test.go", -1, len(src)), []byte(src), fatal, ScanComments)
	if end := sc.End(); end != token.NoPos {
		t.Errorf("s.End() = %v; want token.NoPos", end)
	}
}