mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
go/ast: fix BasicLit.End position for raw strings containing \r
This CL causes the parser to record in a new field, BasicLit.ValueEnd, the actual end position of each literal token, and to use it in BasicLit.End. Previously, the End was computed heuristically as Pos + len(Value). This heuristic is incorrect for a multiline raw string literal on Windows, since the scanner normalizes \r\n to \n. Unfortunately the actual end position is not returned by the Scanner.Scan method, so the scanner and parser conspire using a global variable in the go/internal/scannerhooks package to communicate. + test, api change, relnote Fixes #76031 Change-Id: I57c18a44e85f7403d470ba23d41dcdcc5a9432c2 Reviewed-on: https://go-review.googlesource.com/c/go/+/720060 Reviewed-by: Robert Griesemer <gri@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
592775ec7d
commit
8e734ec954
10 changed files with 140 additions and 39 deletions
1
api/next/76031.txt
Normal file
1
api/next/76031.txt
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
pkg go/ast, type BasicLit struct, ValueEnd token.Pos #76031
|
||||||
5
doc/next/6-stdlib/99-minor/go/ast/76031.md
Normal file
5
doc/next/6-stdlib/99-minor/go/ast/76031.md
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
The new [BasicLit.ValueEnd] field records the precise end position of
|
||||||
|
a literal so that the [BasicLit.End] method can now always return the
|
||||||
|
correct answer. (Previously it was computed using a heuristic that was
|
||||||
|
incorrect for multi-line raw string literals in Windows source files,
|
||||||
|
due to removal of carriage returns.)
|
||||||
|
|
@ -312,11 +312,10 @@ type (
|
||||||
//
|
//
|
||||||
// For raw string literals (Kind == token.STRING && Value[0] == '`'),
|
// For raw string literals (Kind == token.STRING && Value[0] == '`'),
|
||||||
// the Value field contains the string text without carriage returns (\r) that
|
// the Value field contains the string text without carriage returns (\r) that
|
||||||
// may have been present in the source. Because the end position is
|
// may have been present in the source.
|
||||||
// computed using len(Value), the position reported by [BasicLit.End] does not match the
|
|
||||||
// true source end position for raw string literals containing carriage returns.
|
|
||||||
BasicLit struct {
|
BasicLit struct {
|
||||||
ValuePos token.Pos // literal position
|
ValuePos token.Pos // literal position
|
||||||
|
ValueEnd token.Pos // position immediately after the literal
|
||||||
Kind token.Token // token.INT, token.FLOAT, token.IMAG, token.CHAR, or token.STRING
|
Kind token.Token // token.INT, token.FLOAT, token.IMAG, token.CHAR, or token.STRING
|
||||||
Value string // literal string; e.g. 42, 0x7f, 3.14, 1e-9, 2.4i, 'a', '\x7f', "foo" or `\m\n\o`
|
Value string // literal string; e.g. 42, 0x7f, 3.14, 1e-9, 2.4i, 'a', '\x7f', "foo" or `\m\n\o`
|
||||||
}
|
}
|
||||||
|
|
@ -535,7 +534,15 @@ func (x *Ellipsis) End() token.Pos {
|
||||||
}
|
}
|
||||||
return x.Ellipsis + 3 // len("...")
|
return x.Ellipsis + 3 // len("...")
|
||||||
}
|
}
|
||||||
func (x *BasicLit) End() token.Pos { return token.Pos(int(x.ValuePos) + len(x.Value)) }
|
func (x *BasicLit) End() token.Pos {
|
||||||
|
if !x.ValueEnd.IsValid() {
|
||||||
|
// Not from parser; use a heuristic.
|
||||||
|
// (Incorrect for `...` containing \r\n;
|
||||||
|
// see https://go.dev/issue/76031.)
|
||||||
|
return token.Pos(int(x.ValuePos) + len(x.Value))
|
||||||
|
}
|
||||||
|
return x.ValueEnd
|
||||||
|
}
|
||||||
func (x *FuncLit) End() token.Pos { return x.Body.End() }
|
func (x *FuncLit) End() token.Pos { return x.Body.End() }
|
||||||
func (x *CompositeLit) End() token.Pos { return x.Rbrace + 1 }
|
func (x *CompositeLit) End() token.Pos { return x.Rbrace + 1 }
|
||||||
func (x *ParenExpr) End() token.Pos { return x.Rparen + 1 }
|
func (x *ParenExpr) End() token.Pos { return x.Rparen + 1 }
|
||||||
|
|
|
||||||
|
|
@ -109,7 +109,7 @@ func TestCommentMap(t *testing.T) {
|
||||||
}
|
}
|
||||||
cmap := NewCommentMap(fset, f, f.Comments)
|
cmap := NewCommentMap(fset, f, f.Comments)
|
||||||
|
|
||||||
// very correct association of comments
|
// verify correct association of comments
|
||||||
for n, list := range cmap {
|
for n, list := range cmap {
|
||||||
key := fmt.Sprintf("%2d: %T", fset.Position(n.Pos()).Line, n)
|
key := fmt.Sprintf("%2d: %T", fset.Position(n.Pos()).Line, n)
|
||||||
got := ctext(list)
|
got := ctext(list)
|
||||||
|
|
|
||||||
|
|
@ -113,31 +113,32 @@ func main() {
|
||||||
// 34 . . . . . . . Args: []ast.Expr (len = 1) {
|
// 34 . . . . . . . Args: []ast.Expr (len = 1) {
|
||||||
// 35 . . . . . . . . 0: *ast.BasicLit {
|
// 35 . . . . . . . . 0: *ast.BasicLit {
|
||||||
// 36 . . . . . . . . . ValuePos: 4:10
|
// 36 . . . . . . . . . ValuePos: 4:10
|
||||||
// 37 . . . . . . . . . Kind: STRING
|
// 37 . . . . . . . . . ValueEnd: 4:25
|
||||||
// 38 . . . . . . . . . Value: "\"Hello, World!\""
|
// 38 . . . . . . . . . Kind: STRING
|
||||||
// 39 . . . . . . . . }
|
// 39 . . . . . . . . . Value: "\"Hello, World!\""
|
||||||
// 40 . . . . . . . }
|
// 40 . . . . . . . . }
|
||||||
// 41 . . . . . . . Ellipsis: -
|
// 41 . . . . . . . }
|
||||||
// 42 . . . . . . . Rparen: 4:25
|
// 42 . . . . . . . Ellipsis: -
|
||||||
// 43 . . . . . . }
|
// 43 . . . . . . . Rparen: 4:25
|
||||||
// 44 . . . . . }
|
// 44 . . . . . . }
|
||||||
// 45 . . . . }
|
// 45 . . . . . }
|
||||||
// 46 . . . . Rbrace: 5:1
|
// 46 . . . . }
|
||||||
// 47 . . . }
|
// 47 . . . . Rbrace: 5:1
|
||||||
// 48 . . }
|
// 48 . . . }
|
||||||
// 49 . }
|
// 49 . . }
|
||||||
// 50 . FileStart: 1:1
|
// 50 . }
|
||||||
// 51 . FileEnd: 5:3
|
// 51 . FileStart: 1:1
|
||||||
// 52 . Scope: *ast.Scope {
|
// 52 . FileEnd: 5:3
|
||||||
// 53 . . Objects: map[string]*ast.Object (len = 1) {
|
// 53 . Scope: *ast.Scope {
|
||||||
// 54 . . . "main": *(obj @ 11)
|
// 54 . . Objects: map[string]*ast.Object (len = 1) {
|
||||||
// 55 . . }
|
// 55 . . . "main": *(obj @ 11)
|
||||||
// 56 . }
|
// 56 . . }
|
||||||
// 57 . Unresolved: []*ast.Ident (len = 1) {
|
// 57 . }
|
||||||
// 58 . . 0: *(obj @ 29)
|
// 58 . Unresolved: []*ast.Ident (len = 1) {
|
||||||
// 59 . }
|
// 59 . . 0: *(obj @ 29)
|
||||||
// 60 . GoVersion: ""
|
// 60 . }
|
||||||
// 61 }
|
// 61 . GoVersion: ""
|
||||||
|
// 62 }
|
||||||
}
|
}
|
||||||
|
|
||||||
func ExamplePreorder() {
|
func ExamplePreorder() {
|
||||||
|
|
|
||||||
|
|
@ -335,6 +335,7 @@ var depsRules = `
|
||||||
< internal/gover
|
< internal/gover
|
||||||
< go/version
|
< go/version
|
||||||
< go/token
|
< go/token
|
||||||
|
< go/internal/scannerhooks
|
||||||
< go/scanner
|
< go/scanner
|
||||||
< go/ast
|
< go/ast
|
||||||
< go/internal/typeparams;
|
< go/internal/typeparams;
|
||||||
|
|
|
||||||
11
src/go/internal/scannerhooks/hooks.go
Normal file
11
src/go/internal/scannerhooks/hooks.go
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
// Copyright 2025 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package scannerhooks defines nonexported channels between parser and scanner.
|
||||||
|
// Ideally this package could be eliminated by adding API to scanner.
|
||||||
|
package scannerhooks
|
||||||
|
|
||||||
|
import "go/token"
|
||||||
|
|
||||||
|
var StringEnd func(scanner any) token.Pos
|
||||||
|
|
@ -28,6 +28,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"go/ast"
|
"go/ast"
|
||||||
"go/build/constraint"
|
"go/build/constraint"
|
||||||
|
"go/internal/scannerhooks"
|
||||||
"go/scanner"
|
"go/scanner"
|
||||||
"go/token"
|
"go/token"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
@ -55,6 +56,7 @@ type parser struct {
|
||||||
pos token.Pos // token position
|
pos token.Pos // token position
|
||||||
tok token.Token // one token look-ahead
|
tok token.Token // one token look-ahead
|
||||||
lit string // token literal
|
lit string // token literal
|
||||||
|
stringEnd token.Pos // position immediately after token; STRING only
|
||||||
|
|
||||||
// Error recovery
|
// Error recovery
|
||||||
// (used to limit the number of calls to parser.advance
|
// (used to limit the number of calls to parser.advance
|
||||||
|
|
@ -163,6 +165,10 @@ func (p *parser) next0() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if p.tok == token.STRING {
|
||||||
|
p.stringEnd = scannerhooks.StringEnd(&p.scanner)
|
||||||
|
}
|
||||||
|
|
||||||
// Found a non-comment; top of file is over.
|
// Found a non-comment; top of file is over.
|
||||||
p.top = false
|
p.top = false
|
||||||
}
|
}
|
||||||
|
|
@ -720,7 +726,7 @@ func (p *parser) parseFieldDecl() *ast.Field {
|
||||||
|
|
||||||
var tag *ast.BasicLit
|
var tag *ast.BasicLit
|
||||||
if p.tok == token.STRING {
|
if p.tok == token.STRING {
|
||||||
tag = &ast.BasicLit{ValuePos: p.pos, Kind: p.tok, Value: p.lit}
|
tag = &ast.BasicLit{ValuePos: p.pos, ValueEnd: p.stringEnd, Kind: p.tok, Value: p.lit}
|
||||||
p.next()
|
p.next()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1474,7 +1480,11 @@ func (p *parser) parseOperand() ast.Expr {
|
||||||
return x
|
return x
|
||||||
|
|
||||||
case token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING:
|
case token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING:
|
||||||
x := &ast.BasicLit{ValuePos: p.pos, Kind: p.tok, Value: p.lit}
|
end := p.pos + token.Pos(len(p.lit))
|
||||||
|
if p.tok == token.STRING {
|
||||||
|
end = p.stringEnd
|
||||||
|
}
|
||||||
|
x := &ast.BasicLit{ValuePos: p.pos, ValueEnd: end, Kind: p.tok, Value: p.lit}
|
||||||
p.next()
|
p.next()
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
|
@ -2511,9 +2521,11 @@ func (p *parser) parseImportSpec(doc *ast.CommentGroup, _ token.Token, _ int) as
|
||||||
}
|
}
|
||||||
|
|
||||||
pos := p.pos
|
pos := p.pos
|
||||||
|
end := p.pos
|
||||||
var path string
|
var path string
|
||||||
if p.tok == token.STRING {
|
if p.tok == token.STRING {
|
||||||
path = p.lit
|
path = p.lit
|
||||||
|
end = p.stringEnd
|
||||||
p.next()
|
p.next()
|
||||||
} else if p.tok.IsLiteral() {
|
} else if p.tok.IsLiteral() {
|
||||||
p.error(pos, "import path must be a string")
|
p.error(pos, "import path must be a string")
|
||||||
|
|
@ -2528,7 +2540,7 @@ func (p *parser) parseImportSpec(doc *ast.CommentGroup, _ token.Token, _ int) as
|
||||||
spec := &ast.ImportSpec{
|
spec := &ast.ImportSpec{
|
||||||
Doc: doc,
|
Doc: doc,
|
||||||
Name: ident,
|
Name: ident,
|
||||||
Path: &ast.BasicLit{ValuePos: pos, Kind: token.STRING, Value: path},
|
Path: &ast.BasicLit{ValuePos: pos, ValueEnd: end, Kind: token.STRING, Value: path},
|
||||||
Comment: comment,
|
Comment: comment,
|
||||||
}
|
}
|
||||||
p.imports = append(p.imports, spec)
|
p.imports = append(p.imports, spec)
|
||||||
|
|
|
||||||
|
|
@ -946,3 +946,53 @@ func _() {}
|
||||||
t.Errorf("unexpected doc comment %v", docComment2)
|
t.Errorf("unexpected doc comment %v", docComment2)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tests of BasicLit.End() method, which in go1.26 started precisely
|
||||||
|
// recording the Value token's end position instead of heuristically
|
||||||
|
// computing it, which is inaccurate for strings containing "\r".
|
||||||
|
func TestBasicLit_End(t *testing.T) {
|
||||||
|
// lit is a raw string literal containing [a b c \r \n],
|
||||||
|
// denoting "abc\n", because the scanner normalizes \r\n to \n.
|
||||||
|
const stringlit = "`abc\r\n`"
|
||||||
|
|
||||||
|
// The semicolons exercise the case in which the next token
|
||||||
|
// (a SEMICOLON implied by a \n) isn't immediate but follows
|
||||||
|
// some horizontal space.
|
||||||
|
const src = `package p
|
||||||
|
|
||||||
|
import ` + stringlit + ` ;
|
||||||
|
|
||||||
|
type _ struct{ x int ` + stringlit + ` }
|
||||||
|
|
||||||
|
const _ = ` + stringlit + ` ;
|
||||||
|
`
|
||||||
|
|
||||||
|
fset := token.NewFileSet()
|
||||||
|
f, _ := ParseFile(fset, "", src, ParseComments|SkipObjectResolution)
|
||||||
|
tokFile := fset.File(f.Pos())
|
||||||
|
|
||||||
|
count := 0
|
||||||
|
ast.Inspect(f, func(n ast.Node) bool {
|
||||||
|
if lit, ok := n.(*ast.BasicLit); ok {
|
||||||
|
count++
|
||||||
|
var (
|
||||||
|
start = tokFile.Offset(lit.Pos())
|
||||||
|
end = tokFile.Offset(lit.End())
|
||||||
|
)
|
||||||
|
|
||||||
|
// Check BasicLit.Value.
|
||||||
|
if want := "`abc\n`"; lit.Value != want {
|
||||||
|
t.Errorf("%s: BasicLit.Value = %q, want %q", fset.Position(lit.Pos()), lit.Value, want)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check source extent.
|
||||||
|
if got := src[start:end]; got != stringlit {
|
||||||
|
t.Errorf("%s: src[BasicLit.Pos:End] = %q, want %q", fset.Position(lit.Pos()), got, stringlit)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
if count != 3 {
|
||||||
|
t.Errorf("found %d BasicLit, want 3", count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ package scanner
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"go/internal/scannerhooks"
|
||||||
"go/token"
|
"go/token"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
|
@ -41,11 +42,19 @@ type Scanner struct {
|
||||||
lineOffset int // current line offset
|
lineOffset int // current line offset
|
||||||
insertSemi bool // insert a semicolon before next newline
|
insertSemi bool // insert a semicolon before next newline
|
||||||
nlPos token.Pos // position of newline in preceding comment
|
nlPos token.Pos // position of newline in preceding comment
|
||||||
|
stringEnd token.Pos // end position; defined only for STRING tokens
|
||||||
|
|
||||||
// public state - ok to modify
|
// public state - ok to modify
|
||||||
ErrorCount int // number of errors encountered
|
ErrorCount int // number of errors encountered
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Provide go/parser with backdoor access to the StringEnd information.
|
||||||
|
func init() {
|
||||||
|
scannerhooks.StringEnd = func(scanner any) token.Pos {
|
||||||
|
return scanner.(*Scanner).stringEnd
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
bom = 0xFEFF // byte order mark, only permitted as very first character
|
bom = 0xFEFF // byte order mark, only permitted as very first character
|
||||||
eof = -1 // end of file
|
eof = -1 // end of file
|
||||||
|
|
@ -691,7 +700,7 @@ func stripCR(b []byte, comment bool) []byte {
|
||||||
return c[:i]
|
return c[:i]
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Scanner) scanRawString() string {
|
func (s *Scanner) scanRawString() (string, int) {
|
||||||
// '`' opening already consumed
|
// '`' opening already consumed
|
||||||
offs := s.offset - 1
|
offs := s.offset - 1
|
||||||
|
|
||||||
|
|
@ -712,11 +721,12 @@ func (s *Scanner) scanRawString() string {
|
||||||
}
|
}
|
||||||
|
|
||||||
lit := s.src[offs:s.offset]
|
lit := s.src[offs:s.offset]
|
||||||
|
rawLen := len(lit)
|
||||||
if hasCR {
|
if hasCR {
|
||||||
lit = stripCR(lit, false)
|
lit = stripCR(lit, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
return string(lit)
|
return string(lit), rawLen
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Scanner) skipWhitespace() {
|
func (s *Scanner) skipWhitespace() {
|
||||||
|
|
@ -850,6 +860,7 @@ scanAgain:
|
||||||
insertSemi = true
|
insertSemi = true
|
||||||
tok = token.STRING
|
tok = token.STRING
|
||||||
lit = s.scanString()
|
lit = s.scanString()
|
||||||
|
s.stringEnd = pos + token.Pos(len(lit))
|
||||||
case '\'':
|
case '\'':
|
||||||
insertSemi = true
|
insertSemi = true
|
||||||
tok = token.CHAR
|
tok = token.CHAR
|
||||||
|
|
@ -857,7 +868,9 @@ scanAgain:
|
||||||
case '`':
|
case '`':
|
||||||
insertSemi = true
|
insertSemi = true
|
||||||
tok = token.STRING
|
tok = token.STRING
|
||||||
lit = s.scanRawString()
|
var rawLen int
|
||||||
|
lit, rawLen = s.scanRawString()
|
||||||
|
s.stringEnd = pos + token.Pos(rawLen)
|
||||||
case ':':
|
case ':':
|
||||||
tok = s.switch2(token.COLON, token.DEFINE)
|
tok = s.switch2(token.COLON, token.DEFINE)
|
||||||
case '.':
|
case '.':
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue