diff --git a/api/next/76031.txt b/api/next/76031.txt
new file mode 100644
index 00000000000..049edc7a569
--- /dev/null
+++ b/api/next/76031.txt
@@ -0,0 +1 @@
+pkg go/ast, type BasicLit struct, ValueEnd token.Pos #76031
diff --git a/doc/next/6-stdlib/99-minor/go/ast/76031.md b/doc/next/6-stdlib/99-minor/go/ast/76031.md
new file mode 100644
index 00000000000..964872f416a
--- /dev/null
+++ b/doc/next/6-stdlib/99-minor/go/ast/76031.md
@@ -0,0 +1,5 @@
+The new [BasicLit.ValueEnd] field records the precise end position of
+a literal so that the [BasicLit.End] method can now always return the
+correct answer. (Previously it was computed using a heuristic that was
+incorrect for multi-line raw string literals in Windows source files,
+due to removal of carriage returns.)
diff --git a/src/go/ast/ast.go b/src/go/ast/ast.go
index a6dab5bb517..37fc3c96662 100644
--- a/src/go/ast/ast.go
+++ b/src/go/ast/ast.go
@@ -312,11 +312,10 @@ type (
 	//
 	// For raw string literals (Kind == token.STRING && Value[0] == '`'),
 	// the Value field contains the string text without carriage returns (\r) that
-	// may have been present in the source. Because the end position is
-	// computed using len(Value), the position reported by [BasicLit.End] does not match the
-	// true source end position for raw string literals containing carriage returns.
+	// may have been present in the source.
 	BasicLit struct {
 		ValuePos token.Pos   // literal position
+		ValueEnd token.Pos   // position immediately after the literal
 		Kind     token.Token // token.INT, token.FLOAT, token.IMAG, token.CHAR, or token.STRING
 		Value    string      // literal string; e.g. 42, 0x7f, 3.14, 1e-9, 2.4i, 'a', '\x7f', "foo" or `\m\n\o`
 	}
@@ -535,7 +534,15 @@ func (x *Ellipsis) End() token.Pos {
 	}
 	return x.Ellipsis + 3 // len("...")
 }
-func (x *BasicLit) End() token.Pos { return token.Pos(int(x.ValuePos) + len(x.Value)) }
+func (x *BasicLit) End() token.Pos {
+	if !x.ValueEnd.IsValid() {
+		// Not from parser; use a heuristic.
+		// (Incorrect for `...` containing \r\n;
+		// see https://go.dev/issue/76031.)
+		return token.Pos(int(x.ValuePos) + len(x.Value))
+	}
+	return x.ValueEnd
+}
 func (x *FuncLit) End() token.Pos { return x.Body.End() }
 func (x *CompositeLit) End() token.Pos { return x.Rbrace + 1 }
 func (x *ParenExpr) End() token.Pos { return x.Rparen + 1 }
diff --git a/src/go/ast/commentmap_test.go b/src/go/ast/commentmap_test.go
index f0faeed610a..0d5e8de0137 100644
--- a/src/go/ast/commentmap_test.go
+++ b/src/go/ast/commentmap_test.go
@@ -109,7 +109,7 @@ func TestCommentMap(t *testing.T) {
 	}
 	cmap := NewCommentMap(fset, f, f.Comments)
 
-	// very correct association of comments
+	// verify correct association of comments
 	for n, list := range cmap {
 		key := fmt.Sprintf("%2d: %T", fset.Position(n.Pos()).Line, n)
 		got := ctext(list)
diff --git a/src/go/ast/example_test.go b/src/go/ast/example_test.go
index 31b32efece9..36daa7e7e1e 100644
--- a/src/go/ast/example_test.go
+++ b/src/go/ast/example_test.go
@@ -113,31 +113,32 @@ func main() {
 	// 34 . . . . . . . Args: []ast.Expr (len = 1) {
 	// 35 . . . . . . . . 0: *ast.BasicLit {
 	// 36 . . . . . . . . . ValuePos: 4:10
-	// 37 . . . . . . . . . Kind: STRING
-	// 38 . . . . . . . . . Value: "\"Hello, World!\""
-	// 39 . . . . . . . . }
-	// 40 . . . . . . . }
-	// 41 . . . . . . . Ellipsis: -
-	// 42 . . . . . . . Rparen: 4:25
-	// 43 . . . . . . }
-	// 44 . . . . . }
-	// 45 . . . . }
-	// 46 . . . . Rbrace: 5:1
-	// 47 . . . }
-	// 48 . . }
-	// 49 . }
-	// 50 . FileStart: 1:1
-	// 51 . FileEnd: 5:3
-	// 52 . Scope: *ast.Scope {
-	// 53 . . Objects: map[string]*ast.Object (len = 1) {
-	// 54 . . . "main": *(obj @ 11)
-	// 55 . . }
-	// 56 . }
-	// 57 . Unresolved: []*ast.Ident (len = 1) {
-	// 58 . . 0: *(obj @ 29)
-	// 59 . }
-	// 60 . GoVersion: ""
-	// 61 }
+	// 37 . . . . . . . . . ValueEnd: 4:25
+	// 38 . . . . . . . . . Kind: STRING
+	// 39 . . . . . . . . . Value: "\"Hello, World!\""
+	// 40 . . . . . . . . }
+	// 41 . . . . . . . }
+	// 42 . . . . . . . Ellipsis: -
+	// 43 . . . . . . . Rparen: 4:25
+	// 44 . . . . . . }
+	// 45 . . . . . }
+	// 46 . . . . }
+	// 47 . . . . Rbrace: 5:1
+	// 48 . . . }
+	// 49 . . }
+	// 50 . }
+	// 51 . FileStart: 1:1
+	// 52 . FileEnd: 5:3
+	// 53 . Scope: *ast.Scope {
+	// 54 . . Objects: map[string]*ast.Object (len = 1) {
+	// 55 . . . "main": *(obj @ 11)
+	// 56 . . }
+	// 57 . }
+	// 58 . Unresolved: []*ast.Ident (len = 1) {
+	// 59 . . 0: *(obj @ 29)
+	// 60 . }
+	// 61 . GoVersion: ""
+	// 62 }
 }
 
 func ExamplePreorder() {
diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go
index 868be194c39..5f95535ed94 100644
--- a/src/go/build/deps_test.go
+++ b/src/go/build/deps_test.go
@@ -335,6 +335,7 @@ var depsRules = `
 	< internal/gover
 	< go/version
 	< go/token
+	< go/internal/scannerhooks
 	< go/scanner
 	< go/ast
 	< go/internal/typeparams;
diff --git a/src/go/internal/scannerhooks/hooks.go b/src/go/internal/scannerhooks/hooks.go
new file mode 100644
index 00000000000..057261df06c
--- /dev/null
+++ b/src/go/internal/scannerhooks/hooks.go
@@ -0,0 +1,11 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package scannerhooks defines nonexported channels between parser and scanner.
+// Ideally this package could be eliminated by adding API to scanner.
+package scannerhooks
+
+import "go/token"
+
+var StringEnd func(scanner any) token.Pos
diff --git a/src/go/parser/parser.go b/src/go/parser/parser.go
index e725371e768..e01a221968f 100644
--- a/src/go/parser/parser.go
+++ b/src/go/parser/parser.go
@@ -28,6 +28,7 @@ import (
 	"fmt"
 	"go/ast"
 	"go/build/constraint"
+	"go/internal/scannerhooks"
 	"go/scanner"
 	"go/token"
 	"strings"
@@ -52,9 +53,10 @@ type parser struct {
 	goVersion string // minimum Go version found in //go:build comment
 
 	// Next token
-	pos token.Pos   // token position
-	tok token.Token // one token look-ahead
-	lit string      // token literal
+	pos       token.Pos   // token position
+	tok       token.Token // one token look-ahead
+	lit       string      // token literal
+	stringEnd token.Pos   // position immediately after token; STRING only
 
 	// Error recovery
 	// (used to limit the number of calls to parser.advance
@@ -163,6 +165,10 @@ func (p *parser) next0() {
 			continue
 		}
 	} else {
+		if p.tok == token.STRING {
+			p.stringEnd = scannerhooks.StringEnd(&p.scanner)
+		}
+
 		// Found a non-comment; top of file is over.
 		p.top = false
 	}
@@ -720,7 +726,7 @@ func (p *parser) parseFieldDecl() *ast.Field {
 
 	var tag *ast.BasicLit
 	if p.tok == token.STRING {
-		tag = &ast.BasicLit{ValuePos: p.pos, Kind: p.tok, Value: p.lit}
+		tag = &ast.BasicLit{ValuePos: p.pos, ValueEnd: p.stringEnd, Kind: p.tok, Value: p.lit}
 		p.next()
 	}
 
@@ -1474,7 +1480,11 @@ func (p *parser) parseOperand() ast.Expr {
 		return x
 
 	case token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING:
-		x := &ast.BasicLit{ValuePos: p.pos, Kind: p.tok, Value: p.lit}
+		end := p.pos + token.Pos(len(p.lit))
+		if p.tok == token.STRING {
+			end = p.stringEnd
+		}
+		x := &ast.BasicLit{ValuePos: p.pos, ValueEnd: end, Kind: p.tok, Value: p.lit}
 		p.next()
 		return x
 
@@ -2511,9 +2521,11 @@ func (p *parser) parseImportSpec(doc *ast.CommentGroup, _ token.Token, _ int) as
 	}
 
 	pos := p.pos
+	end := p.pos
 	var path string
 	if p.tok == token.STRING {
 		path = p.lit
+		end = p.stringEnd
 		p.next()
 	} else if p.tok.IsLiteral() {
 		p.error(pos, "import path must be a string")
@@ -2528,7 +2540,7 @@ func (p *parser) parseImportSpec(doc *ast.CommentGroup, _ token.Token, _ int) as
 	spec := &ast.ImportSpec{
 		Doc:     doc,
 		Name:    ident,
-		Path:    &ast.BasicLit{ValuePos: pos, Kind: token.STRING, Value: path},
+		Path:    &ast.BasicLit{ValuePos: pos, ValueEnd: end, Kind: token.STRING, Value: path},
 		Comment: comment,
 	}
 	p.imports = append(p.imports, spec)
diff --git a/src/go/parser/parser_test.go b/src/go/parser/parser_test.go
index 87b7d7bbab7..81181892309 100644
--- a/src/go/parser/parser_test.go
+++ b/src/go/parser/parser_test.go
@@ -946,3 +946,53 @@ func _() {}
 		t.Errorf("unexpected doc comment %v", docComment2)
 	}
 }
+
+// Tests of BasicLit.End() method, which in go1.26 started precisely
+// recording the Value token's end position instead of heuristically
+// computing it, which is inaccurate for strings containing "\r".
+func TestBasicLit_End(t *testing.T) {
+	// stringlit is a raw string literal containing [a b c \r \n],
+	// denoting "abc\n", because the scanner normalizes \r\n to \n.
+	const stringlit = "`abc\r\n`"
+
+	// The semicolons exercise the case in which the next token
+	// (a SEMICOLON implied by a \n) isn't immediate but follows
+	// some horizontal space.
+	const src = `package p
+
+import ` + stringlit + ` ;
+
+type _ struct{ x int ` + stringlit + ` }
+
+const _ = ` + stringlit + ` ;
+`
+
+	fset := token.NewFileSet()
+	f, _ := ParseFile(fset, "", src, ParseComments|SkipObjectResolution)
+	tokFile := fset.File(f.Pos())
+
+	count := 0
+	ast.Inspect(f, func(n ast.Node) bool {
+		if lit, ok := n.(*ast.BasicLit); ok {
+			count++
+			var (
+				start = tokFile.Offset(lit.Pos())
+				end   = tokFile.Offset(lit.End())
+			)
+
+			// Check BasicLit.Value.
+			if want := "`abc\n`"; lit.Value != want {
+				t.Errorf("%s: BasicLit.Value = %q, want %q", fset.Position(lit.Pos()), lit.Value, want)
+			}
+
+			// Check source extent.
+			if got := src[start:end]; got != stringlit {
+				t.Errorf("%s: src[BasicLit.Pos:End] = %q, want %q", fset.Position(lit.Pos()), got, stringlit)
+			}
+		}
+		return true
+	})
+	if count != 3 {
+		t.Errorf("found %d BasicLit, want 3", count)
+	}
+}
diff --git a/src/go/scanner/scanner.go b/src/go/scanner/scanner.go
index cdbeb6323c6..07d987c88f9 100644
--- a/src/go/scanner/scanner.go
+++ b/src/go/scanner/scanner.go
@@ -10,6 +10,7 @@ package scanner
 import (
 	"bytes"
 	"fmt"
+	"go/internal/scannerhooks"
 	"go/token"
 	"path/filepath"
 	"strconv"
@@ -41,11 +42,19 @@ type Scanner struct {
 	lineOffset int       // current line offset
 	insertSemi bool      // insert a semicolon before next newline
 	nlPos      token.Pos // position of newline in preceding comment
+	stringEnd  token.Pos // end position; defined only for STRING tokens
 
 	// public state - ok to modify
 	ErrorCount int // number of errors encountered
 }
 
+// Provide go/parser with backdoor access to the StringEnd information.
+func init() {
+	scannerhooks.StringEnd = func(scanner any) token.Pos {
+		return scanner.(*Scanner).stringEnd
+	}
+}
+
 const (
 	bom = 0xFEFF // byte order mark, only permitted as very first character
 	eof = -1     // end of file
@@ -691,7 +700,7 @@ func stripCR(b []byte, comment bool) []byte {
 	return c[:i]
 }
 
-func (s *Scanner) scanRawString() string {
+func (s *Scanner) scanRawString() (string, int) {
 	// '`' opening already consumed
 	offs := s.offset - 1
 
@@ -712,11 +721,12 @@ func (s *Scanner) scanRawString() string {
 	}
 
 	lit := s.src[offs:s.offset]
+	rawLen := len(lit)
 	if hasCR {
 		lit = stripCR(lit, false)
 	}
 
-	return string(lit)
+	return string(lit), rawLen
 }
 
 func (s *Scanner) skipWhitespace() {
@@ -850,6 +860,7 @@ scanAgain:
 			insertSemi = true
 			tok = token.STRING
 			lit = s.scanString()
+			s.stringEnd = pos + token.Pos(len(lit))
 		case '\'':
 			insertSemi = true
 			tok = token.CHAR
@@ -857,7 +868,9 @@ scanAgain:
 		case '`':
 			insertSemi = true
 			tok = token.STRING
-			lit = s.scanRawString()
+			var rawLen int
+			lit, rawLen = s.scanRawString()
+			s.stringEnd = pos + token.Pos(rawLen)
 		case ':':
 			tok = s.switch2(token.COLON, token.DEFINE)
 		case '.':