[dev.inline] cmd/compile/internal/syntax: clean up error and pragma handling

Reviewed in and cherry-picked from https://go-review.googlesource.com/#/c/33873/.

- simplify error handling in source.go
  (move handling of first error into parser, where it belongs)

- clean up error handling in scanner.go

- move pragma and position base handling from scanner
  to parser where it belongs

- have separate error methods in parser to avoid confusion
  with handlers from scanner.go and source.go

- (source.go) and (scanner.go, source.go, tokens.go) could each be made
  into a stand-alone package if so desired; these files are now less
  entangled and easier to maintain

Change-Id: I81510fc7ef943b78eaa49092c0eab2075a05878c
Reviewed-on: https://go-review.googlesource.com/34235
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Robert Griesemer 2016-12-01 22:04:49 -08:00
parent e97c8a592f
commit 54ef0447fe
4 changed files with 128 additions and 102 deletions
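
The bullets above describe pulling first-error bookkeeping and pragma dispatch out of the scanner and into the parser, which now installs plain callbacks when it initializes the scanner. A minimal stand-alone sketch of that wiring; toyScanner, toyParser, and errorAt are illustrative stand-ins, not the package's actual types:

    package main

    import "fmt"

    // toyScanner stands in for the real scanner: it only knows how to report
    // errors and directive text through the callbacks it was given at init time.
    type toyScanner struct {
        errh  func(line, col uint, msg string)
        pragh func(line, col uint, text string)
    }

    func (s *toyScanner) init(errh func(line, col uint, msg string), pragh func(line, col uint, text string)) {
        s.errh = errh
        s.pragh = pragh
    }

    // toyParser owns the error bookkeeping (first error) and pragma state,
    // mirroring how the parser now installs closures into the scanner.
    type toyParser struct {
        toyScanner
        first  error
        pragma uint16
    }

    func (p *toyParser) errorAt(line, col uint, msg string) {
        err := fmt.Errorf("%d:%d: %s", line, col, msg)
        if p.first == nil {
            p.first = err // remember only the first error
        }
        fmt.Println(err)
    }

    func (p *toyParser) init() {
        p.toyScanner.init(p.errorAt, func(line, col uint, text string) {
            // pragma dispatch now happens on the parser side
            if text == "go:noinline" {
                p.pragma |= 1
            }
        })
    }

    func main() {
        var p toyParser
        p.init()
        p.errh(3, 7, "string not terminated") // scanner reports, parser records
        p.pragh(5, 1, "go:noinline")          // scanner forwards, parser interprets
        fmt.Println("first error:", p.first, "pragma bits:", p.pragma)
    }

The shape matters more than the details: the scanner only ever holds two plain functions, while all state (first error, pragma flags, position base) stays in the parser.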

src/cmd/compile/internal/syntax/parser.go (View file)

@ -7,6 +7,7 @@ package syntax
import (
"fmt"
"io"
"strconv"
"strings"
)
@ -19,21 +20,53 @@ const trace = false
const gcCompat = true
type parser struct {
base *PosBase
errh ErrorHandler
scanner
first error // first error encountered
pragma Pragma // pragma flags
fnest int // function nesting level (for error handling)
xnest int // expression nesting level (for complit ambiguity resolution)
indent []byte // tracing support
}
func (p *parser) init(filename string, src io.Reader, errh ErrorHandler, pragh PragmaHandler) {
p.scanner.init(filename, src, errh, pragh)
p.base = NewFileBase(filename)
p.errh = errh
p.scanner.init(src, p.error_at, func(line, col uint, text string) {
if strings.HasPrefix(text, "line ") {
p.updateBase(line, col, text[5:])
}
if pragh != nil {
p.pragma |= pragh(line, text)
}
}, gcCompat)
p.first = nil
p.pragma = 0
p.fnest = 0
p.xnest = 0
p.indent = nil
}
func (p *parser) updateBase(line, col uint, text string) {
// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
if i < 0 {
return
}
nstr := text[i+1:]
n, err := strconv.Atoi(nstr)
if err != nil || n <= 0 || n > lineMax {
p.error_at(line, col+uint(i+1), "invalid line number: "+nstr)
return
}
p.base = NewLinePragmaBase(MakePos(p.base.Pos().Base(), line, col), text[:i], uint(n))
}
func (p *parser) got(tok token) bool {
if p.tok == tok {
p.next()
@ -52,12 +85,24 @@ func (p *parser) want(tok token) {
// ----------------------------------------------------------------------------
// Error handling
// syntax_error reports a syntax error at the current line.
func (p *parser) syntax_error(msg string) {
p.syntax_error_at(p.line, p.col, msg)
// error reports an error at the given position.
func (p *parser) error_at(line, col uint, msg string) {
err := Error{line, col, msg}
if p.first == nil {
p.first = err
}
if p.errh == nil {
panic(p.first)
}
p.errh(err)
}
// Like syntax_error, but reports error at given line rather than current lexer line.
// error reports a (non-syntax) error at the current token position.
func (p *parser) error(msg string) {
p.error_at(p.line, p.col, msg)
}
// syntax_error_at reports a syntax error at the given position.
func (p *parser) syntax_error_at(line, col uint, msg string) {
if trace {
defer p.trace("syntax_error (" + msg + ")")()
@ -102,6 +147,11 @@ func (p *parser) syntax_error_at(line, col uint, msg string) {
p.error_at(line, col, "syntax error: unexpected "+tok+msg)
}
// syntax_error reports a syntax error at the current token position.
func (p *parser) syntax_error(msg string) {
p.syntax_error_at(p.line, p.col, msg)
}
// The stopset contains keywords that start a statement.
// They are good synchronization points in case of syntax
// errors and (usually) shouldn't be skipped over.
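
The new updateBase above is where //line directives are now consumed on the parser side. A stand-alone sketch of the filename:line split it performs; parseLineDirective is an illustrative helper, not part of the package, and the real code additionally rejects line numbers above lineMax:

    package main

    import (
        "fmt"
        "strconv"
        "strings"
    )

    // parseLineDirective splits the text after "//line " into a file name and a
    // line number. It searches for the last ':' so that a drive-letter colon in
    // a Windows path is not mistaken for the separator.
    func parseLineDirective(text string) (file string, line int, ok bool) {
        i := strings.LastIndex(text, ":")
        if i < 0 {
            return "", 0, false
        }
        n, err := strconv.Atoi(text[i+1:])
        if err != nil || n <= 0 { // the real code also caps n at lineMax
            return "", 0, false
        }
        return text[:i], n, true
    }

    func main() {
        fmt.Println(parseLineDirective(`foo.go:42`))        // foo.go 42 true
        fmt.Println(parseLineDirective(`c:\dir\foo.go:10`)) // c:\dir\foo.go 10 true
        fmt.Println(parseLineDirective(`foo.go:0`))         // rejected: line numbers must be positive
    }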

src/cmd/compile/internal/syntax/scanner.go (View file)

@ -2,39 +2,43 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements scanner, a lexical tokenizer for
// Go source. After initialization, consecutive calls of
// next advance the scanner one token at a time.
//
// This file, source.go, and tokens.go are self-contained
// (go tool compile scanner.go source.go tokens.go compiles)
// and thus could be made into its own package.
package syntax
import (
"fmt"
"io"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
type scanner struct {
source
pragh func(line, col uint, msg string)
gcCompat bool // TODO(gri) remove this eventually (only here so we can build w/o parser)
nlsemi bool // if set '\n' and EOF translate to ';'
pragma Pragma
// current token, valid after calling next()
base *PosBase
line, col uint
tok token
lit string // valid if tok is _Name or _Literal
kind LitKind // valid if tok is _Literal
op Operator // valid if tok is _Operator, _AssignOp, or _IncOp
prec int // valid if tok is _Operator, _AssignOp, or _IncOp
pragh PragmaHandler
}
func (s *scanner) init(filename string, src io.Reader, errh ErrorHandler, pragh PragmaHandler) {
func (s *scanner) init(src io.Reader, errh, pragh func(line, col uint, msg string), gcCompat bool) {
s.source.init(src, errh)
s.nlsemi = false
s.base = NewFileBase(filename)
s.pragh = pragh
s.gcCompat = gcCompat
s.nlsemi = false
}
func (s *scanner) next() {
@ -331,7 +335,7 @@ func (s *scanner) ident() {
}
func (s *scanner) isCompatRune(c rune, start bool) bool {
if !gcCompat || c < utf8.RuneSelf {
if !s.gcCompat || c < utf8.RuneSelf {
return false
}
if start && unicode.IsNumber(c) {
@ -461,7 +465,7 @@ func (s *scanner) stdString() {
break
}
if r < 0 {
s.error_at(s.line, s.col, "string not terminated")
s.errh(s.line, s.col, "string not terminated")
break
}
}
@ -481,7 +485,7 @@ func (s *scanner) rawString() {
break
}
if r < 0 {
s.error_at(s.line, s.col, "string not terminated")
s.errh(s.line, s.col, "string not terminated")
break
}
}
@ -538,23 +542,18 @@ func (s *scanner) skipLine(r rune) {
}
func (s *scanner) lineComment() {
// recognize pragmas
prefix := ""
r := s.getr()
switch r {
case 'g':
if s.pragh == nil {
s.skipLine(r)
return
}
prefix = "go:"
case 'l':
prefix = "line "
default:
if s.pragh == nil || (r != 'g' && r != 'l') {
s.skipLine(r)
return
}
// s.pragh != nil && (r == 'g' || r == 'l')
// recognize pragmas
prefix := "go:"
if r == 'l' {
prefix = "line "
}
for _, m := range prefix {
if r != m {
s.skipLine(r)
@ -563,34 +562,15 @@ func (s *scanner) lineComment() {
r = s.getr()
}
// pragma text without prefix and line ending (which may be "\r\n" if Windows)
// pragma text without line ending (which may be "\r\n" if Windows),
s.startLit()
s.skipLine(r)
text := strings.TrimSuffix(string(s.stopLit()), "\r")
// process //line filename:line pragma
if prefix[0] == 'l' {
// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
if i < 0 {
return
}
nstr := text[i+1:]
n, err := strconv.Atoi(nstr)
if err != nil || n <= 0 || n > lineMax {
s.error_at(s.line0, s.col0-uint(len(nstr)), "invalid line number: "+nstr)
return
}
s.base = NewLinePragmaBase(MakePos(s.base.Pos().Base(), s.line, s.col), text[:i], uint(n))
// TODO(gri) Return here once we rely exclusively
// on node positions for line number information,
// and remove //line pragma handling elsewhere.
if s.pragh == nil {
return
}
text := s.stopLit()
if i := len(text) - 1; i >= 0 && text[i] == '\r' {
text = text[:i]
}
s.pragma |= s.pragh(s.line, prefix+text)
s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since pragma text starts after //
}
func (s *scanner) fullComment() {
@ -603,7 +583,7 @@ func (s *scanner) fullComment() {
}
}
if r < 0 {
s.error_at(s.line, s.col, "comment not terminated")
s.errh(s.line, s.col, "comment not terminated")
return
}
}
@ -651,7 +631,7 @@ func (s *scanner) escape(quote rune) bool {
if c < 0 {
return true // complain in caller about EOF
}
if gcCompat {
if s.gcCompat {
name := "hex"
if base == 8 {
name = "octal"
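
With the scanner.go changes above, lineComment no longer interprets //line itself: it forwards the full comment text, prefix included, to whatever handler the parser installed, at column col+2 (just past the leading //). A sketch of the kind of dispatch such a handler performs, with illustrative names:

    package main

    import (
        "fmt"
        "strings"
    )

    // handleComment shows the kind of dispatch the parser's closure performs on
    // the text it now receives from the scanner, prefix and all.
    func handleComment(line, col uint, text string) {
        switch {
        case strings.HasPrefix(text, "line "):
            // position-base update, e.g. //line foo.go:10
            fmt.Printf("%d:%d: line directive %q\n", line, col, text[len("line "):])
        case strings.HasPrefix(text, "go:"):
            // compiler pragma, e.g. //go:noinline
            fmt.Printf("%d:%d: pragma %q\n", line, col, text[len("go:"):])
        }
    }

    func main() {
        handleComment(1, 3, "line foo.go:10") // forwarded from //line foo.go:10
        handleComment(2, 3, "go:noinline")    // forwarded from //go:noinline
    }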

src/cmd/compile/internal/syntax/scanner_test.go (View file)

@ -22,7 +22,7 @@ func TestScanner(t *testing.T) {
defer src.Close()
var s scanner
s.init("parser.go", src, nil, nil)
s.init(src, nil, nil, false)
for {
s.next()
if s.tok == _EOF {
@ -51,7 +51,7 @@ func TestTokens(t *testing.T) {
// scan source
var got scanner
got.init("", &bytesReader{buf}, nil, nil)
got.init(&bytesReader{buf}, nil, nil, false)
got.next()
for i, want := range sampleTokens {
nlsemi := false
@ -317,38 +317,38 @@ func TestScanErrors(t *testing.T) {
{`var s string = "\x"`, "non-hex character in escape sequence: \"", 1, 19},
{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 1, 19},
{`//line :`, "invalid line number: ", 1, 9},
{`//line :x`, "invalid line number: x", 1, 9},
{`//line foo :`, "invalid line number: ", 1, 13},
{`//line foo:123abc`, "invalid line number: 123abc", 1, 12},
{`/**///line foo:x`, "invalid line number: x", 1, 16},
{`//line foo:0`, "invalid line number: 0", 1, 12},
{fmt.Sprintf(`//line foo:%d`, lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), 1, 12},
// TODO(gri) move these test cases into an appropriate parser test
// {`//line :`, "invalid line number: ", 1, 9},
// {`//line :x`, "invalid line number: x", 1, 9},
// {`//line foo :`, "invalid line number: ", 1, 13},
// {`//line foo:123abc`, "invalid line number: 123abc", 1, 12},
// {`/**///line foo:x`, "invalid line number: x", 1, 16},
// {`//line foo:0`, "invalid line number: 0", 1, 12},
// {fmt.Sprintf(`//line foo:%d`, lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), 1, 12},
// former problem cases
{"package p\n\n\xef", "invalid UTF-8 encoding", 3, 1},
} {
var s scanner
nerrors := 0
s.init("", &bytesReader{[]byte(test.src)}, func(err error) {
s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) {
nerrors++
// only check the first error
e := err.(Error) // we know it's an Error
if nerrors == 1 {
if e.Msg != test.msg {
t.Errorf("%q: got msg = %q; want %q", test.src, e.Msg, test.msg)
if msg != test.msg {
t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
}
if e.Line != test.line {
t.Errorf("%q: got line = %d; want %d", test.src, e.Line, test.line)
if line != test.line {
t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
}
if e.Col != test.col {
t.Errorf("%q: got col = %d; want %d", test.src, e.Col, test.col)
if col != test.col {
t.Errorf("%q: got col = %d; want %d", test.src, col, test.col)
}
} else if nerrors > 1 {
// TODO(gri) make this use position info
t.Errorf("%q: got unexpected %q at line = %d", test.src, e.Msg, e.Line)
t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
}
}, nil)
}, nil, true)
for {
s.next()
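
The test changes above swap the scanner's error-handler signature from func(error) to func(line, col uint, msg string). If surrounding code still wants an error value, a small adapter bridges the two shapes; scanError here is a local stand-in for illustration, not the package's Error type:

    package main

    import "fmt"

    // scanError mirrors the line/col/msg triple that the scanner now reports
    // through its plain-function handler; it is a local stand-in, not the
    // package's Error type.
    type scanError struct {
        Line, Col uint
        Msg       string
    }

    func (e scanError) Error() string {
        return fmt.Sprintf("%d:%d: %s", e.Line, e.Col, e.Msg)
    }

    // adapt wraps an old-style handler that wants an error value into the new
    // three-argument callback shape used by scanner.init after this change.
    func adapt(old func(error)) func(line, col uint, msg string) {
        return func(line, col uint, msg string) {
            old(scanError{line, col, msg})
        }
    }

    func main() {
        var first error
        errh := adapt(func(err error) {
            if first == nil {
                first = err // keep only the first error, as the tests do
            }
        })
        errh(3, 1, "invalid UTF-8 encoding")
        errh(4, 2, "string not terminated")
        fmt.Println("first error:", first)
    }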

src/cmd/compile/internal/syntax/source.go (View file)

@ -7,6 +7,9 @@
// Contiguous sequences of runes (literals) are extracted
// directly as []byte without the need to re-encode the
// runes in UTF-8 (as would be necessary with bufio.Reader).
//
// This file is self-contained (go tool compile source.go
// compiles) and thus could be made into its own package.
package syntax
@ -22,8 +25,7 @@ import (
type source struct {
src io.Reader
errh ErrorHandler
first error // first error encountered
errh func(line, pos uint, msg string)
// source buffer
buf [4 << 10]byte
@ -31,44 +33,30 @@ type source struct {
r0, r, w int // previous/current read and write buf positions, excluding sentinel
line0, line uint // previous/current line
col0, col uint // previous/current column
err error // pending io error
ioerr error // pending io error
// literal buffer
lit []byte // literal prefix
suf int // literal suffix; suf >= 0 means we are scanning a literal
}
func (s *source) init(src io.Reader, errh ErrorHandler) {
// init initializes source to read from src and to report errors via errh.
// errh must not be nil.
func (s *source) init(src io.Reader, errh func(line, pos uint, msg string)) {
s.src = src
s.errh = errh
s.first = nil
s.buf[0] = utf8.RuneSelf // terminate with sentinel
s.offs = 0
s.r0, s.r, s.w = 0, 0, 0
s.line0, s.line = 1, 1
s.col0, s.col = 1, 1
s.err = nil
s.ioerr = nil
s.lit = s.lit[:0]
s.suf = -1
}
func (s *source) error(msg string) {
s.error_at(s.line0, s.col0, msg)
}
func (s *source) error_at(line, col uint, msg string) {
err := Error{line, col, msg}
if s.first == nil {
s.first = err
}
if s.errh == nil {
panic(s.first)
}
s.errh(err)
}
// ungetr ungets the most recently read rune.
func (s *source) ungetr() {
s.r, s.line, s.col = s.r0, s.line0, s.col0
@ -84,6 +72,14 @@ func (s *source) ungetr2() {
s.col0--
}
func (s *source) error(msg string) {
s.errh(s.line0, s.col0, msg)
}
// getr reads and returns the next rune.
// If an error occurs, the error handler provided to init
// is called with position (line and column) information
// and error message before getr returns.
func (s *source) getr() rune {
redo:
s.r0, s.line0, s.col0 = s.r, s.line, s.col
@ -94,7 +90,7 @@ redo:
// in the buffer. Measure and optimize if necessary.
// make sure we have at least one rune in buffer, or we are at EOF
for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.err == nil && s.w-s.r < len(s.buf) {
for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.ioerr == nil && s.w-s.r < len(s.buf) {
s.fill() // s.w-s.r < len(s.buf) => buffer is not full
}
@ -116,8 +112,8 @@ redo:
// EOF
if s.r == s.w {
if s.err != io.EOF {
s.error(s.err.Error())
if s.ioerr != io.EOF {
s.error(s.ioerr.Error())
}
return -1
}
@ -174,13 +170,13 @@ func (s *source) fill() {
if n > 0 || err != nil {
s.buf[s.w] = utf8.RuneSelf // sentinel
if err != nil {
s.err = err
s.ioerr = err
}
return
}
}
s.err = io.ErrNoProgress
s.ioerr = io.ErrNoProgress
}
func (s *source) startLit() {
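
The source.go changes rename the pending read error from err to ioerr and make the errh callback mandatory, keeping I/O failures separate from reported scan errors until the reader actually runs dry. A rough stand-alone sketch of that pattern; toySource and get are illustrative names, not the package's API:

    package main

    import (
        "fmt"
        "io"
        "strings"
    )

    // toySource parks read failures in ioerr and only surfaces them through the
    // (mandatory) errh callback once the reader runs dry, and then only if the
    // failure was not a plain EOF.
    type toySource struct {
        src   io.Reader
        errh  func(line, col uint, msg string)
        ioerr error
        buf   [1]byte
        line  uint
    }

    func (s *toySource) get() (byte, bool) {
        if s.ioerr == nil {
            n, err := s.src.Read(s.buf[:])
            if err != nil {
                s.ioerr = err // pending I/O error; reported later if not EOF
            } else if n == 0 {
                s.ioerr = io.ErrNoProgress // reader made no progress
            }
            if n == 1 {
                if s.buf[0] == '\n' {
                    s.line++
                }
                return s.buf[0], true
            }
        }
        if s.ioerr != io.EOF {
            s.errh(s.line+1, 1, s.ioerr.Error())
        }
        return 0, false
    }

    func main() {
        s := toySource{
            src:  strings.NewReader("ab"),
            errh: func(line, col uint, msg string) { fmt.Printf("%d:%d: %s\n", line, col, msg) },
        }
        for {
            c, ok := s.get()
            if !ok {
                break
            }
            fmt.Printf("%c ", c)
        }
        fmt.Println("(done)")
    }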