[dev.inline] cmd/compile/internal/syntax: clean up error and pragma handling

Reviewed in and cherry-picked from https://go-review.googlesource.com/#/c/33873/.

- simplify error handling in source.go
  (move handling of first error into parser, where it belongs)
- clean up error handling in scanner.go
- move pragma and position base handling from scanner
  to parser where it belongs
- have separate error methods in parser to avoid confusion
  with handlers from scanner.go and source.go
- (source.go) and (scanner.go, source.go, tokens.go) may be
  stand-alone packages if so desired, which means these
  files are now less entangled and easier to maintain

Change-Id: I81510fc7ef943b78eaa49092c0eab2075a05878c
Reviewed-on: https://go-review.googlesource.com/34235
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2025-12-08 06:10:04 +00:00 · 2016-12-01 22:04:49 -08:00 · 2016-12-01 22:04:49 -08:00 · 54ef0447fe
commit 54ef0447fe
parent e97c8a592f
4 changed files with 128 additions and 102 deletions
--- a/src/cmd/compile/internal/syntax/parser.go
+++ b/src/cmd/compile/internal/syntax/parser.go
@ -7,6 +7,7 @@ package syntax
 import (
 	"fmt"
 	"io"
 	"strconv"
 	"strings"
 )
@ -19,21 +20,53 @@ const trace = false
 const gcCompat = true
 type parser struct {
 	base *PosBase
 	errh ErrorHandler
 	scanner
 	first  error  // first error encountered
 	pragma Pragma // pragma flags
 	fnest  int    // function nesting level (for error handling)
 	xnest  int    // expression nesting level (for complit ambiguity resolution)
 	indent []byte // tracing support
 }
 func (p *parser) init(filename string, src io.Reader, errh ErrorHandler, pragh PragmaHandler) {
-	p.scanner.init(filename, src, errh, pragh)
+	p.base = NewFileBase(filename)
 	p.errh = errh
 	p.scanner.init(src, p.error_at, func(line, col uint, text string) {
 		if strings.HasPrefix(text, "line ") {
 			p.updateBase(line, col, text[5:])
 		}
 		if pragh != nil {
 			p.pragma |= pragh(line, text)
 		}
 	}, gcCompat)
 	p.first = nil
 	p.pragma = 0
 	p.fnest = 0
 	p.xnest = 0
 	p.indent = nil
 }
 func (p *parser) updateBase(line, col uint, text string) {
 	// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
 	i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
 	if i < 0 {
 		return
 	}
 	nstr := text[i+1:]
 	n, err := strconv.Atoi(nstr)
 	if err != nil || n <= 0 || n > lineMax {
 		p.error_at(line, col+uint(i+1), "invalid line number: "+nstr)
 		return
 	}
 	p.base = NewLinePragmaBase(MakePos(p.base.Pos().Base(), line, col), text[:i], uint(n))
 }
 func (p *parser) got(tok token) bool {
 	if p.tok == tok {
 		p.next()
@ -52,12 +85,24 @@ func (p *parser) want(tok token) {
 // ----------------------------------------------------------------------------
 // Error handling
-// syntax_error reports a syntax error at the current line.
+// error reports an error at the given position.
-func (p *parser) syntax_error(msg string) {
+func (p *parser) error_at(line, col uint, msg string) {
-	p.syntax_error_at(p.line, p.col, msg)
+	err := Error{line, col, msg}
 	if p.first == nil {
 		p.first = err
 	}
 	if p.errh == nil {
 		panic(p.first)
 	}
 	p.errh(err)
 }
-// Like syntax_error, but reports error at given line rather than current lexer line.
+// error reports a (non-syntax) error at the current token position.
 func (p *parser) error(msg string) {
 	p.error_at(p.line, p.col, msg)
 }
 // syntax_error_at reports a syntax error at the given position.
 func (p *parser) syntax_error_at(line, col uint, msg string) {
 	if trace {
 		defer p.trace("syntax_error (" + msg + ")")()
@ -102,6 +147,11 @@ func (p *parser) syntax_error_at(line, col uint, msg string) {
 	p.error_at(line, col, "syntax error: unexpected "+tok+msg)
 }
 // syntax_error reports a syntax error at the current token position.
 func (p *parser) syntax_error(msg string) {
 	p.syntax_error_at(p.line, p.col, msg)
 }
 // The stopset contains keywords that start a statement.
 // They are good synchronization points in case of syntax
 // errors and (usually) shouldn't be skipped over.
--- a/src/cmd/compile/internal/syntax/scanner.go
+++ b/src/cmd/compile/internal/syntax/scanner.go
@ -2,39 +2,43 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // This file implements scanner, a lexical tokenizer for
 // Go source. After initialization, consecutive calls of
 // next advance the scanner one token at a time.
 //
 // This file, source.go, and tokens.go are self-contained
 // (go tool compile scanner.go source.go tokens.go compiles)
 // and thus could be made into its own package.
 package syntax
 import (
 	"fmt"
 	"io"
 	"strconv"
 	"strings"
 	"unicode"
 	"unicode/utf8"
 )
 type scanner struct {
 	source
-	nlsemi bool // if set '\n' and EOF translate to ';'
+	pragh    func(line, col uint, msg string)
-	pragma Pragma
+	gcCompat bool // TODO(gri) remove this eventually (only here so we can build w/o parser)
 	nlsemi   bool // if set '\n' and EOF translate to ';'
 	// current token, valid after calling next()
 	base      *PosBase
 	line, col uint
 	tok       token
 	lit       string   // valid if tok is _Name or _Literal
 	kind      LitKind  // valid if tok is _Literal
 	op        Operator // valid if tok is _Operator, _AssignOp, or _IncOp
 	prec      int      // valid if tok is _Operator, _AssignOp, or _IncOp
 	pragh PragmaHandler
 }
-func (s *scanner) init(filename string, src io.Reader, errh ErrorHandler, pragh PragmaHandler) {
+func (s *scanner) init(src io.Reader, errh, pragh func(line, col uint, msg string), gcCompat bool) {
 	s.source.init(src, errh)
 	s.nlsemi = false
 	s.base = NewFileBase(filename)
 	s.pragh = pragh
 	s.gcCompat = gcCompat
 	s.nlsemi = false
 }
 func (s *scanner) next() {
@ -331,7 +335,7 @@ func (s *scanner) ident() {
 }
 func (s *scanner) isCompatRune(c rune, start bool) bool {
-	if !gcCompat || c < utf8.RuneSelf {
+	if !s.gcCompat || c < utf8.RuneSelf {
 		return false
 	}
 	if start && unicode.IsNumber(c) {
@ -461,7 +465,7 @@ func (s *scanner) stdString() {
 			break
 		}
 		if r < 0 {
-			s.error_at(s.line, s.col, "string not terminated")
+			s.errh(s.line, s.col, "string not terminated")
 			break
 		}
 	}
@ -481,7 +485,7 @@ func (s *scanner) rawString() {
 			break
 		}
 		if r < 0 {
-			s.error_at(s.line, s.col, "string not terminated")
+			s.errh(s.line, s.col, "string not terminated")
 			break
 		}
 	}
@ -538,23 +542,18 @@ func (s *scanner) skipLine(r rune) {
 }
 func (s *scanner) lineComment() {
 	// recognize pragmas
 	prefix := ""
 	r := s.getr()
-	switch r {
+	if s.pragh == nil || (r != 'g' && r != 'l') {
 	case 'g':
 		if s.pragh == nil {
 			s.skipLine(r)
 			return
 		}
 		prefix = "go:"
 	case 'l':
 		prefix = "line "
 	default:
 		s.skipLine(r)
 		return
 	}
 	// s.pragh != nil && (r == 'g' || r == 'l')
 	// recognize pragmas
 	prefix := "go:"
 	if r == 'l' {
 		prefix = "line "
 	}
 	for _, m := range prefix {
 		if r != m {
 			s.skipLine(r)
@ -563,34 +562,15 @@ func (s *scanner) lineComment() {
 		r = s.getr()
 	}
-	// pragma text without prefix and line ending (which may be "\r\n" if Windows)
+	// pragma text without line ending (which may be "\r\n" if Windows),
 	s.startLit()
 	s.skipLine(r)
-	text := strings.TrimSuffix(string(s.stopLit()), "\r")
+	text := s.stopLit()
-
+	if i := len(text) - 1; i >= 0 && text[i] == '\r' {
-	// process //line filename:line pragma
+		text = text[:i]
 	if prefix[0] == 'l' {
 		// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
 		i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
 		if i < 0 {
 			return
 		}
 		nstr := text[i+1:]
 		n, err := strconv.Atoi(nstr)
 		if err != nil || n <= 0 || n > lineMax {
 			s.error_at(s.line0, s.col0-uint(len(nstr)), "invalid line number: "+nstr)
 			return
 		}
 		s.base = NewLinePragmaBase(MakePos(s.base.Pos().Base(), s.line, s.col), text[:i], uint(n))
 		// TODO(gri) Return here once we rely exclusively
 		// on node positions for line number information,
 		// and remove //line pragma handling elsewhere.
 		if s.pragh == nil {
 			return
 		}
 	}
-	s.pragma |= s.pragh(s.line, prefix+text)
+	s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since pragma text starts after //
 }
 func (s *scanner) fullComment() {
@ -603,7 +583,7 @@ func (s *scanner) fullComment() {
 			}
 		}
 		if r < 0 {
-			s.error_at(s.line, s.col, "comment not terminated")
+			s.errh(s.line, s.col, "comment not terminated")
 			return
 		}
 	}
@ -651,7 +631,7 @@ func (s *scanner) escape(quote rune) bool {
 			if c < 0 {
 				return true // complain in caller about EOF
 			}
-			if gcCompat {
+			if s.gcCompat {
 				name := "hex"
 				if base == 8 {
 					name = "octal"
--- a/src/cmd/compile/internal/syntax/scanner_test.go
+++ b/src/cmd/compile/internal/syntax/scanner_test.go
@ -22,7 +22,7 @@ func TestScanner(t *testing.T) {
 	defer src.Close()
 	var s scanner
-	s.init("parser.go", src, nil, nil)
+	s.init(src, nil, nil, false)
 	for {
 		s.next()
 		if s.tok == _EOF {
@ -51,7 +51,7 @@ func TestTokens(t *testing.T) {
 	// scan source
 	var got scanner
-	got.init("", &bytesReader{buf}, nil, nil)
+	got.init(&bytesReader{buf}, nil, nil, false)
 	got.next()
 	for i, want := range sampleTokens {
 		nlsemi := false
@ -317,38 +317,38 @@ func TestScanErrors(t *testing.T) {
 		{`var s string = "\x"`, "non-hex character in escape sequence: \"", 1, 19},
 		{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 1, 19},
-		{`//line :`, "invalid line number: ", 1, 9},
+		// TODO(gri) move these test cases into an appropriate parser test
-		{`//line :x`, "invalid line number: x", 1, 9},
+		// {`//line :`, "invalid line number: ", 1, 9},
-		{`//line foo :`, "invalid line number: ", 1, 13},
+		// {`//line :x`, "invalid line number: x", 1, 9},
-		{`//line foo:123abc`, "invalid line number: 123abc", 1, 12},
+		// {`//line foo :`, "invalid line number: ", 1, 13},
-		{`/**///line foo:x`, "invalid line number: x", 1, 16},
+		// {`//line foo:123abc`, "invalid line number: 123abc", 1, 12},
-		{`//line foo:0`, "invalid line number: 0", 1, 12},
+		// {`/**///line foo:x`, "invalid line number: x", 1, 16},
-		{fmt.Sprintf(`//line foo:%d`, lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), 1, 12},
+		// {`//line foo:0`, "invalid line number: 0", 1, 12},
 		// {fmt.Sprintf(`//line foo:%d`, lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), 1, 12},
 		// former problem cases
 		{"package p\n\n\xef", "invalid UTF-8 encoding", 3, 1},
 	} {
 		var s scanner
 		nerrors := 0
-		s.init("", &bytesReader{[]byte(test.src)}, func(err error) {
+		s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) {
 			nerrors++
 			// only check the first error
 			e := err.(Error) // we know it's an Error
 			if nerrors == 1 {
-				if e.Msg != test.msg {
+				if msg != test.msg {
-					t.Errorf("%q: got msg = %q; want %q", test.src, e.Msg, test.msg)
+					t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
 				}
-				if e.Line != test.line {
+				if line != test.line {
-					t.Errorf("%q: got line = %d; want %d", test.src, e.Line, test.line)
+					t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
 				}
-				if e.Col != test.col {
+				if col != test.col {
-					t.Errorf("%q: got col = %d; want %d", test.src, e.Col, test.col)
+					t.Errorf("%q: got col = %d; want %d", test.src, col, test.col)
 				}
 			} else if nerrors > 1 {
 				// TODO(gri) make this use position info
-				t.Errorf("%q: got unexpected %q at line = %d", test.src, e.Msg, e.Line)
+				t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
 			}
-		}, nil)
+		}, nil, true)
 		for {
 			s.next()
--- a/src/cmd/compile/internal/syntax/source.go
+++ b/src/cmd/compile/internal/syntax/source.go
@ -7,6 +7,9 @@
 // Contiguous sequences of runes (literals) are extracted
 // directly as []byte without the need to re-encode the
 // runes in UTF-8 (as would be necessary with bufio.Reader).
 //
 // This file is self-contained (go tool compile source.go
 // compiles) and thus could be made into its own package.
 package syntax
@ -21,9 +24,8 @@ import (
 //        suf     r0  r            w
 type source struct {
-	src   io.Reader
+	src  io.Reader
-	errh  ErrorHandler
+	errh func(line, pos uint, msg string)
 	first error // first error encountered
 	// source buffer
 	buf         [4 << 10]byte
@ -31,44 +33,30 @@ type source struct {
 	r0, r, w    int   // previous/current read and write buf positions, excluding sentinel
 	line0, line uint  // previous/current line
 	col0, col   uint  // previous/current column
-	err         error // pending io error
+	ioerr       error // pending io error
 	// literal buffer
 	lit []byte // literal prefix
 	suf int    // literal suffix; suf >= 0 means we are scanning a literal
 }
-func (s *source) init(src io.Reader, errh ErrorHandler) {
+// init initializes source to read from src and to report errors via errh.
 // errh must not be nil.
 func (s *source) init(src io.Reader, errh func(line, pos uint, msg string)) {
 	s.src = src
 	s.errh = errh
 	s.first = nil
 	s.buf[0] = utf8.RuneSelf // terminate with sentinel
 	s.offs = 0
 	s.r0, s.r, s.w = 0, 0, 0
 	s.line0, s.line = 1, 1
 	s.col0, s.col = 1, 1
-	s.err = nil
+	s.ioerr = nil
 	s.lit = s.lit[:0]
 	s.suf = -1
 }
 func (s *source) error(msg string) {
 	s.error_at(s.line0, s.col0, msg)
 }
 func (s *source) error_at(line, col uint, msg string) {
 	err := Error{line, col, msg}
 	if s.first == nil {
 		s.first = err
 	}
 	if s.errh == nil {
 		panic(s.first)
 	}
 	s.errh(err)
 }
 // ungetr ungets the most recently read rune.
 func (s *source) ungetr() {
 	s.r, s.line, s.col = s.r0, s.line0, s.col0
@ -84,6 +72,14 @@ func (s *source) ungetr2() {
 	s.col0--
 }
 func (s *source) error(msg string) {
 	s.errh(s.line0, s.col0, msg)
 }
 // getr reads and returns the next rune.
 // If an error occurs, the error handler provided to init
 // is called with position (line and column) information
 // and error message before getr returns.
 func (s *source) getr() rune {
 redo:
 	s.r0, s.line0, s.col0 = s.r, s.line, s.col
@ -94,7 +90,7 @@ redo:
 	// in the buffer. Measure and optimize if necessary.
 	// make sure we have at least one rune in buffer, or we are at EOF
-	for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.err == nil && s.w-s.r < len(s.buf) {
+	for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.ioerr == nil && s.w-s.r < len(s.buf) {
 		s.fill() // s.w-s.r < len(s.buf) => buffer is not full
 	}
@ -116,8 +112,8 @@ redo:
 	// EOF
 	if s.r == s.w {
-		if s.err != io.EOF {
+		if s.ioerr != io.EOF {
-			s.error(s.err.Error())
+			s.error(s.ioerr.Error())
 		}
 		return -1
 	}
@ -174,13 +170,13 @@ func (s *source) fill() {
 		if n > 0 || err != nil {
 			s.buf[s.w] = utf8.RuneSelf // sentinel
 			if err != nil {
-				s.err = err
+				s.ioerr = err
 			}
 			return
 		}
 	}
-	s.err = io.ErrNoProgress
+	s.ioerr = io.ErrNoProgress
 }
 func (s *source) startLit() {