[dev.inline] cmd/compile/internal/syntax: clean up error and pragma handling

Reviewed in and cherry-picked from https://go-review.googlesource.com/#/c/33873/.

- simplify error handling in source.go
  (move handling of first error into parser, where it belongs)

- clean up error handling in scanner.go

- move pragma and position base handling from scanner
  to parser where it belongs

- have separate error methods in parser to avoid confusion
  with handlers from scanner.go and source.go

- (source.go) and (scanner.go, source.go, tokens.go) could each be made
  into a stand-alone package if so desired; these files are now less
  entangled and easier to maintain

Change-Id: I81510fc7ef943b78eaa49092c0eab2075a05878c
Reviewed-on: https://go-review.googlesource.com/34235
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Robert Griesemer 2016-12-01 22:04:49 -08:00
parent e97c8a592f
commit 54ef0447fe
4 changed files with 128 additions and 102 deletions
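
The bullets above describe pulling first-error bookkeeping and pragma dispatch out of the scanner and into the parser, which now installs plain callbacks when it initializes the scanner. A minimal stand-alone sketch of that wiring; toyScanner, toyParser, and errorAt are illustrative stand-ins, not the package's actual types:

    package main

    import "fmt"

    // toyScanner stands in for the real scanner: it only knows how to report
    // errors and directive text through the callbacks it was given at init time.
    type toyScanner struct {
        errh  func(line, col uint, msg string)
        pragh func(line, col uint, text string)
    }

    func (s *toyScanner) init(errh func(line, col uint, msg string), pragh func(line, col uint, text string)) {
        s.errh = errh
        s.pragh = pragh
    }

    // toyParser owns the error bookkeeping (first error) and pragma state,
    // mirroring how the parser now installs closures into the scanner.
    type toyParser struct {
        toyScanner
        first  error
        pragma uint16
    }

    func (p *toyParser) errorAt(line, col uint, msg string) {
        err := fmt.Errorf("%d:%d: %s", line, col, msg)
        if p.first == nil {
            p.first = err // remember only the first error
        }
        fmt.Println(err)
    }

    func (p *toyParser) init() {
        p.toyScanner.init(p.errorAt, func(line, col uint, text string) {
            // pragma dispatch now happens on the parser side
            if text == "go:noinline" {
                p.pragma |= 1
            }
        })
    }

    func main() {
        var p toyParser
        p.init()
        p.errh(3, 7, "string not terminated") // scanner reports, parser records
        p.pragh(5, 1, "go:noinline")          // scanner forwards, parser interprets
        fmt.Println("first error:", p.first, "pragma bits:", p.pragma)
    }

The shape matters more than the details: the scanner only ever holds two plain functions, while all state (first error, pragma flags, position base) stays in the parser.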

src/cmd/compile/internal/syntax/parser.go (View file)

@ -7,6 +7,7 @@ package syntax
import (
"fmt"
"io"
"strconv"
"strings"
)
@ -19,21 +20,53 @@ const trace = false
const gcCompat = true
type parser struct {
base *PosBase
errh ErrorHandler
scanner
first error // first error encountered
pragma Pragma // pragma flags
fnest int // function nesting level (for error handling)
xnest int // expression nesting level (for complit ambiguity resolution)
indent []byte // tracing support
}
func (p *parser) init(filename string, src io.Reader, errh ErrorHandler, pragh PragmaHandler) {
p.scanner.init(filename, src, errh, pragh)
p.base = NewFileBase(filename)
p.errh = errh
p.scanner.init(src, p.error_at, func(line, col uint, text string) {
if strings.HasPrefix(text, "line ") {
p.updateBase(line, col, text[5:])
}
if pragh != nil {
p.pragma |= pragh(line, text)
}
}, gcCompat)
p.first = nil
p.pragma = 0
p.fnest = 0
p.xnest = 0
p.indent = nil
}
func (p *parser) updateBase(line, col uint, text string) {
// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
if i < 0 {
return
}
nstr := text[i+1:]
n, err := strconv.Atoi(nstr)
if err != nil || n <= 0 || n > lineMax {
p.error_at(line, col+uint(i+1), "invalid line number: "+nstr)
return
}
p.base = NewLinePragmaBase(MakePos(p.base.Pos().Base(), line, col), text[:i], uint(n))
}
func (p *parser) got(tok token) bool {
if p.tok == tok {
p.next()
@ -52,12 +85,24 @@ func (p *parser) want(tok token) {
// ----------------------------------------------------------------------------
// Error handling
// syntax_error reports a syntax error at the current line.
func (p *parser) syntax_error(msg string) {
p.syntax_error_at(p.line, p.col, msg)
// error reports an error at the given position.
func (p *parser) error_at(line, col uint, msg string) {
err := Error{line, col, msg}
if p.first == nil {
p.first = err
}
if p.errh == nil {
panic(p.first)
}
p.errh(err)
}
// Like syntax_error, but reports error at given line rather than current lexer line.
// error reports a (non-syntax) error at the current token position.
func (p *parser) error(msg string) {
p.error_at(p.line, p.col, msg)
}
// syntax_error_at reports a syntax error at the given position.
func (p *parser) syntax_error_at(line, col uint, msg string) {
if trace {
defer p.trace("syntax_error (" + msg + ")")()
@ -102,6 +147,11 @@ func (p *parser) syntax_error_at(line, col uint, msg string) {
p.error_at(line, col, "syntax error: unexpected "+tok+msg)
}
// syntax_error reports a syntax error at the current token position.
func (p *parser) syntax_error(msg string) {
p.syntax_error_at(p.line, p.col, msg)
}
// The stopset contains keywords that start a statement.
// They are good synchronization points in case of syntax
// errors and (usually) shouldn't be skipped over.
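
The new updateBase above is where //line directives are now consumed on the parser side. A stand-alone sketch of the filename:line split it performs; parseLineDirective is an illustrative helper, not part of the package, and the real code additionally rejects line numbers above lineMax:

    package main

    import (
        "fmt"
        "strconv"
        "strings"
    )

    // parseLineDirective splits the text after "//line " into a file name and a
    // line number. It searches for the last ':' so that a drive-letter colon in
    // a Windows path is not mistaken for the separator.
    func parseLineDirective(text string) (file string, line int, ok bool) {
        i := strings.LastIndex(text, ":")
        if i < 0 {
            return "", 0, false
        }
        n, err := strconv.Atoi(text[i+1:])
        if err != nil || n <= 0 { // the real code also caps n at lineMax
            return "", 0, false
        }
        return text[:i], n, true
    }

    func main() {
        fmt.Println(parseLineDirective(`foo.go:42`))        // foo.go 42 true
        fmt.Println(parseLineDirective(`c:\dir\foo.go:10`)) // c:\dir\foo.go 10 true
        fmt.Println(parseLineDirective(`foo.go:0`))         // rejected: line numbers must be positive
    }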

src/cmd/compile/internal/syntax/scanner.go (View file)

@ -2,39 +2,43 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements scanner, a lexical tokenizer for
// Go source. After initialization, consecutive calls of
// next advance the scanner one token at a time.
//
// This file, source.go, and tokens.go are self-contained
// (go tool compile scanner.go source.go tokens.go compiles)
// and thus could be made into its own package.
package syntax
import (
"fmt"
"io"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
type scanner struct {
source
pragh func(line, col uint, msg string)
gcCompat bool // TODO(gri) remove this eventually (only here so we can build w/o parser)
nlsemi bool // if set '\n' and EOF translate to ';'
pragma Pragma
// current token, valid after calling next()
base *PosBase
line, col uint
tok token
lit string // valid if tok is _Name or _Literal
kind LitKind // valid if tok is _Literal
op Operator // valid if tok is _Operator, _AssignOp, or _IncOp
prec int // valid if tok is _Operator, _AssignOp, or _IncOp
pragh PragmaHandler
}
func (s *scanner) init(filename string, src io.Reader, errh ErrorHandler, pragh PragmaHandler) {
func (s *scanner) init(src io.Reader, errh, pragh func(line, col uint, msg string), gcCompat bool) {
s.source.init(src, errh)
s.nlsemi = false
s.base = NewFileBase(filename)
s.pragh = pragh
s.gcCompat = gcCompat
s.nlsemi = false
}
func (s *scanner) next() {
@ -331,7 +335,7 @@ func (s *scanner) ident() {
}
func (s *scanner) isCompatRune(c rune, start bool) bool {
if !gcCompat || c < utf8.RuneSelf {
if !s.gcCompat || c < utf8.RuneSelf {
return false
}
if start && unicode.IsNumber(c) {
@ -461,7 +465,7 @@ func (s *scanner) stdString() {
break
}
if r < 0 {
s.error_at(s.line, s.col, "string not terminated")
s.errh(s.line, s.col, "string not terminated")
break
}
}
@ -481,7 +485,7 @@ func (s *scanner) rawString() {
break
}
if r < 0 {
s.error_at(s.line, s.col, "string not terminated")
s.errh(s.line, s.col, "string not terminated")
break
}
}
@ -538,23 +542,18 @@ func (s *scanner) skipLine(r rune) {
}
func (s *scanner) lineComment() {
// recognize pragmas
prefix := ""
r := s.getr()
switch r {
case 'g':
if s.pragh == nil {
s.skipLine(r)
return
}
prefix = "go:"
case 'l':
prefix = "line "
default:
if s.pragh == nil || (r != 'g' && r != 'l') {
s.skipLine(r)
return
}
// s.pragh != nil && (r == 'g' || r == 'l')
// recognize pragmas
prefix := "go:"
if r == 'l' {
prefix = "line "
}
for _, m := range prefix {
if r != m {
s.skipLine(r)
@ -563,34 +562,15 @@ func (s *scanner) lineComment() {
r = s.getr()
}
// pragma text without prefix and line ending (which may be "\r\n" if Windows)
// pragma text without line ending (which may be "\r\n" if Windows),
s.startLit()
s.skipLine(r)
text := strings.TrimSuffix(string(s.stopLit()), "\r")
// process //line filename:line pragma
if prefix[0] == 'l' {
// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
if i < 0 {
return
}
nstr := text[i+1:]
n, err := strconv.Atoi(nstr)
if err != nil || n <= 0 || n > lineMax {
s.error_at(s.line0, s.col0-uint(len(nstr)), "invalid line number: "+nstr)
return
}
s.base = NewLinePragmaBase(MakePos(s.base.Pos().Base(), s.line, s.col), text[:i], uint(n))
// TODO(gri) Return here once we rely exclusively
// on node positions for line number information,
// and remove //line pragma handling elsewhere.
if s.pragh == nil {
return
}
text := s.stopLit()
if i := len(text) - 1; i >= 0 && text[i] == '\r' {
text = text[:i]
}
s.pragma |= s.pragh(s.line, prefix+text)
s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since pragma text starts after //
}
func (s *scanner) fullComment() {
@ -603,7 +583,7 @@ func (s *scanner) fullComment() {
}
}
if r < 0 {
s.error_at(s.line, s.col, "comment not terminated")
s.errh(s.line, s.col, "comment not terminated")
return
}
}
@ -651,7 +631,7 @@ func (s *scanner) escape(quote rune) bool {
if c < 0 {
return true // complain in caller about EOF
}
if gcCompat {
if s.gcCompat {
name := "hex"
if base == 8 {
name = "octal"
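
With the scanner.go changes above, lineComment no longer interprets //line itself: it forwards the full comment text, prefix included, to whatever handler the parser installed, at column col+2 (just past the leading //). A sketch of the kind of dispatch such a handler performs, with illustrative names:

    package main

    import (
        "fmt"
        "strings"
    )

    // handleComment shows the kind of dispatch the parser's closure performs on
    // the text it now receives from the scanner, prefix and all.
    func handleComment(line, col uint, text string) {
        switch {
        case strings.HasPrefix(text, "line "):
            // position-base update, e.g. //line foo.go:10
            fmt.Printf("%d:%d: line directive %q\n", line, col, text[len("line "):])
        case strings.HasPrefix(text, "go:"):
            // compiler pragma, e.g. //go:noinline
            fmt.Printf("%d:%d: pragma %q\n", line, col, text[len("go:"):])
        }
    }

    func main() {
        handleComment(1, 3, "line foo.go:10") // forwarded from //line foo.go:10
        handleComment(2, 3, "go:noinline")    // forwarded from //go:noinline
    }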

src/cmd/compile/internal/syntax/scanner_test.go (View file)

@ -22,7 +22,7 @@ func TestScanner(t *testing.T) {
defer src.Close()
var s scanner
s.init("parser.go", src, nil, nil)
s.init(src, nil, nil, false)
for {
s.next()
if s.tok == _EOF {
@ -51,7 +51,7 @@ func TestTokens(t *testing.T) {
// scan source
var got scanner
got.init("", &bytesReader{buf}, nil, nil)
got.init(&bytesReader{buf}, nil, nil, false)
got.next()
for i, want := range sampleTokens {
nlsemi := false
@ -317,38 +317,38 @@ func TestScanErrors(t *testing.T) {
{`var s string = "\x"`, "non-hex character in escape sequence: \"", 1, 19},
{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 1, 19},
{`//line :`, "invalid line number: ", 1, 9},
{`//line :x`, "invalid line number: x", 1, 9},
{`//line foo :`, "invalid line number: ", 1, 13},
{`//line foo:123abc`, "invalid line number: 123abc", 1, 12},
{`/**///line foo:x`, "invalid line number: x", 1, 16},
{`//line foo:0`, "invalid line number: 0", 1, 12},
{fmt.Sprintf(`//line foo:%d`, lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), 1, 12},
// TODO(gri) move these test cases into an appropriate parser test
// {`//line :`, "invalid line number: ", 1, 9},
// {`//line :x`, "invalid line number: x", 1, 9},
// {`//line foo :`, "invalid line number: ", 1, 13},
// {`//line foo:123abc`, "invalid line number: 123abc", 1, 12},
// {`/**///line foo:x`, "invalid line number: x", 1, 16},
// {`//line foo:0`, "invalid line number: 0", 1, 12},
// {fmt.Sprintf(`//line foo:%d`, lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), 1, 12},
// former problem cases
{"package p\n\n\xef", "invalid UTF-8 encoding", 3, 1},
} {
var s scanner
nerrors := 0
s.init("", &bytesReader{[]byte(test.src)}, func(err error) {
s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) {
nerrors++
// only check the first error
e := err.(Error) // we know it's an Error
if nerrors == 1 {
if e.Msg != test.msg {
t.Errorf("%q: got msg = %q; want %q", test.src, e.Msg, test.msg)
if msg != test.msg {
t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
}
if e.Line != test.line {
t.Errorf("%q: got line = %d; want %d", test.src, e.Line, test.line)
if line != test.line {
t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
}
if e.Col != test.col {
t.Errorf("%q: got col = %d; want %d", test.src, e.Col, test.col)
if col != test.col {
t.Errorf("%q: got col = %d; want %d", test.src, col, test.col)
}
} else if nerrors > 1 {
// TODO(gri) make this use position info
t.Errorf("%q: got unexpected %q at line = %d", test.src, e.Msg, e.Line)
t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
}
}, nil)
}, nil, true)
for {
s.next()
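
The test changes above swap the scanner's error-handler signature from func(error) to func(line, col uint, msg string). If surrounding code still wants an error value, a small adapter bridges the two shapes; scanError here is a local stand-in for illustration, not the package's Error type:

    package main

    import "fmt"

    // scanError mirrors the line/col/msg triple that the scanner now reports
    // through its plain-function handler; it is a local stand-in, not the
    // package's Error type.
    type scanError struct {
        Line, Col uint
        Msg       string
    }

    func (e scanError) Error() string {
        return fmt.Sprintf("%d:%d: %s", e.Line, e.Col, e.Msg)
    }

    // adapt wraps an old-style handler that wants an error value into the new
    // three-argument callback shape used by scanner.init after this change.
    func adapt(old func(error)) func(line, col uint, msg string) {
        return func(line, col uint, msg string) {
            old(scanError{line, col, msg})
        }
    }

    func main() {
        var first error
        errh := adapt(func(err error) {
            if first == nil {
                first = err // keep only the first error, as the tests do
            }
        })
        errh(3, 1, "invalid UTF-8 encoding")
        errh(4, 2, "string not terminated")
        fmt.Println("first error:", first)
    }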

src/cmd/compile/internal/syntax/source.go (View file)

@ -7,6 +7,9 @@
// Contiguous sequences of runes (literals) are extracted
// directly as []byte without the need to re-encode the
// runes in UTF-8 (as would be necessary with bufio.Reader).
//
// This file is self-contained (go tool compile source.go
// compiles) and thus could be made into its own package.
package syntax
@ -22,8 +25,7 @@ import (
type source struct {
src io.Reader
errh ErrorHandler
first error // first error encountered
errh func(line, pos uint, msg string)
// source buffer
buf [4 << 10]byte
@ -31,44 +33,30 @@ type source struct {
r0, r, w int // previous/current read and write buf positions, excluding sentinel
line0, line uint // previous/current line
col0, col uint // previous/current column
err error // pending io error
ioerr error // pending io error
// literal buffer
lit []byte // literal prefix
suf int // literal suffix; suf >= 0 means we are scanning a literal
}
func (s *source) init(src io.Reader, errh ErrorHandler) {
// init initializes source to read from src and to report errors via errh.
// errh must not be nil.
func (s *source) init(src io.Reader, errh func(line, pos uint, msg string)) {
s.src = src
s.errh = errh
s.first = nil
s.buf[0] = utf8.RuneSelf // terminate with sentinel
s.offs = 0
s.r0, s.r, s.w = 0, 0, 0
s.line0, s.line = 1, 1
s.col0, s.col = 1, 1
s.err = nil
s.ioerr = nil
s.lit = s.lit[:0]
s.suf = -1
}
func (s *source) error(msg string) {
s.error_at(s.line0, s.col0, msg)
}
func (s *source) error_at(line, col uint, msg string) {
err := Error{line, col, msg}
if s.first == nil {
s.first = err
}
if s.errh == nil {
panic(s.first)
}
s.errh(err)
}
// ungetr ungets the most recently read rune.
func (s *source) ungetr() {
s.r, s.line, s.col = s.r0, s.line0, s.col0
@ -84,6 +72,14 @@ func (s *source) ungetr2() {
s.col0--
}
func (s *source) error(msg string) {
s.errh(s.line0, s.col0, msg)
}
// getr reads and returns the next rune.
// If an error occurs, the error handler provided to init
// is called with position (line and column) information
// and error message before getr returns.
func (s *source) getr() rune {
redo:
s.r0, s.line0, s.col0 = s.r, s.line, s.col
@ -94,7 +90,7 @@ redo:
// in the buffer. Measure and optimize if necessary.
// make sure we have at least one rune in buffer, or we are at EOF
for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.err == nil && s.w-s.r < len(s.buf) {
for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.ioerr == nil && s.w-s.r < len(s.buf) {
s.fill() // s.w-s.r < len(s.buf) => buffer is not full
}
@ -116,8 +112,8 @@ redo:
// EOF
if s.r == s.w {
if s.err != io.EOF {
s.error(s.err.Error())
if s.ioerr != io.EOF {
s.error(s.ioerr.Error())
}
return -1
}
@ -174,13 +170,13 @@ func (s *source) fill() {
if n > 0 || err != nil {
s.buf[s.w] = utf8.RuneSelf // sentinel
if err != nil {
s.err = err
s.ioerr = err
}
return
}
}
s.err = io.ErrNoProgress
s.ioerr = io.ErrNoProgress
}
func (s *source) startLit() {
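
The source.go changes rename the pending read error from err to ioerr and make the errh callback mandatory, keeping I/O failures separate from reported scan errors until the reader actually runs dry. A rough stand-alone sketch of that pattern; toySource and get are illustrative names, not the package's API:

    package main

    import (
        "fmt"
        "io"
        "strings"
    )

    // toySource parks read failures in ioerr and only surfaces them through the
    // (mandatory) errh callback once the reader runs dry, and then only if the
    // failure was not a plain EOF.
    type toySource struct {
        src   io.Reader
        errh  func(line, col uint, msg string)
        ioerr error
        buf   [1]byte
        line  uint
    }

    func (s *toySource) get() (byte, bool) {
        if s.ioerr == nil {
            n, err := s.src.Read(s.buf[:])
            if err != nil {
                s.ioerr = err // pending I/O error; reported later if not EOF
            } else if n == 0 {
                s.ioerr = io.ErrNoProgress // reader made no progress
            }
            if n == 1 {
                if s.buf[0] == '\n' {
                    s.line++
                }
                return s.buf[0], true
            }
        }
        if s.ioerr != io.EOF {
            s.errh(s.line+1, 1, s.ioerr.Error())
        }
        return 0, false
    }

    func main() {
        s := toySource{
            src:  strings.NewReader("ab"),
            errh: func(line, col uint, msg string) { fmt.Printf("%d:%d: %s\n", line, col, msg) },
        }
        for {
            c, ok := s.get()
            if !ok {
                break
            }
            fmt.Printf("%c ", c)
        }
        fmt.Println("(done)")
    }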