[dev.inline] cmd/compile/internal/syntax: clean up error and pragma handling

Reviewed in and cherry-picked from https://go-review.googlesource.com/#/c/33873/.

- simplify error handling in source.go
  (move handling of first error into parser, where it belongs)

- clean up error handling in scanner.go

- move pragma and position base handling from scanner
  to parser where it belongs

- have separate error methods in parser to avoid confusion
  with handlers from scanner.go and source.go

- source.go by itself, and scanner.go, source.go, and tokens.go
  together, could each become a stand-alone package if so desired;
  these files are now less entangled and easier to maintain

Change-Id: I81510fc7ef943b78eaa49092c0eab2075a05878c
Reviewed-on: https://go-review.googlesource.com/34235
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Robert Griesemer 2016-12-01 22:04:49 -08:00
parent e97c8a592f
commit 54ef0447fe
4 changed files with 128 additions and 102 deletions

View file

@ -7,6 +7,7 @@ package syntax
import ( import (
"fmt" "fmt"
"io" "io"
"strconv"
"strings" "strings"
) )
@ -19,21 +20,53 @@ const trace = false
const gcCompat = true const gcCompat = true
type parser struct { type parser struct {
base *PosBase
errh ErrorHandler
scanner scanner
first error // first error encountered
pragma Pragma // pragma flags
fnest int // function nesting level (for error handling) fnest int // function nesting level (for error handling)
xnest int // expression nesting level (for complit ambiguity resolution) xnest int // expression nesting level (for complit ambiguity resolution)
indent []byte // tracing support indent []byte // tracing support
} }
func (p *parser) init(filename string, src io.Reader, errh ErrorHandler, pragh PragmaHandler) { func (p *parser) init(filename string, src io.Reader, errh ErrorHandler, pragh PragmaHandler) {
p.scanner.init(filename, src, errh, pragh) p.base = NewFileBase(filename)
p.errh = errh
p.scanner.init(src, p.error_at, func(line, col uint, text string) {
if strings.HasPrefix(text, "line ") {
p.updateBase(line, col, text[5:])
}
if pragh != nil {
p.pragma |= pragh(line, text)
}
}, gcCompat)
p.first = nil
p.pragma = 0
p.fnest = 0 p.fnest = 0
p.xnest = 0 p.xnest = 0
p.indent = nil p.indent = nil
} }
// updateBase handles the text of a //line pragma. The caller passes text
// with the "line " prefix already removed, so text is expected to have the
// form filename:line. If text contains no ':' it is ignored. If the part
// after the last ':' is not a decimal number in the range 1..lineMax, an
// "invalid line number" error is reported via error_at. Otherwise p.base is
// replaced by the result of NewLinePragmaBase, built from a position
// (line, col) relative to the current base, the filename part text[:i],
// and the parsed line number.
//
// NOTE(review): the caller in init strips the 5-byte "line " prefix from
// text but passes col unchanged, so the column computed for the error below
// (col+i+1) looks 5 too small - confirm and adjust in the caller.
func (p *parser) updateBase(line, col uint, text string) {
// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
if i < 0 {
return
}
// nstr is everything after the last ':'; it must be a valid line number.
nstr := text[i+1:]
n, err := strconv.Atoi(nstr)
if err != nil || n <= 0 || n > lineMax {
// Report the error at the column where nstr starts (relative to col).
p.error_at(line, col+uint(i+1), "invalid line number: "+nstr)
return
}
p.base = NewLinePragmaBase(MakePos(p.base.Pos().Base(), line, col), text[:i], uint(n))
}
func (p *parser) got(tok token) bool { func (p *parser) got(tok token) bool {
if p.tok == tok { if p.tok == tok {
p.next() p.next()
@ -52,12 +85,24 @@ func (p *parser) want(tok token) {
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Error handling // Error handling
// syntax_error reports a syntax error at the current line. // error_at reports an error at the given position.
func (p *parser) syntax_error(msg string) { func (p *parser) error_at(line, col uint, msg string) {
p.syntax_error_at(p.line, p.col, msg) err := Error{line, col, msg}
if p.first == nil {
p.first = err
}
if p.errh == nil {
panic(p.first)
}
p.errh(err)
} }
// Like syntax_error, but reports error at given line rather than current lexer line. // error reports a (non-syntax) error at the current token position.
func (p *parser) error(msg string) {
// p.line and p.col come from the embedded scanner and hold the
// position of the current token.
p.error_at(p.line, p.col, msg)
}
// syntax_error_at reports a syntax error at the given position.
func (p *parser) syntax_error_at(line, col uint, msg string) { func (p *parser) syntax_error_at(line, col uint, msg string) {
if trace { if trace {
defer p.trace("syntax_error (" + msg + ")")() defer p.trace("syntax_error (" + msg + ")")()
@ -102,6 +147,11 @@ func (p *parser) syntax_error_at(line, col uint, msg string) {
p.error_at(line, col, "syntax error: unexpected "+tok+msg) p.error_at(line, col, "syntax error: unexpected "+tok+msg)
} }
// syntax_error reports a syntax error at the current token position.
// It forwards msg to syntax_error_at together with p.line and p.col,
// the scanner's position of the current token.
func (p *parser) syntax_error(msg string) {
p.syntax_error_at(p.line, p.col, msg)
}
// The stopset contains keywords that start a statement. // The stopset contains keywords that start a statement.
// They are good synchronization points in case of syntax // They are good synchronization points in case of syntax
// errors and (usually) shouldn't be skipped over. // errors and (usually) shouldn't be skipped over.

View file

@ -2,39 +2,43 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
// This file implements scanner, a lexical tokenizer for
// Go source. After initialization, consecutive calls of
// next advance the scanner one token at a time.
//
// This file, source.go, and tokens.go are self-contained
// (go tool compile scanner.go source.go tokens.go compiles)
// and thus could be made into its own package.
package syntax package syntax
import ( import (
"fmt" "fmt"
"io" "io"
"strconv"
"strings"
"unicode" "unicode"
"unicode/utf8" "unicode/utf8"
) )
type scanner struct { type scanner struct {
source source
nlsemi bool // if set '\n' and EOF translate to ';' pragh func(line, col uint, msg string)
pragma Pragma gcCompat bool // TODO(gri) remove this eventually (only here so we can build w/o parser)
nlsemi bool // if set '\n' and EOF translate to ';'
// current token, valid after calling next() // current token, valid after calling next()
base *PosBase
line, col uint line, col uint
tok token tok token
lit string // valid if tok is _Name or _Literal lit string // valid if tok is _Name or _Literal
kind LitKind // valid if tok is _Literal kind LitKind // valid if tok is _Literal
op Operator // valid if tok is _Operator, _AssignOp, or _IncOp op Operator // valid if tok is _Operator, _AssignOp, or _IncOp
prec int // valid if tok is _Operator, _AssignOp, or _IncOp prec int // valid if tok is _Operator, _AssignOp, or _IncOp
pragh PragmaHandler
} }
func (s *scanner) init(filename string, src io.Reader, errh ErrorHandler, pragh PragmaHandler) { func (s *scanner) init(src io.Reader, errh, pragh func(line, col uint, msg string), gcCompat bool) {
s.source.init(src, errh) s.source.init(src, errh)
s.nlsemi = false
s.base = NewFileBase(filename)
s.pragh = pragh s.pragh = pragh
s.gcCompat = gcCompat
s.nlsemi = false
} }
func (s *scanner) next() { func (s *scanner) next() {
@ -331,7 +335,7 @@ func (s *scanner) ident() {
} }
func (s *scanner) isCompatRune(c rune, start bool) bool { func (s *scanner) isCompatRune(c rune, start bool) bool {
if !gcCompat || c < utf8.RuneSelf { if !s.gcCompat || c < utf8.RuneSelf {
return false return false
} }
if start && unicode.IsNumber(c) { if start && unicode.IsNumber(c) {
@ -461,7 +465,7 @@ func (s *scanner) stdString() {
break break
} }
if r < 0 { if r < 0 {
s.error_at(s.line, s.col, "string not terminated") s.errh(s.line, s.col, "string not terminated")
break break
} }
} }
@ -481,7 +485,7 @@ func (s *scanner) rawString() {
break break
} }
if r < 0 { if r < 0 {
s.error_at(s.line, s.col, "string not terminated") s.errh(s.line, s.col, "string not terminated")
break break
} }
} }
@ -538,23 +542,18 @@ func (s *scanner) skipLine(r rune) {
} }
func (s *scanner) lineComment() { func (s *scanner) lineComment() {
// recognize pragmas
prefix := ""
r := s.getr() r := s.getr()
switch r { if s.pragh == nil || (r != 'g' && r != 'l') {
case 'g':
if s.pragh == nil {
s.skipLine(r)
return
}
prefix = "go:"
case 'l':
prefix = "line "
default:
s.skipLine(r) s.skipLine(r)
return return
} }
// s.pragh != nil && (r == 'g' || r == 'l')
// recognize pragmas
prefix := "go:"
if r == 'l' {
prefix = "line "
}
for _, m := range prefix { for _, m := range prefix {
if r != m { if r != m {
s.skipLine(r) s.skipLine(r)
@ -563,34 +562,15 @@ func (s *scanner) lineComment() {
r = s.getr() r = s.getr()
} }
// pragma text without prefix and line ending (which may be "\r\n" if Windows) // pragma text without line ending (which may be "\r\n" if Windows),
s.startLit() s.startLit()
s.skipLine(r) s.skipLine(r)
text := strings.TrimSuffix(string(s.stopLit()), "\r") text := s.stopLit()
if i := len(text) - 1; i >= 0 && text[i] == '\r' {
// process //line filename:line pragma text = text[:i]
if prefix[0] == 'l' {
// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
if i < 0 {
return
}
nstr := text[i+1:]
n, err := strconv.Atoi(nstr)
if err != nil || n <= 0 || n > lineMax {
s.error_at(s.line0, s.col0-uint(len(nstr)), "invalid line number: "+nstr)
return
}
s.base = NewLinePragmaBase(MakePos(s.base.Pos().Base(), s.line, s.col), text[:i], uint(n))
// TODO(gri) Return here once we rely exclusively
// on node positions for line number information,
// and remove //line pragma handling elsewhere.
if s.pragh == nil {
return
}
} }
s.pragma |= s.pragh(s.line, prefix+text) s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since pragma text starts after //
} }
func (s *scanner) fullComment() { func (s *scanner) fullComment() {
@ -603,7 +583,7 @@ func (s *scanner) fullComment() {
} }
} }
if r < 0 { if r < 0 {
s.error_at(s.line, s.col, "comment not terminated") s.errh(s.line, s.col, "comment not terminated")
return return
} }
} }
@ -651,7 +631,7 @@ func (s *scanner) escape(quote rune) bool {
if c < 0 { if c < 0 {
return true // complain in caller about EOF return true // complain in caller about EOF
} }
if gcCompat { if s.gcCompat {
name := "hex" name := "hex"
if base == 8 { if base == 8 {
name = "octal" name = "octal"

View file

@ -22,7 +22,7 @@ func TestScanner(t *testing.T) {
defer src.Close() defer src.Close()
var s scanner var s scanner
s.init("parser.go", src, nil, nil) s.init(src, nil, nil, false)
for { for {
s.next() s.next()
if s.tok == _EOF { if s.tok == _EOF {
@ -51,7 +51,7 @@ func TestTokens(t *testing.T) {
// scan source // scan source
var got scanner var got scanner
got.init("", &bytesReader{buf}, nil, nil) got.init(&bytesReader{buf}, nil, nil, false)
got.next() got.next()
for i, want := range sampleTokens { for i, want := range sampleTokens {
nlsemi := false nlsemi := false
@ -317,38 +317,38 @@ func TestScanErrors(t *testing.T) {
{`var s string = "\x"`, "non-hex character in escape sequence: \"", 1, 19}, {`var s string = "\x"`, "non-hex character in escape sequence: \"", 1, 19},
{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 1, 19}, {`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 1, 19},
{`//line :`, "invalid line number: ", 1, 9}, // TODO(gri) move these test cases into an appropriate parser test
{`//line :x`, "invalid line number: x", 1, 9}, // {`//line :`, "invalid line number: ", 1, 9},
{`//line foo :`, "invalid line number: ", 1, 13}, // {`//line :x`, "invalid line number: x", 1, 9},
{`//line foo:123abc`, "invalid line number: 123abc", 1, 12}, // {`//line foo :`, "invalid line number: ", 1, 13},
{`/**///line foo:x`, "invalid line number: x", 1, 16}, // {`//line foo:123abc`, "invalid line number: 123abc", 1, 12},
{`//line foo:0`, "invalid line number: 0", 1, 12}, // {`/**///line foo:x`, "invalid line number: x", 1, 16},
{fmt.Sprintf(`//line foo:%d`, lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), 1, 12}, // {`//line foo:0`, "invalid line number: 0", 1, 12},
// {fmt.Sprintf(`//line foo:%d`, lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), 1, 12},
// former problem cases // former problem cases
{"package p\n\n\xef", "invalid UTF-8 encoding", 3, 1}, {"package p\n\n\xef", "invalid UTF-8 encoding", 3, 1},
} { } {
var s scanner var s scanner
nerrors := 0 nerrors := 0
s.init("", &bytesReader{[]byte(test.src)}, func(err error) { s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) {
nerrors++ nerrors++
// only check the first error // only check the first error
e := err.(Error) // we know it's an Error
if nerrors == 1 { if nerrors == 1 {
if e.Msg != test.msg { if msg != test.msg {
t.Errorf("%q: got msg = %q; want %q", test.src, e.Msg, test.msg) t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
} }
if e.Line != test.line { if line != test.line {
t.Errorf("%q: got line = %d; want %d", test.src, e.Line, test.line) t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
} }
if e.Col != test.col { if col != test.col {
t.Errorf("%q: got col = %d; want %d", test.src, e.Col, test.col) t.Errorf("%q: got col = %d; want %d", test.src, col, test.col)
} }
} else if nerrors > 1 { } else if nerrors > 1 {
// TODO(gri) make this use position info // TODO(gri) make this use position info
t.Errorf("%q: got unexpected %q at line = %d", test.src, e.Msg, e.Line) t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
} }
}, nil) }, nil, true)
for { for {
s.next() s.next()

View file

@ -7,6 +7,9 @@
// Contiguous sequences of runes (literals) are extracted // Contiguous sequences of runes (literals) are extracted
// directly as []byte without the need to re-encode the // directly as []byte without the need to re-encode the
// runes in UTF-8 (as would be necessary with bufio.Reader). // runes in UTF-8 (as would be necessary with bufio.Reader).
//
// This file is self-contained (go tool compile source.go
// compiles) and thus could be made into its own package.
package syntax package syntax
@ -21,9 +24,8 @@ import (
// suf r0 r w // suf r0 r w
type source struct { type source struct {
src io.Reader src io.Reader
errh ErrorHandler errh func(line, pos uint, msg string)
first error // first error encountered
// source buffer // source buffer
buf [4 << 10]byte buf [4 << 10]byte
@ -31,44 +33,30 @@ type source struct {
r0, r, w int // previous/current read and write buf positions, excluding sentinel r0, r, w int // previous/current read and write buf positions, excluding sentinel
line0, line uint // previous/current line line0, line uint // previous/current line
col0, col uint // previous/current column col0, col uint // previous/current column
err error // pending io error ioerr error // pending io error
// literal buffer // literal buffer
lit []byte // literal prefix lit []byte // literal prefix
suf int // literal suffix; suf >= 0 means we are scanning a literal suf int // literal suffix; suf >= 0 means we are scanning a literal
} }
func (s *source) init(src io.Reader, errh ErrorHandler) { // init initializes source to read from src and to report errors via errh.
// errh must not be nil.
func (s *source) init(src io.Reader, errh func(line, pos uint, msg string)) {
s.src = src s.src = src
s.errh = errh s.errh = errh
s.first = nil
s.buf[0] = utf8.RuneSelf // terminate with sentinel s.buf[0] = utf8.RuneSelf // terminate with sentinel
s.offs = 0 s.offs = 0
s.r0, s.r, s.w = 0, 0, 0 s.r0, s.r, s.w = 0, 0, 0
s.line0, s.line = 1, 1 s.line0, s.line = 1, 1
s.col0, s.col = 1, 1 s.col0, s.col = 1, 1
s.err = nil s.ioerr = nil
s.lit = s.lit[:0] s.lit = s.lit[:0]
s.suf = -1 s.suf = -1
} }
func (s *source) error(msg string) {
s.error_at(s.line0, s.col0, msg)
}
func (s *source) error_at(line, col uint, msg string) {
err := Error{line, col, msg}
if s.first == nil {
s.first = err
}
if s.errh == nil {
panic(s.first)
}
s.errh(err)
}
// ungetr ungets the most recently read rune. // ungetr ungets the most recently read rune.
func (s *source) ungetr() { func (s *source) ungetr() {
s.r, s.line, s.col = s.r0, s.line0, s.col0 s.r, s.line, s.col = s.r0, s.line0, s.col0
@ -84,6 +72,14 @@ func (s *source) ungetr2() {
s.col0-- s.col0--
} }
// error reports msg through the error handler supplied to init, using
// (line0, col0) as the position. getr saves the current line and column
// into line0 and col0 before it reads a rune, so this is the position at
// which the most recently read rune started.
func (s *source) error(msg string) {
s.errh(s.line0, s.col0, msg)
}
// getr reads and returns the next rune.
// If an error occurs, the error handler provided to init
// is called with position (line and column) information
// and error message before getr returns.
func (s *source) getr() rune { func (s *source) getr() rune {
redo: redo:
s.r0, s.line0, s.col0 = s.r, s.line, s.col s.r0, s.line0, s.col0 = s.r, s.line, s.col
@ -94,7 +90,7 @@ redo:
// in the buffer. Measure and optimize if necessary. // in the buffer. Measure and optimize if necessary.
// make sure we have at least one rune in buffer, or we are at EOF // make sure we have at least one rune in buffer, or we are at EOF
for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.err == nil && s.w-s.r < len(s.buf) { for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.ioerr == nil && s.w-s.r < len(s.buf) {
s.fill() // s.w-s.r < len(s.buf) => buffer is not full s.fill() // s.w-s.r < len(s.buf) => buffer is not full
} }
@ -116,8 +112,8 @@ redo:
// EOF // EOF
if s.r == s.w { if s.r == s.w {
if s.err != io.EOF { if s.ioerr != io.EOF {
s.error(s.err.Error()) s.error(s.ioerr.Error())
} }
return -1 return -1
} }
@ -174,13 +170,13 @@ func (s *source) fill() {
if n > 0 || err != nil { if n > 0 || err != nil {
s.buf[s.w] = utf8.RuneSelf // sentinel s.buf[s.w] = utf8.RuneSelf // sentinel
if err != nil { if err != nil {
s.err = err s.ioerr = err
} }
return return
} }
} }
s.err = io.ErrNoProgress s.ioerr = io.ErrNoProgress
} }
func (s *source) startLit() { func (s *source) startLit() {