2016-03-04 17:09:08 -08:00
|
|
|
// Copyright 2016 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
2016-12-01 22:04:49 -08:00
|
|
|
// This file implements scanner, a lexical tokenizer for
|
|
|
|
|
// Go source. After initialization, consecutive calls of
|
|
|
|
|
// next advance the scanner one token at a time.
|
|
|
|
|
//
|
|
|
|
|
// This file, source.go, and tokens.go are self-contained
|
|
|
|
|
// (go tool compile scanner.go source.go tokens.go compiles)
|
|
|
|
|
// and thus could be made into its own package.
|
|
|
|
|
|
2016-03-04 17:09:08 -08:00
|
|
|
package syntax
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"fmt"
|
|
|
|
|
"io"
|
|
|
|
|
"unicode"
|
|
|
|
|
"unicode/utf8"
|
|
|
|
|
)
|
|
|
|
|
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
// The mode flags below control which comments are reported
// by calling the error handler. If no flag is set, comments
// are ignored.
//
// The flags are distinct bits (1 << iota) so they can be combined
// in a single mode value.
const (
	comments   uint = 1 << iota // call handler for all comments
	directives                  // call handler for directives only
)
|
|
|
|
|
|
2016-03-04 17:09:08 -08:00
|
|
|
type scanner struct {
|
|
|
|
|
source
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
mode uint
|
2016-12-02 16:22:45 -08:00
|
|
|
nlsemi bool // if set '\n' and EOF translate to ';'
|
2016-03-04 17:09:08 -08:00
|
|
|
|
|
|
|
|
// current token, valid after calling next()
|
2016-11-29 16:13:09 -08:00
|
|
|
line, col uint
|
2016-03-04 17:09:08 -08:00
|
|
|
tok token
|
2019-08-29 17:34:17 -07:00
|
|
|
lit string // valid if tok is _Name, _Literal, or _Semi ("semicolon", "newline", or "EOF"); may be malformed if bad is true
|
|
|
|
|
bad bool // valid if tok is _Literal, true if a syntax error occurred, lit may be malformed
|
2016-03-04 17:09:08 -08:00
|
|
|
kind LitKind // valid if tok is _Literal
|
|
|
|
|
op Operator // valid if tok is _Operator, _AssignOp, or _IncOp
|
|
|
|
|
prec int // valid if tok is _Operator, _AssignOp, or _IncOp
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mode uint) {
|
2016-03-04 17:09:08 -08:00
|
|
|
s.source.init(src, errh)
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
s.mode = mode
|
2016-12-01 22:04:49 -08:00
|
|
|
s.nlsemi = false
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
|
2019-08-28 21:56:47 -07:00
|
|
|
// errorf reports an error at the most recently read character position.
|
2019-01-12 20:33:58 -08:00
|
|
|
func (s *scanner) errorf(format string, args ...interface{}) {
|
2019-08-28 21:56:47 -07:00
|
|
|
s.bad = true
|
2019-01-12 20:33:58 -08:00
|
|
|
s.error(fmt.Sprintf(format, args...))
|
|
|
|
|
}
|
|
|
|
|
|
2019-08-28 21:56:47 -07:00
|
|
|
// errorAtf reports an error at a byte column offset relative to the current token start.
|
|
|
|
|
func (s *scanner) errorAtf(offset int, format string, args ...interface{}) {
|
|
|
|
|
s.bad = true
|
|
|
|
|
s.errh(s.line, s.col+uint(offset), fmt.Sprintf(format, args...))
|
|
|
|
|
}
|
|
|
|
|
|
2016-12-02 10:44:34 -08:00
|
|
|
// next advances the scanner by reading the next token.
|
|
|
|
|
//
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
// If a read, source encoding, or lexical error occurs, next calls
|
|
|
|
|
// the installed error handler with the respective error position
|
|
|
|
|
// and message. The error message is guaranteed to be non-empty and
|
|
|
|
|
// never starts with a '/'. The error handler must exist.
|
|
|
|
|
//
|
|
|
|
|
// If the scanner mode includes the comments flag and a comment
|
|
|
|
|
// (including comments containing directives) is encountered, the
|
|
|
|
|
// error handler is also called with each comment position and text
|
|
|
|
|
// (including opening /* or // and closing */, but without a newline
|
|
|
|
|
// at the end of line comments). Comment text always starts with a /
|
|
|
|
|
// which can be used to distinguish these handler calls from errors.
|
2016-12-02 10:44:34 -08:00
|
|
|
//
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
// If the scanner mode includes the directives (but not the comments)
|
|
|
|
|
// flag, only comments containing a //line, /*line, or //go: directive
|
|
|
|
|
// are reported, in the same way as regular comments. Directives in
|
|
|
|
|
// //-style comments are only recognized if they are at the beginning
|
|
|
|
|
// of a line.
|
2016-12-02 10:44:34 -08:00
|
|
|
//
|
2016-03-04 17:09:08 -08:00
|
|
|
func (s *scanner) next() {
|
|
|
|
|
nlsemi := s.nlsemi
|
|
|
|
|
s.nlsemi = false
|
|
|
|
|
|
|
|
|
|
redo:
|
|
|
|
|
// skip white space
|
|
|
|
|
c := s.getr()
|
|
|
|
|
for c == ' ' || c == '\t' || c == '\n' && !nlsemi || c == '\r' {
|
|
|
|
|
c = s.getr()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// token start
|
2016-11-29 16:13:09 -08:00
|
|
|
s.line, s.col = s.source.line0, s.source.col0
|
2016-03-04 17:09:08 -08:00
|
|
|
|
2016-12-02 16:22:45 -08:00
|
|
|
if isLetter(c) || c >= utf8.RuneSelf && s.isIdentRune(c, true) {
|
2016-03-04 17:09:08 -08:00
|
|
|
s.ident()
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch c {
|
|
|
|
|
case -1:
|
|
|
|
|
if nlsemi {
|
2017-02-08 17:30:45 -08:00
|
|
|
s.lit = "EOF"
|
2016-03-04 17:09:08 -08:00
|
|
|
s.tok = _Semi
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
s.tok = _EOF
|
|
|
|
|
|
|
|
|
|
case '\n':
|
2017-02-08 17:30:45 -08:00
|
|
|
s.lit = "newline"
|
2016-03-04 17:09:08 -08:00
|
|
|
s.tok = _Semi
|
|
|
|
|
|
|
|
|
|
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
|
|
|
|
s.number(c)
|
|
|
|
|
|
|
|
|
|
case '"':
|
|
|
|
|
s.stdString()
|
|
|
|
|
|
|
|
|
|
case '`':
|
|
|
|
|
s.rawString()
|
|
|
|
|
|
|
|
|
|
case '\'':
|
|
|
|
|
s.rune()
|
|
|
|
|
|
|
|
|
|
case '(':
|
|
|
|
|
s.tok = _Lparen
|
|
|
|
|
|
|
|
|
|
case '[':
|
|
|
|
|
s.tok = _Lbrack
|
|
|
|
|
|
|
|
|
|
case '{':
|
|
|
|
|
s.tok = _Lbrace
|
|
|
|
|
|
|
|
|
|
case ',':
|
|
|
|
|
s.tok = _Comma
|
|
|
|
|
|
|
|
|
|
case ';':
|
2017-02-08 17:30:45 -08:00
|
|
|
s.lit = "semicolon"
|
2016-03-04 17:09:08 -08:00
|
|
|
s.tok = _Semi
|
|
|
|
|
|
|
|
|
|
case ')':
|
|
|
|
|
s.nlsemi = true
|
|
|
|
|
s.tok = _Rparen
|
|
|
|
|
|
|
|
|
|
case ']':
|
|
|
|
|
s.nlsemi = true
|
|
|
|
|
s.tok = _Rbrack
|
|
|
|
|
|
|
|
|
|
case '}':
|
|
|
|
|
s.nlsemi = true
|
|
|
|
|
s.tok = _Rbrace
|
|
|
|
|
|
|
|
|
|
case ':':
|
|
|
|
|
if s.getr() == '=' {
|
|
|
|
|
s.tok = _Define
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
s.ungetr()
|
|
|
|
|
s.tok = _Colon
|
|
|
|
|
|
|
|
|
|
case '.':
|
|
|
|
|
c = s.getr()
|
2019-01-12 20:33:58 -08:00
|
|
|
if isDecimal(c) {
|
|
|
|
|
s.ungetr()
|
|
|
|
|
s.unread(1) // correct position of '.' (needed by startLit in number)
|
2016-03-04 17:09:08 -08:00
|
|
|
s.number('.')
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if c == '.' {
|
|
|
|
|
c = s.getr()
|
|
|
|
|
if c == '.' {
|
|
|
|
|
s.tok = _DotDotDot
|
|
|
|
|
break
|
|
|
|
|
}
|
2019-01-22 15:20:50 -08:00
|
|
|
s.unread(1)
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
s.ungetr()
|
|
|
|
|
s.tok = _Dot
|
|
|
|
|
|
|
|
|
|
case '+':
|
|
|
|
|
s.op, s.prec = Add, precAdd
|
|
|
|
|
c = s.getr()
|
|
|
|
|
if c != '+' {
|
|
|
|
|
goto assignop
|
|
|
|
|
}
|
|
|
|
|
s.nlsemi = true
|
|
|
|
|
s.tok = _IncOp
|
|
|
|
|
|
|
|
|
|
case '-':
|
|
|
|
|
s.op, s.prec = Sub, precAdd
|
|
|
|
|
c = s.getr()
|
|
|
|
|
if c != '-' {
|
|
|
|
|
goto assignop
|
|
|
|
|
}
|
|
|
|
|
s.nlsemi = true
|
|
|
|
|
s.tok = _IncOp
|
|
|
|
|
|
|
|
|
|
case '*':
|
|
|
|
|
s.op, s.prec = Mul, precMul
|
|
|
|
|
// don't goto assignop - want _Star token
|
|
|
|
|
if s.getr() == '=' {
|
|
|
|
|
s.tok = _AssignOp
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
s.ungetr()
|
|
|
|
|
s.tok = _Star
|
|
|
|
|
|
|
|
|
|
case '/':
|
|
|
|
|
c = s.getr()
|
|
|
|
|
if c == '/' {
|
|
|
|
|
s.lineComment()
|
|
|
|
|
goto redo
|
|
|
|
|
}
|
|
|
|
|
if c == '*' {
|
|
|
|
|
s.fullComment()
|
|
|
|
|
if s.source.line > s.line && nlsemi {
|
|
|
|
|
// A multi-line comment acts like a newline;
|
|
|
|
|
// it translates to a ';' if nlsemi is set.
|
2017-02-08 17:30:45 -08:00
|
|
|
s.lit = "newline"
|
2016-03-04 17:09:08 -08:00
|
|
|
s.tok = _Semi
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
goto redo
|
|
|
|
|
}
|
|
|
|
|
s.op, s.prec = Div, precMul
|
|
|
|
|
goto assignop
|
|
|
|
|
|
|
|
|
|
case '%':
|
|
|
|
|
s.op, s.prec = Rem, precMul
|
|
|
|
|
c = s.getr()
|
|
|
|
|
goto assignop
|
|
|
|
|
|
|
|
|
|
case '&':
|
|
|
|
|
c = s.getr()
|
|
|
|
|
if c == '&' {
|
|
|
|
|
s.op, s.prec = AndAnd, precAndAnd
|
|
|
|
|
s.tok = _Operator
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
s.op, s.prec = And, precMul
|
|
|
|
|
if c == '^' {
|
|
|
|
|
s.op = AndNot
|
|
|
|
|
c = s.getr()
|
|
|
|
|
}
|
|
|
|
|
goto assignop
|
|
|
|
|
|
|
|
|
|
case '|':
|
|
|
|
|
c = s.getr()
|
|
|
|
|
if c == '|' {
|
|
|
|
|
s.op, s.prec = OrOr, precOrOr
|
|
|
|
|
s.tok = _Operator
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
s.op, s.prec = Or, precAdd
|
|
|
|
|
goto assignop
|
|
|
|
|
|
|
|
|
|
case '^':
|
|
|
|
|
s.op, s.prec = Xor, precAdd
|
|
|
|
|
c = s.getr()
|
|
|
|
|
goto assignop
|
|
|
|
|
|
|
|
|
|
case '<':
|
|
|
|
|
c = s.getr()
|
|
|
|
|
if c == '=' {
|
|
|
|
|
s.op, s.prec = Leq, precCmp
|
|
|
|
|
s.tok = _Operator
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if c == '<' {
|
|
|
|
|
s.op, s.prec = Shl, precMul
|
|
|
|
|
c = s.getr()
|
|
|
|
|
goto assignop
|
|
|
|
|
}
|
|
|
|
|
if c == '-' {
|
2016-11-04 16:27:31 -07:00
|
|
|
s.tok = _Arrow
|
2016-03-04 17:09:08 -08:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
s.ungetr()
|
|
|
|
|
s.op, s.prec = Lss, precCmp
|
|
|
|
|
s.tok = _Operator
|
|
|
|
|
|
|
|
|
|
case '>':
|
|
|
|
|
c = s.getr()
|
|
|
|
|
if c == '=' {
|
|
|
|
|
s.op, s.prec = Geq, precCmp
|
|
|
|
|
s.tok = _Operator
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if c == '>' {
|
|
|
|
|
s.op, s.prec = Shr, precMul
|
|
|
|
|
c = s.getr()
|
|
|
|
|
goto assignop
|
|
|
|
|
}
|
|
|
|
|
s.ungetr()
|
|
|
|
|
s.op, s.prec = Gtr, precCmp
|
|
|
|
|
s.tok = _Operator
|
|
|
|
|
|
|
|
|
|
case '=':
|
2016-11-04 16:27:31 -07:00
|
|
|
if s.getr() == '=' {
|
2016-03-04 17:09:08 -08:00
|
|
|
s.op, s.prec = Eql, precCmp
|
|
|
|
|
s.tok = _Operator
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
s.ungetr()
|
|
|
|
|
s.tok = _Assign
|
|
|
|
|
|
|
|
|
|
case '!':
|
|
|
|
|
if s.getr() == '=' {
|
|
|
|
|
s.op, s.prec = Neq, precCmp
|
|
|
|
|
s.tok = _Operator
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
s.ungetr()
|
|
|
|
|
s.op, s.prec = Not, 0
|
|
|
|
|
s.tok = _Operator
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
s.tok = 0
|
2019-01-12 20:33:58 -08:00
|
|
|
s.errorf("invalid character %#U", c)
|
2016-03-04 17:09:08 -08:00
|
|
|
goto redo
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
assignop:
|
|
|
|
|
if c == '=' {
|
|
|
|
|
s.tok = _AssignOp
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
s.ungetr()
|
|
|
|
|
s.tok = _Operator
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func isLetter(c rune) bool {
|
2019-01-12 20:33:58 -08:00
|
|
|
return 'a' <= lower(c) && lower(c) <= 'z' || c == '_'
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *scanner) ident() {
|
|
|
|
|
s.startLit()
|
|
|
|
|
|
|
|
|
|
// accelerate common case (7bit ASCII)
|
|
|
|
|
c := s.getr()
|
2019-01-12 20:33:58 -08:00
|
|
|
for isLetter(c) || isDecimal(c) {
|
2016-03-04 17:09:08 -08:00
|
|
|
c = s.getr()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// general case
|
|
|
|
|
if c >= utf8.RuneSelf {
|
2016-12-02 16:22:45 -08:00
|
|
|
for s.isIdentRune(c, false) {
|
2016-03-04 17:09:08 -08:00
|
|
|
c = s.getr()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
s.ungetr()
|
|
|
|
|
|
|
|
|
|
lit := s.stopLit()
|
|
|
|
|
|
|
|
|
|
// possibly a keyword
|
|
|
|
|
if len(lit) >= 2 {
|
2018-02-20 10:21:41 +00:00
|
|
|
if tok := keywordMap[hash(lit)]; tok != 0 && tokStrFast(tok) == string(lit) {
|
2016-03-04 17:09:08 -08:00
|
|
|
s.nlsemi = contains(1<<_Break|1<<_Continue|1<<_Fallthrough|1<<_Return, tok)
|
|
|
|
|
s.tok = tok
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
s.nlsemi = true
|
|
|
|
|
s.lit = string(lit)
|
|
|
|
|
s.tok = _Name
|
|
|
|
|
}
|
|
|
|
|
|
2018-02-20 10:21:41 +00:00
|
|
|
// tokStrFast is a faster version of token.String, which assumes that tok
|
|
|
|
|
// is one of the valid tokens - and can thus skip bounds checks.
|
|
|
|
|
func tokStrFast(tok token) string {
|
|
|
|
|
return _token_name[_token_index[tok-1]:_token_index[tok]]
|
|
|
|
|
}
|
|
|
|
|
|
2016-12-02 16:22:45 -08:00
|
|
|
func (s *scanner) isIdentRune(c rune, first bool) bool {
|
|
|
|
|
switch {
|
|
|
|
|
case unicode.IsLetter(c) || c == '_':
|
|
|
|
|
// ok
|
|
|
|
|
case unicode.IsDigit(c):
|
|
|
|
|
if first {
|
2019-01-12 20:33:58 -08:00
|
|
|
s.errorf("identifier cannot begin with digit %#U", c)
|
2016-12-02 16:22:45 -08:00
|
|
|
}
|
|
|
|
|
case c >= utf8.RuneSelf:
|
2020-02-10 22:02:47 -08:00
|
|
|
s.errorf("invalid character %#U in identifier", c)
|
2016-12-02 16:22:45 -08:00
|
|
|
default:
|
|
|
|
|
return false
|
2016-06-06 17:59:05 -07:00
|
|
|
}
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-04 17:09:08 -08:00
|
|
|
// hash is a perfect hash function for keywords.
|
|
|
|
|
// It assumes that s has at least length 2.
|
|
|
|
|
func hash(s []byte) uint {
|
|
|
|
|
return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var keywordMap [1 << 6]token // size must be power of two
|
|
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
|
// populate keywordMap
|
|
|
|
|
for tok := _Break; tok <= _Var; tok++ {
|
2018-02-20 10:21:41 +00:00
|
|
|
h := hash([]byte(tok.String()))
|
2016-03-04 17:09:08 -08:00
|
|
|
if keywordMap[h] != 0 {
|
|
|
|
|
panic("imperfect hash")
|
|
|
|
|
}
|
|
|
|
|
keywordMap[h] = tok
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-01-12 20:33:58 -08:00
|
|
|
// lower sets the ASCII case bit of c. The result is the lower-case
// form of c if c is an ASCII letter; for other c it is not meaningful
// beyond use in range comparisons against lower-case letters.
func lower(c rune) rune { return ('a' - 'A') | c } // returns lower-case c iff c is ASCII letter

// isDecimal reports whether c is an ASCII decimal digit.
func isDecimal(c rune) bool { return '0' <= c && c <= '9' }

// isHex reports whether c is an ASCII hexadecimal digit (either case).
func isHex(c rune) bool { return '0' <= c && c <= '9' || 'a' <= lower(c) && lower(c) <= 'f' }
|
|
|
|
|
|
|
|
|
|
// digits accepts the sequence { digit | '_' } starting with c0.
|
|
|
|
|
// If base <= 10, digits accepts any decimal digit but records
|
|
|
|
|
// the index (relative to the literal start) of a digit >= base
|
|
|
|
|
// in *invalid, if *invalid < 0.
|
|
|
|
|
// digits returns the first rune that is not part of the sequence
|
|
|
|
|
// anymore, and a bitset describing whether the sequence contained
|
|
|
|
|
// digits (bit 0 is set), or separators '_' (bit 1 is set).
|
|
|
|
|
func (s *scanner) digits(c0 rune, base int, invalid *int) (c rune, digsep int) {
|
|
|
|
|
c = c0
|
|
|
|
|
if base <= 10 {
|
|
|
|
|
max := rune('0' + base)
|
|
|
|
|
for isDecimal(c) || c == '_' {
|
|
|
|
|
ds := 1
|
|
|
|
|
if c == '_' {
|
|
|
|
|
ds = 2
|
|
|
|
|
} else if c >= max && *invalid < 0 {
|
|
|
|
|
*invalid = int(s.col0 - s.col) // record invalid rune index
|
|
|
|
|
}
|
|
|
|
|
digsep |= ds
|
|
|
|
|
c = s.getr()
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
for isHex(c) || c == '_' {
|
|
|
|
|
ds := 1
|
|
|
|
|
if c == '_' {
|
|
|
|
|
ds = 2
|
|
|
|
|
}
|
|
|
|
|
digsep |= ds
|
|
|
|
|
c = s.getr()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-04 17:09:08 -08:00
|
|
|
func (s *scanner) number(c rune) {
|
|
|
|
|
s.startLit()
|
2019-08-28 21:56:47 -07:00
|
|
|
s.bad = false
|
2016-03-04 17:09:08 -08:00
|
|
|
|
2019-01-12 20:33:58 -08:00
|
|
|
base := 10 // number base
|
|
|
|
|
prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
|
|
|
|
|
digsep := 0 // bit 0: digit present, bit 1: '_' present
|
|
|
|
|
invalid := -1 // index of invalid digit in literal, or < 0
|
|
|
|
|
|
|
|
|
|
// integer part
|
|
|
|
|
var ds int
|
2016-03-04 17:09:08 -08:00
|
|
|
if c != '.' {
|
2019-01-12 20:33:58 -08:00
|
|
|
s.kind = IntLit
|
2016-03-04 17:09:08 -08:00
|
|
|
if c == '0' {
|
|
|
|
|
c = s.getr()
|
2019-01-12 20:33:58 -08:00
|
|
|
switch lower(c) {
|
|
|
|
|
case 'x':
|
2016-03-04 17:09:08 -08:00
|
|
|
c = s.getr()
|
2019-01-12 20:33:58 -08:00
|
|
|
base, prefix = 16, 'x'
|
|
|
|
|
case 'o':
|
2016-03-04 17:09:08 -08:00
|
|
|
c = s.getr()
|
2019-01-12 20:33:58 -08:00
|
|
|
base, prefix = 8, 'o'
|
|
|
|
|
case 'b':
|
2016-03-04 17:09:08 -08:00
|
|
|
c = s.getr()
|
2019-01-12 20:33:58 -08:00
|
|
|
base, prefix = 2, 'b'
|
|
|
|
|
default:
|
|
|
|
|
base, prefix = 8, '0'
|
|
|
|
|
digsep = 1 // leading 0
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
}
|
2019-01-12 20:33:58 -08:00
|
|
|
c, ds = s.digits(c, base, &invalid)
|
|
|
|
|
digsep |= ds
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
|
2019-01-12 20:33:58 -08:00
|
|
|
// fractional part
|
2016-03-04 17:09:08 -08:00
|
|
|
if c == '.' {
|
|
|
|
|
s.kind = FloatLit
|
2019-01-12 20:33:58 -08:00
|
|
|
if prefix == 'o' || prefix == 'b' {
|
2019-08-28 21:56:47 -07:00
|
|
|
s.errorf("invalid radix point in %s", litname(prefix))
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
2019-01-12 20:33:58 -08:00
|
|
|
c, ds = s.digits(s.getr(), base, &invalid)
|
|
|
|
|
digsep |= ds
|
|
|
|
|
}
|
|
|
|
|
|
2019-08-29 17:34:17 -07:00
|
|
|
if digsep&1 == 0 && !s.bad {
|
2019-08-28 21:56:47 -07:00
|
|
|
s.errorf("%s has no digits", litname(prefix))
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// exponent
|
2019-01-12 20:33:58 -08:00
|
|
|
if e := lower(c); e == 'e' || e == 'p' {
|
2019-08-29 17:34:17 -07:00
|
|
|
if !s.bad {
|
|
|
|
|
switch {
|
|
|
|
|
case e == 'e' && prefix != 0 && prefix != '0':
|
|
|
|
|
s.errorf("%q exponent requires decimal mantissa", c)
|
|
|
|
|
case e == 'p' && prefix != 'x':
|
|
|
|
|
s.errorf("%q exponent requires hexadecimal mantissa", c)
|
|
|
|
|
}
|
2019-01-12 20:33:58 -08:00
|
|
|
}
|
2016-03-04 17:09:08 -08:00
|
|
|
c = s.getr()
|
2019-01-12 20:33:58 -08:00
|
|
|
s.kind = FloatLit
|
|
|
|
|
if c == '+' || c == '-' {
|
2016-03-04 17:09:08 -08:00
|
|
|
c = s.getr()
|
|
|
|
|
}
|
2019-01-12 20:33:58 -08:00
|
|
|
c, ds = s.digits(c, 10, nil)
|
|
|
|
|
digsep |= ds
|
2019-08-29 17:34:17 -07:00
|
|
|
if ds&1 == 0 && !s.bad {
|
2019-08-28 21:56:47 -07:00
|
|
|
s.errorf("exponent has no digits")
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
2019-08-29 17:34:17 -07:00
|
|
|
} else if prefix == 'x' && s.kind == FloatLit && !s.bad {
|
2019-08-28 21:56:47 -07:00
|
|
|
s.errorf("hexadecimal mantissa requires a 'p' exponent")
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
|
2019-01-12 20:33:58 -08:00
|
|
|
// suffix 'i'
|
2016-03-04 17:09:08 -08:00
|
|
|
if c == 'i' {
|
|
|
|
|
s.kind = ImagLit
|
2019-01-12 20:33:58 -08:00
|
|
|
c = s.getr()
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
s.ungetr()
|
2019-01-12 20:33:58 -08:00
|
|
|
|
2016-03-04 17:09:08 -08:00
|
|
|
s.nlsemi = true
|
|
|
|
|
s.lit = string(s.stopLit())
|
|
|
|
|
s.tok = _Literal
|
2019-01-12 20:33:58 -08:00
|
|
|
|
2019-08-29 17:34:17 -07:00
|
|
|
if s.kind == IntLit && invalid >= 0 && !s.bad {
|
2019-08-28 21:56:47 -07:00
|
|
|
s.errorAtf(invalid, "invalid digit %q in %s", s.lit[invalid], litname(prefix))
|
2019-01-12 20:33:58 -08:00
|
|
|
}
|
|
|
|
|
|
2019-08-29 17:34:17 -07:00
|
|
|
if digsep&2 != 0 && !s.bad {
|
2019-01-12 20:33:58 -08:00
|
|
|
if i := invalidSep(s.lit); i >= 0 {
|
2019-08-28 21:56:47 -07:00
|
|
|
s.errorAtf(i, "'_' must separate successive digits")
|
2019-01-12 20:33:58 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// litname returns the descriptive name, as used in error messages, of
// the kind of number literal identified by prefix: 'x' is hexadecimal,
// 'o' and '0' are octal, 'b' is binary, and anything else is decimal.
func litname(prefix rune) string {
	switch prefix {
	case 'x':
		return "hexadecimal literal"
	case 'o', '0':
		return "octal literal"
	case 'b':
		return "binary literal"
	}
	return "decimal literal"
}
|
|
|
|
|
|
|
|
|
|
// invalidSep returns the index of the first invalid separator in x, or -1.
|
|
|
|
|
func invalidSep(x string) int {
|
|
|
|
|
x1 := ' ' // prefix char, we only care if it's 'x'
|
|
|
|
|
d := '.' // digit, one of '_', '0' (a digit), or '.' (anything else)
|
|
|
|
|
i := 0
|
|
|
|
|
|
|
|
|
|
// a prefix counts as a digit
|
|
|
|
|
if len(x) >= 2 && x[0] == '0' {
|
|
|
|
|
x1 = lower(rune(x[1]))
|
|
|
|
|
if x1 == 'x' || x1 == 'o' || x1 == 'b' {
|
|
|
|
|
d = '0'
|
|
|
|
|
i = 2
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// mantissa and exponent
|
|
|
|
|
for ; i < len(x); i++ {
|
|
|
|
|
p := d // previous digit
|
|
|
|
|
d = rune(x[i])
|
|
|
|
|
switch {
|
|
|
|
|
case d == '_':
|
|
|
|
|
if p != '0' {
|
|
|
|
|
return i
|
|
|
|
|
}
|
|
|
|
|
case isDecimal(d) || x1 == 'x' && isHex(d):
|
|
|
|
|
d = '0'
|
|
|
|
|
default:
|
|
|
|
|
if p == '_' {
|
|
|
|
|
return i - 1
|
|
|
|
|
}
|
|
|
|
|
d = '.'
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if d == '_' {
|
|
|
|
|
return len(x) - 1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return -1
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
|
2017-02-16 12:52:01 -08:00
|
|
|
func (s *scanner) rune() {
|
|
|
|
|
s.startLit()
|
2019-08-28 21:56:47 -07:00
|
|
|
s.bad = false
|
2017-02-16 12:52:01 -08:00
|
|
|
|
|
|
|
|
n := 0
|
|
|
|
|
for ; ; n++ {
|
|
|
|
|
r := s.getr()
|
|
|
|
|
if r == '\'' {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if r == '\\' {
|
2019-08-28 21:56:47 -07:00
|
|
|
s.escape('\'')
|
2017-02-16 12:52:01 -08:00
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if r == '\n' {
|
|
|
|
|
s.ungetr() // assume newline is not part of literal
|
2019-08-28 21:56:47 -07:00
|
|
|
if !s.bad {
|
2020-02-10 22:02:47 -08:00
|
|
|
s.errorf("newline in rune literal")
|
2017-02-16 12:52:01 -08:00
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if r < 0 {
|
2019-08-28 21:56:47 -07:00
|
|
|
if !s.bad {
|
2020-02-10 22:02:47 -08:00
|
|
|
s.errorAtf(0, "rune literal not terminated")
|
2017-02-16 12:52:01 -08:00
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-08-28 21:56:47 -07:00
|
|
|
if !s.bad {
|
2017-02-16 12:52:01 -08:00
|
|
|
if n == 0 {
|
2020-02-10 22:02:47 -08:00
|
|
|
s.errorf("empty rune literal or unescaped '")
|
2017-02-16 12:52:01 -08:00
|
|
|
} else if n != 1 {
|
2020-02-10 22:02:47 -08:00
|
|
|
s.errorAtf(0, "more than one character in rune literal")
|
2017-02-16 12:52:01 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
s.nlsemi = true
|
|
|
|
|
s.lit = string(s.stopLit())
|
|
|
|
|
s.kind = RuneLit
|
|
|
|
|
s.tok = _Literal
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-04 17:09:08 -08:00
|
|
|
func (s *scanner) stdString() {
|
|
|
|
|
s.startLit()
|
2019-08-28 21:56:47 -07:00
|
|
|
s.bad = false
|
2016-03-04 17:09:08 -08:00
|
|
|
|
|
|
|
|
for {
|
|
|
|
|
r := s.getr()
|
|
|
|
|
if r == '"' {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if r == '\\' {
|
|
|
|
|
s.escape('"')
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if r == '\n' {
|
|
|
|
|
s.ungetr() // assume newline is not part of literal
|
2019-08-28 21:56:47 -07:00
|
|
|
s.errorf("newline in string")
|
2016-03-04 17:09:08 -08:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if r < 0 {
|
2019-08-28 21:56:47 -07:00
|
|
|
s.errorAtf(0, "string not terminated")
|
2016-03-04 17:09:08 -08:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
s.nlsemi = true
|
|
|
|
|
s.lit = string(s.stopLit())
|
|
|
|
|
s.kind = StringLit
|
|
|
|
|
s.tok = _Literal
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *scanner) rawString() {
|
|
|
|
|
s.startLit()
|
2019-08-28 21:56:47 -07:00
|
|
|
s.bad = false
|
2016-03-04 17:09:08 -08:00
|
|
|
|
|
|
|
|
for {
|
|
|
|
|
r := s.getr()
|
|
|
|
|
if r == '`' {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if r < 0 {
|
2019-08-28 21:56:47 -07:00
|
|
|
s.errorAtf(0, "string not terminated")
|
2016-03-04 17:09:08 -08:00
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// We leave CRs in the string since they are part of the
|
|
|
|
|
// literal (even though they are not part of the literal
|
|
|
|
|
// value).
|
|
|
|
|
|
|
|
|
|
s.nlsemi = true
|
|
|
|
|
s.lit = string(s.stopLit())
|
|
|
|
|
s.kind = StringLit
|
|
|
|
|
s.tok = _Literal
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
func (s *scanner) comment(text string) {
|
|
|
|
|
s.errh(s.line, s.col, text)
|
|
|
|
|
}
|
|
|
|
|
|
2016-11-30 23:28:40 -08:00
|
|
|
func (s *scanner) skipLine(r rune) {
|
|
|
|
|
for r >= 0 {
|
|
|
|
|
if r == '\n' {
|
|
|
|
|
s.ungetr() // don't consume '\n' - needed for nlsemi logic
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
r = s.getr()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-04 17:09:08 -08:00
|
|
|
func (s *scanner) lineComment() {
|
|
|
|
|
r := s.getr()
|
2018-01-03 15:52:22 -08:00
|
|
|
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
if s.mode&comments != 0 {
|
|
|
|
|
s.startLit()
|
|
|
|
|
s.skipLine(r)
|
|
|
|
|
s.comment("//" + string(s.stopLit()))
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2017-03-09 13:38:10 -08:00
|
|
|
// directives must start at the beginning of the line (s.col == colbase)
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
if s.mode&directives == 0 || s.col != colbase || (r != 'g' && r != 'l') {
|
2016-11-30 23:28:40 -08:00
|
|
|
s.skipLine(r)
|
|
|
|
|
return
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
// recognize go: or line directives
|
2016-12-01 22:04:49 -08:00
|
|
|
prefix := "go:"
|
|
|
|
|
if r == 'l' {
|
|
|
|
|
prefix = "line "
|
|
|
|
|
}
|
2016-03-04 17:09:08 -08:00
|
|
|
for _, m := range prefix {
|
|
|
|
|
if r != m {
|
2016-11-30 23:28:40 -08:00
|
|
|
s.skipLine(r)
|
|
|
|
|
return
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
r = s.getr()
|
|
|
|
|
}
|
|
|
|
|
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
// directive text
|
2016-11-30 23:28:40 -08:00
|
|
|
s.startLit()
|
|
|
|
|
s.skipLine(r)
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
s.comment("//" + prefix + string(s.stopLit()))
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
func (s *scanner) skipComment(r rune) bool {
|
2018-01-03 15:52:22 -08:00
|
|
|
for r >= 0 {
|
2016-03-04 17:09:08 -08:00
|
|
|
for r == '*' {
|
|
|
|
|
r = s.getr()
|
|
|
|
|
if r == '/' {
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
return true
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
}
|
2018-01-03 15:52:22 -08:00
|
|
|
r = s.getr()
|
|
|
|
|
}
|
2019-08-28 21:56:47 -07:00
|
|
|
s.errorAtf(0, "comment not terminated")
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
return false
|
2018-01-03 15:52:22 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *scanner) fullComment() {
|
|
|
|
|
r := s.getr()
|
|
|
|
|
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
if s.mode&comments != 0 {
|
|
|
|
|
s.startLit()
|
|
|
|
|
if s.skipComment(r) {
|
|
|
|
|
s.comment("/*" + string(s.stopLit()))
|
|
|
|
|
} else {
|
|
|
|
|
s.killLit() // not a complete comment - ignore
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if s.mode&directives == 0 || r != 'l' {
|
2018-01-03 15:52:22 -08:00
|
|
|
s.skipComment(r)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// recognize line directive
|
|
|
|
|
const prefix = "line "
|
|
|
|
|
for _, m := range prefix {
|
|
|
|
|
if r != m {
|
|
|
|
|
s.skipComment(r)
|
2016-03-04 17:09:08 -08:00
|
|
|
return
|
|
|
|
|
}
|
2018-01-03 15:52:22 -08:00
|
|
|
r = s.getr()
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
2018-01-03 15:52:22 -08:00
|
|
|
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
// directive text
|
2018-01-03 15:52:22 -08:00
|
|
|
s.startLit()
|
cmd/compile/internal/syntax: implement comment reporting in scanner
R=go1.11
In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.
In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).
Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.
Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.
Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-01-17 21:42:51 -08:00
|
|
|
if s.skipComment(r) {
|
|
|
|
|
s.comment("/*" + prefix + string(s.stopLit()))
|
|
|
|
|
} else {
|
|
|
|
|
s.killLit() // not a complete comment - ignore
|
2018-01-03 15:52:22 -08:00
|
|
|
}
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
|
2019-08-28 21:56:47 -07:00
|
|
|
func (s *scanner) escape(quote rune) {
|
2016-03-04 17:09:08 -08:00
|
|
|
var n int
|
|
|
|
|
var base, max uint32
|
|
|
|
|
|
|
|
|
|
c := s.getr()
|
|
|
|
|
switch c {
|
|
|
|
|
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
|
2019-08-28 21:56:47 -07:00
|
|
|
return
|
2016-03-04 17:09:08 -08:00
|
|
|
case '0', '1', '2', '3', '4', '5', '6', '7':
|
|
|
|
|
n, base, max = 3, 8, 255
|
|
|
|
|
case 'x':
|
|
|
|
|
c = s.getr()
|
|
|
|
|
n, base, max = 2, 16, 255
|
|
|
|
|
case 'u':
|
|
|
|
|
c = s.getr()
|
|
|
|
|
n, base, max = 4, 16, unicode.MaxRune
|
|
|
|
|
case 'U':
|
|
|
|
|
c = s.getr()
|
|
|
|
|
n, base, max = 8, 16, unicode.MaxRune
|
|
|
|
|
default:
|
|
|
|
|
if c < 0 {
|
2019-08-28 21:56:47 -07:00
|
|
|
return // complain in caller about EOF
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
2020-02-10 22:02:47 -08:00
|
|
|
s.errorf("unknown escape")
|
2019-08-28 21:56:47 -07:00
|
|
|
return
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var x uint32
|
|
|
|
|
for i := n; i > 0; i-- {
|
|
|
|
|
d := base
|
|
|
|
|
switch {
|
2019-01-12 20:33:58 -08:00
|
|
|
case isDecimal(c):
|
2016-03-04 17:09:08 -08:00
|
|
|
d = uint32(c) - '0'
|
2019-01-12 20:33:58 -08:00
|
|
|
case 'a' <= lower(c) && lower(c) <= 'f':
|
|
|
|
|
d = uint32(lower(c)) - ('a' - 10)
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
if d >= base {
|
|
|
|
|
if c < 0 {
|
2019-08-28 21:56:47 -07:00
|
|
|
return // complain in caller about EOF
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
2016-12-02 16:22:45 -08:00
|
|
|
kind := "hex"
|
|
|
|
|
if base == 8 {
|
|
|
|
|
kind = "octal"
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
2020-02-10 22:02:47 -08:00
|
|
|
s.errorf("invalid character %q in %s escape", c, kind)
|
2016-03-04 17:09:08 -08:00
|
|
|
s.ungetr()
|
2019-08-28 21:56:47 -07:00
|
|
|
return
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
// d < base
|
|
|
|
|
x = x*base + d
|
|
|
|
|
c = s.getr()
|
|
|
|
|
}
|
|
|
|
|
s.ungetr()
|
|
|
|
|
|
2016-06-06 17:59:05 -07:00
|
|
|
if x > max && base == 8 {
|
2020-02-10 22:02:47 -08:00
|
|
|
s.errorf("octal escape value %d > 255", x)
|
2019-08-28 21:56:47 -07:00
|
|
|
return
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if x > max || 0xD800 <= x && x < 0xE000 /* surrogate range */ {
|
2020-02-10 22:02:47 -08:00
|
|
|
s.errorf("escape is invalid Unicode code point %#U", x)
|
2016-03-04 17:09:08 -08:00
|
|
|
}
|
|
|
|
|
}
|