cmd/compile/internal/syntax: implement comment reporting in scanner

R=go1.11

In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.

In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).

Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.

Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.

Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
This commit is contained in:
Robert Griesemer 2018-01-17 21:42:51 -08:00
parent 670494827c
commit b890688986
4 changed files with 164 additions and 50 deletions

View file

@ -38,24 +38,31 @@ func (p *parser) init(base *src.PosBase, r io.Reader, errh ErrorHandler, pragh P
p.mode = mode
p.scanner.init(
r,
// Error and pragma handlers for scanner.
// Because the (line, col) positions passed to these
// handlers are always at or after the current reading
// position, it is save to use the most recent position
// Error and directive handler for scanner.
// Because the (line, col) position passed to the
// handler is always at or after the current reading
// position, it is safe to use the most recent position
// base to compute the corresponding Pos value.
func(line, col uint, msg string) {
p.error_at(p.pos_at(line, col), msg)
},
func(line, col uint, text string) {
const prefix = "line "
if strings.HasPrefix(text, prefix) {
p.updateBase(line, col+uint(len(prefix)), text[len(prefix):])
if msg[0] != '/' {
p.error_at(p.pos_at(line, col), msg)
return
}
if pragh != nil {
// otherwise it must be a comment containing a line or go: directive
text := commentText(msg)
col += 2 // text starts after // or /*
if strings.HasPrefix(text, "line ") {
p.updateBase(line, col+5, text[5:])
return
}
// go: directive (but be conservative and test)
if pragh != nil && strings.HasPrefix(text, "go:") {
p.pragma |= pragh(p.pos_at(line, col), text)
}
},
directives,
)
p.first = nil
@ -109,6 +116,20 @@ func (p *parser) updateBase(line, col uint, text string) {
p.base = src.NewLinePragmaBase(src.MakePos(p.base.Pos().Base(), line, col), filename, absFilename, uint(n) /*uint(n2)*/)
}
// commentText returns the text of the comment s with its delimiters
// removed: the leading "//" or "/*" is dropped, along with the closing
// "*/" of a block comment or a trailing '\r' of a line comment.
//
// s is expected to be a complete comment, but malformed input is
// tolerated rather than causing a slice-bounds panic: a string too
// short to hold an opening delimiter yields "", and a block comment
// without a closing "*/" yields everything after the "/*".
func commentText(s string) string {
	if len(s) < 2 {
		return "" // no room for // or /*
	}
	body := s[2:] // lop off // or /*
	if s[:2] == "/*" {
		// Trim the closing */ from the body (not from s) so that the '*'
		// of the opener is never mistaken for part of the closer.
		return strings.TrimSuffix(body, "*/")
	}
	// line comment (does not include newline)
	// (on Windows, the line comment may end in \r\n)
	return strings.TrimSuffix(body, "\r")
}
func trailingDigits(text string) (uint, uint, bool) {
// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')