diff --git a/src/cmd/compile/internal/syntax/parser.go b/src/cmd/compile/internal/syntax/parser.go index 0a872e03ef9..25bbbeec69a 100644 --- a/src/cmd/compile/internal/syntax/parser.go +++ b/src/cmd/compile/internal/syntax/parser.go @@ -38,24 +38,31 @@ func (p *parser) init(base *src.PosBase, r io.Reader, errh ErrorHandler, pragh P p.mode = mode p.scanner.init( r, - // Error and pragma handlers for scanner. - // Because the (line, col) positions passed to these - // handlers are always at or after the current reading - // position, it is save to use the most recent position + // Error and directive handler for scanner. + // Because the (line, col) positions passed to the + // handler is always at or after the current reading + // position, it is safe to use the most recent position // base to compute the corresponding Pos value. func(line, col uint, msg string) { - p.error_at(p.pos_at(line, col), msg) - }, - func(line, col uint, text string) { - const prefix = "line " - if strings.HasPrefix(text, prefix) { - p.updateBase(line, col+uint(len(prefix)), text[len(prefix):]) + if msg[0] != '/' { + p.error_at(p.pos_at(line, col), msg) return } - if pragh != nil { + + // otherwise it must be a comment containing a line or go: directive + text := commentText(msg) + col += 2 // text starts after // or /* + if strings.HasPrefix(text, "line ") { + p.updateBase(line, col+5, text[5:]) + return + } + + // go: directive (but be conservative and test) + if pragh != nil && strings.HasPrefix(text, "go:") { p.pragma |= pragh(p.pos_at(line, col), text) } }, + directives, ) p.first = nil @@ -109,6 +116,20 @@ func (p *parser) updateBase(line, col uint, text string) { p.base = src.NewLinePragmaBase(src.MakePos(p.base.Pos().Base(), line, col), filename, absFilename, uint(n) /*uint(n2)*/) } +func commentText(s string) string { + if s[:2] == "/*" { + return s[2 : len(s)-2] // lop off /* and */ + } + + // line comment (does not include newline) + // (on Windows, the line comment may end in \r\n) + i := len(s) + if s[i-1] == '\r' { + i-- + } + return s[2:i] // lop off // and \r at end, if any +} + func trailingDigits(text string) (uint, uint, bool) { // Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails. i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':') diff --git a/src/cmd/compile/internal/syntax/scanner.go b/src/cmd/compile/internal/syntax/scanner.go index be406d97530..1e0ff2e3cc2 100644 --- a/src/cmd/compile/internal/syntax/scanner.go +++ b/src/cmd/compile/internal/syntax/scanner.go @@ -19,9 +19,17 @@ import ( "unicode/utf8" ) +// The mode flags below control which comments are reported +// by calling the error handler. If no flag is set, comments +// are ignored. +const ( + comments uint = 1 << iota // call handler for all comments + directives // call handler for directives only +) + type scanner struct { source - pragh func(line, col uint, msg string) + mode uint nlsemi bool // if set '\n' and EOF translate to ';' // current token, valid after calling next() @@ -33,25 +41,32 @@ type scanner struct { prec int // valid if tok is _Operator, _AssignOp, or _IncOp } -func (s *scanner) init(src io.Reader, errh, pragh func(line, col uint, msg string)) { +func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mode uint) { s.source.init(src, errh) - s.pragh = pragh + s.mode = mode s.nlsemi = false } // next advances the scanner by reading the next token. // -// If a read, source encoding, or lexical error occurs, next -// calls the error handler installed with init. The handler -// must exist. +// If a read, source encoding, or lexical error occurs, next calls +// the installed error handler with the respective error position +// and message. The error message is guaranteed to be non-empty and +// never starts with a '/'. The error handler must exist. // -// If a //line or //go: directive is encountered at the start -// of a line, next calls the directive handler pragh installed -// with init, if not nil. +// If the scanner mode includes the comments flag and a comment +// (including comments containing directives) is encountered, the +// error handler is also called with each comment position and text +// (including opening /* or // and closing */, but without a newline +// at the end of line comments). Comment text always starts with a / +// which can be used to distinguish these handler calls from errors. +// +// If the scanner mode includes the directives (but not the comments) +// flag, only comments containing a //line, /*line, or //go: directive +// are reported, in the same way as regular comments. Directives in +// //-style comments are only recognized if they are at the beginning +// of a line. // -// The (line, col) position passed to the error and directive -// handler is always at or after the current source reading -// position. func (s *scanner) next() { nlsemi := s.nlsemi s.nlsemi = false @@ -565,6 +580,10 @@ func (s *scanner) rawString() { s.tok = _Literal } +func (s *scanner) comment(text string) { + s.errh(s.line, s.col, text) +} + func (s *scanner) skipLine(r rune) { for r >= 0 { if r == '\n' { @@ -578,14 +597,20 @@ func (s *scanner) skipLine(r rune) { func (s *scanner) lineComment() { r := s.getr() + if s.mode&comments != 0 { + s.startLit() + s.skipLine(r) + s.comment("//" + string(s.stopLit())) + return + } + // directives must start at the beginning of the line (s.col == colbase) - if s.col != colbase || s.pragh == nil || (r != 'g' && r != 'l') { + if s.mode&directives == 0 || s.col != colbase || (r != 'g' && r != 'l') { s.skipLine(r) return } - // s.col == colbase && s.pragh != nil && (r == 'g' || r == 'l') - // recognize directives + // recognize go: or line directives prefix := "go:" if r == 'l' { prefix = "line " @@ -598,38 +623,43 @@ func (s *scanner) lineComment() { r = s.getr() } - // directive text without line ending (which may be "\r\n" if Windows), + // directive text s.startLit() s.skipLine(r) - text := s.stopLit() - if i := len(text) - 1; i >= 0 && text[i] == '\r' { - text = text[:i] - } - - s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since directive text starts after // + s.comment("//" + prefix + string(s.stopLit())) } -func (s *scanner) skipComment(r rune) { +func (s *scanner) skipComment(r rune) bool { for r >= 0 { for r == '*' { r = s.getr() if r == '/' { - return + return true } } r = s.getr() } s.errh(s.line, s.col, "comment not terminated") + return false } func (s *scanner) fullComment() { r := s.getr() - if s.pragh == nil || r != 'l' { + if s.mode&comments != 0 { + s.startLit() + if s.skipComment(r) { + s.comment("/*" + string(s.stopLit())) + } else { + s.killLit() // not a complete comment - ignore + } + return + } + + if s.mode&directives == 0 || r != 'l' { s.skipComment(r) return } - // s.pragh != nil && r == 'l' // recognize line directive const prefix = "line " @@ -641,15 +671,13 @@ func (s *scanner) fullComment() { r = s.getr() } - // directive text without comment ending + // directive text s.startLit() - s.skipComment(r) - text := s.stopLit() - if i := len(text) - 2; i >= 0 && text[i] == '*' && text[i+1] == '/' { - text = text[:i] + if s.skipComment(r) { + s.comment("/*" + prefix + string(s.stopLit())) + } else { + s.killLit() // not a complete comment - ignore } - - s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since directive text starts after /* } func (s *scanner) escape(quote rune) bool { diff --git a/src/cmd/compile/internal/syntax/scanner_test.go b/src/cmd/compile/internal/syntax/scanner_test.go index 160bcbee267..4bfe5871fab 100644 --- a/src/cmd/compile/internal/syntax/scanner_test.go +++ b/src/cmd/compile/internal/syntax/scanner_test.go @@ -24,7 +24,7 @@ func TestScanner(t *testing.T) { defer src.Close() var s scanner - s.init(src, nil, nil) + s.init(src, nil, 0) for { s.next() if s.tok == _EOF { @@ -53,7 +53,7 @@ func TestTokens(t *testing.T) { // scan source var got scanner - got.init(&buf, nil, nil) + got.init(&buf, nil, 0) got.next() for i, want := range sampleTokens { nlsemi := false @@ -263,6 +263,66 @@ var sampleTokens = [...]struct { {_Var, "var", 0, 0}, } +func TestComments(t *testing.T) { + type comment struct { + line, col uint // 0-based + text string + } + + for _, test := range []struct { + src string + want comment + }{ + // no comments + {"no comment here", comment{0, 0, ""}}, + {" /", comment{0, 0, ""}}, + {"\n /*/", comment{0, 0, ""}}, + + //-style comments + {"// line comment\n", comment{0, 0, "// line comment"}}, + {"package p // line comment\n", comment{0, 10, "// line comment"}}, + {"//\n//\n\t// want this one\r\n", comment{2, 1, "// want this one\r"}}, + {"\n\n//\n", comment{2, 0, "//"}}, + {"//", comment{0, 0, "//"}}, + + /*-style comments */ + {"/* regular comment */", comment{0, 0, "/* regular comment */"}}, + {"package p /* regular comment", comment{0, 0, ""}}, + {"\n\n\n/*\n*//* want this one */", comment{4, 2, "/* want this one */"}}, + {"\n\n/**/", comment{2, 0, "/**/"}}, + {"/*", comment{0, 0, ""}}, + } { + var s scanner + var got comment + s.init(strings.NewReader(test.src), + func(line, col uint, msg string) { + if msg[0] != '/' { + // error + if msg != "comment not terminated" { + t.Errorf("%q: %s", test.src, msg) + } + return + } + got = comment{line - linebase, col - colbase, msg} // keep last one + }, comments) + + for { + s.next() + if s.tok == _EOF { + break + } + } + + want := test.want + if got.line != want.line || got.col != want.col { + t.Errorf("%q: got position %d:%d; want %d:%d", test.src, got.line, got.col, want.line, want.col) + } + if got.text != want.text { + t.Errorf("%q: got %q; want %q", test.src, got.text, want.text) + } + } +} + func TestScanErrors(t *testing.T) { for _, test := range []struct { src, msg string @@ -354,7 +414,7 @@ func TestScanErrors(t *testing.T) { // TODO(gri) make this use position info t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line) } - }, nil) + }, 0) for { s.next() @@ -373,7 +433,7 @@ func TestIssue21938(t *testing.T) { s := "/*" + strings.Repeat(" ", 4089) + "*/ .5" var got scanner - got.init(strings.NewReader(s), nil, nil) + got.init(strings.NewReader(s), nil, 0) got.next() if got.tok != _Literal || got.lit != ".5" { diff --git a/src/cmd/compile/internal/syntax/source.go b/src/cmd/compile/internal/syntax/source.go index 4e3551225aa..62eb0fdc301 100644 --- a/src/cmd/compile/internal/syntax/source.go +++ b/src/cmd/compile/internal/syntax/source.go @@ -124,7 +124,8 @@ redo: // EOF if s.r == s.w { if s.ioerr != io.EOF { - s.error(s.ioerr.Error()) + // ensure we never start with a '/' (e.g., rooted path) in the error message + s.error("I/O error: " + s.ioerr.Error()) } return -1 } @@ -201,6 +202,10 @@ func (s *source) stopLit() []byte { if len(s.lit) > 0 { lit = append(s.lit, lit...) } - s.suf = -1 // no pending literal + s.killLit() return lit } + +func (s *source) killLit() { + s.suf = -1 // no pending literal +}