mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
io.ByteBuffer -> bytes.Buffer left io.ByteBuffer stub around for now, for protocol compiler. R=r OCL=30861 CL=30872
446 lines
9.8 KiB
Go
446 lines
9.8 KiB
Go
// Copyright 2009 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package datafmt
|
|
|
|
import (
|
|
"container/vector";
|
|
"datafmt";
|
|
"fmt";
|
|
"go/scanner";
|
|
"go/token";
|
|
"os";
|
|
"strconv";
|
|
"strings";
|
|
)
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// Error handling
|
|
|
|
// Error is a single error reported while parsing a format
// specification. Msg holds the error text. Pos, when valid, is
// the position in the format source that the error refers to.
//
type Error struct {
	Pos token.Position;
	Msg string;
}


// String returns the error message. If the position is valid,
// the message is prefixed with "line:column: ".
func (e *Error) String() string {
	if !e.Pos.IsValid() {
		return e.Msg;
	}
	return fmt.Sprintf("%d:%d: ", e.Pos.Line, e.Pos.Column) + e.Msg;
}
|
|
|
|
|
|
// An ErrorList is a list of errors encountered during parsing.
|
|
type ErrorList []*Error
|
|
|
|
|
|
// ErrorList implements SortInterface and the os.Error interface.
|
|
|
|
func (p ErrorList) Len() int { return len(p); }
|
|
func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i]; }
|
|
func (p ErrorList) Less(i, j int) bool { return p[i].Pos.Offset < p[j].Pos.Offset; }
|
|
|
|
|
|
func (p ErrorList) String() string {
|
|
switch len(p) {
|
|
case 0: return "unspecified error";
|
|
case 1: return p[0].String();
|
|
}
|
|
return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p) - 1);
|
|
}
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// Parsing
|
|
|
|
type parser struct {
|
|
errors vector.Vector;
|
|
scanner scanner.Scanner;
|
|
pos token.Position; // token position
|
|
tok token.Token; // one token look-ahead
|
|
lit []byte; // token literal
|
|
|
|
packs map [string] string; // PackageName -> ImportPath
|
|
rules map [string] expr; // RuleName -> Expression
|
|
}
|
|
|
|
|
|
func (p *parser) next() {
|
|
p.pos, p.tok, p.lit = p.scanner.Scan();
|
|
switch p.tok {
|
|
case token.CHAN, token.FUNC, token.INTERFACE, token.MAP, token.STRUCT:
|
|
// Go keywords for composite types are type names
|
|
// returned by reflect. Accept them as identifiers.
|
|
p.tok = token.IDENT; // p.lit is already set correctly
|
|
}
|
|
}
|
|
|
|
|
|
func (p *parser) init(src []byte) {
|
|
p.errors.Init(0);
|
|
p.scanner.Init(src, p, scanner.AllowIllegalChars); // return '@' as token.ILLEGAL w/o error message
|
|
p.next(); // initializes pos, tok, lit
|
|
p.packs = make(map [string] string);
|
|
p.rules = make(map [string] expr);
|
|
}
|
|
|
|
|
|
// The parser implements scanner.Error.
|
|
func (p *parser) Error(pos token.Position, msg string) {
|
|
// Don't collect errors that are on the same line as the previous error
|
|
// in the hope to reduce the number of spurious errors due to incorrect
|
|
// parser synchronization.
|
|
if p.errors.Len() == 0 || p.errors.Last().(*Error).Pos.Line != pos.Line {
|
|
p.errors.Push(&Error{pos, msg});
|
|
}
|
|
}
|
|
|
|
|
|
func (p *parser) errorExpected(pos token.Position, msg string) {
|
|
msg = "expected " + msg;
|
|
if pos.Offset == p.pos.Offset {
|
|
// the error happened at the current position;
|
|
// make the error message more specific
|
|
msg += ", found '" + p.tok.String() + "'";
|
|
if p.tok.IsLiteral() {
|
|
msg += " " + string(p.lit);
|
|
}
|
|
}
|
|
p.Error(pos, msg);
|
|
}
|
|
|
|
|
|
func (p *parser) expect(tok token.Token) token.Position {
|
|
pos := p.pos;
|
|
if p.tok != tok {
|
|
p.errorExpected(pos, "'" + tok.String() + "'");
|
|
}
|
|
p.next(); // make progress in any case
|
|
return pos;
|
|
}
|
|
|
|
|
|
func (p *parser) parseIdentifier() string {
|
|
name := string(p.lit);
|
|
p.expect(token.IDENT);
|
|
return name;
|
|
}
|
|
|
|
|
|
func (p *parser) parseTypeName() (string, bool) {
|
|
pos := p.pos;
|
|
name, isIdent := p.parseIdentifier(), true;
|
|
if p.tok == token.PERIOD {
|
|
// got a package name, lookup package
|
|
if importPath, found := p.packs[name]; found {
|
|
name = importPath;
|
|
} else {
|
|
p.Error(pos, "package not declared: " + name);
|
|
}
|
|
p.next();
|
|
name, isIdent = name + "." + p.parseIdentifier(), false;
|
|
}
|
|
return name, isIdent;
|
|
}
|
|
|
|
|
|
// Parses a rule name and returns it. If the rule name is
|
|
// a package-qualified type name, the package name is resolved.
|
|
// The 2nd result value is true iff the rule name consists of a
|
|
// single identifier only (and thus could be a package name).
|
|
//
|
|
func (p *parser) parseRuleName() (string, bool) {
|
|
name, isIdent := "", false;
|
|
switch p.tok {
|
|
case token.IDENT:
|
|
name, isIdent = p.parseTypeName();
|
|
case token.DEFAULT:
|
|
name = "default";
|
|
p.next();
|
|
case token.QUO:
|
|
name = "/";
|
|
p.next();
|
|
default:
|
|
p.errorExpected(p.pos, "rule name");
|
|
p.next(); // make progress in any case
|
|
}
|
|
return name, isIdent;
|
|
}
|
|
|
|
|
|
func (p *parser) parseString() string {
|
|
s := "";
|
|
if p.tok == token.STRING {
|
|
var err os.Error;
|
|
s, err = strconv.Unquote(string(p.lit));
|
|
// Unquote may fail with an error, but only if the scanner found
|
|
// an illegal string in the first place. In this case the error
|
|
// has already been reported.
|
|
p.next();
|
|
return s;
|
|
} else {
|
|
p.expect(token.STRING);
|
|
}
|
|
return s;
|
|
}
|
|
|
|
|
|
func (p *parser) parseLiteral() literal {
|
|
s := strings.Bytes(p.parseString());
|
|
|
|
// A string literal may contain %-format specifiers. To simplify
|
|
// and speed up printing of the literal, split it into segments
|
|
// that start with "%" possibly followed by a last segment that
|
|
// starts with some other character.
|
|
var list vector.Vector;
|
|
list.Init(0);
|
|
i0 := 0;
|
|
for i := 0; i < len(s); i++ {
|
|
if s[i] == '%' && i+1 < len(s) {
|
|
// the next segment starts with a % format
|
|
if i0 < i {
|
|
// the current segment is not empty, split it off
|
|
list.Push(s[i0 : i]);
|
|
i0 = i;
|
|
}
|
|
i++; // skip %; let loop skip over char after %
|
|
}
|
|
}
|
|
// the final segment may start with any character
|
|
// (it is empty iff the string is empty)
|
|
list.Push(s[i0 : len(s)]);
|
|
|
|
// convert list into a literal
|
|
lit := make(literal, list.Len());
|
|
for i := 0; i < list.Len(); i++ {
|
|
lit[i] = list.At(i).([]byte);
|
|
}
|
|
|
|
return lit;
|
|
}
|
|
|
|
|
|
func (p *parser) parseField() expr {
|
|
var fname string;
|
|
switch p.tok {
|
|
case token.ILLEGAL:
|
|
if string(p.lit) != "@" {
|
|
return nil;
|
|
}
|
|
fname = "@";
|
|
p.next();
|
|
case token.MUL:
|
|
fname = "*";
|
|
p.next();
|
|
case token.IDENT:
|
|
fname = p.parseIdentifier();
|
|
default:
|
|
return nil;
|
|
}
|
|
|
|
var ruleName string;
|
|
if p.tok == token.COLON {
|
|
p.next();
|
|
var _ bool;
|
|
ruleName, _ = p.parseRuleName();
|
|
}
|
|
|
|
return &field{fname, ruleName};
|
|
}
|
|
|
|
|
|
// Forward declaration of parseExpression, whose body appears further
// below: parseOperand calls parseExpression, which in turn reaches
// parseOperand again through parseSequence.
// NOTE(review): presumably required because the compiler of the time
// needed a declaration before use - confirm before removing.
func (p *parser) parseExpression() expr
|
|
|
|
func (p *parser) parseOperand() (x expr) {
|
|
switch p.tok {
|
|
case token.STRING:
|
|
x = p.parseLiteral();
|
|
|
|
case token.LPAREN:
|
|
p.next();
|
|
x = p.parseExpression();
|
|
if p.tok == token.SHR {
|
|
p.next();
|
|
x = &group{x, p.parseExpression()};
|
|
}
|
|
p.expect(token.RPAREN);
|
|
|
|
case token.LBRACK:
|
|
p.next();
|
|
x = &option{p.parseExpression()};
|
|
p.expect(token.RBRACK);
|
|
|
|
case token.LBRACE:
|
|
p.next();
|
|
x = p.parseExpression();
|
|
var div expr;
|
|
if p.tok == token.QUO {
|
|
p.next();
|
|
div = p.parseExpression();
|
|
}
|
|
x = &repetition{x, div};
|
|
p.expect(token.RBRACE);
|
|
|
|
default:
|
|
x = p.parseField(); // may be nil
|
|
}
|
|
|
|
return x;
|
|
}
|
|
|
|
|
|
func (p *parser) parseSequence() expr {
|
|
var list vector.Vector;
|
|
list.Init(0);
|
|
|
|
for x := p.parseOperand(); x != nil; x = p.parseOperand() {
|
|
list.Push(x);
|
|
}
|
|
|
|
// no need for a sequence if list.Len() < 2
|
|
switch list.Len() {
|
|
case 0: return nil;
|
|
case 1: return list.At(0).(expr);
|
|
}
|
|
|
|
// convert list into a sequence
|
|
seq := make(sequence, list.Len());
|
|
for i := 0; i < list.Len(); i++ {
|
|
seq[i] = list.At(i).(expr);
|
|
}
|
|
return seq;
|
|
}
|
|
|
|
|
|
func (p *parser) parseExpression() expr {
|
|
var list vector.Vector;
|
|
list.Init(0);
|
|
|
|
for {
|
|
x := p.parseSequence();
|
|
if x != nil {
|
|
list.Push(x);
|
|
}
|
|
if p.tok != token.OR {
|
|
break;
|
|
}
|
|
p.next();
|
|
}
|
|
|
|
// no need for an alternatives if list.Len() < 2
|
|
switch list.Len() {
|
|
case 0: return nil;
|
|
case 1: return list.At(0).(expr);
|
|
}
|
|
|
|
// convert list into a alternatives
|
|
alt := make(alternatives, list.Len());
|
|
for i := 0; i < list.Len(); i++ {
|
|
alt[i] = list.At(i).(expr);
|
|
}
|
|
return alt;
|
|
}
|
|
|
|
|
|
func (p *parser) parseFormat() {
|
|
for p.tok != token.EOF {
|
|
pos := p.pos;
|
|
|
|
name, isIdent := p.parseRuleName();
|
|
switch p.tok {
|
|
case token.STRING:
|
|
// package declaration
|
|
importPath := p.parseString();
|
|
|
|
// add package declaration
|
|
if !isIdent {
|
|
p.Error(pos, "illegal package name: " + name);
|
|
} else if _, found := p.packs[name]; !found {
|
|
p.packs[name] = importPath;
|
|
} else {
|
|
p.Error(pos, "package already declared: " + name);
|
|
}
|
|
|
|
case token.ASSIGN:
|
|
// format rule
|
|
p.next();
|
|
x := p.parseExpression();
|
|
|
|
// add rule
|
|
if _, found := p.rules[name]; !found {
|
|
p.rules[name] = x;
|
|
} else {
|
|
p.Error(pos, "format rule already declared: " + name);
|
|
}
|
|
|
|
default:
|
|
p.errorExpected(p.pos, "package declaration or format rule");
|
|
p.next(); // make progress in any case
|
|
}
|
|
|
|
if p.tok == token.SEMICOLON {
|
|
p.next();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
p.expect(token.EOF);
|
|
}
|
|
|
|
|
|
func remap(p *parser, name string) string {
|
|
i := strings.Index(name, ".");
|
|
if i >= 0 {
|
|
packageName, suffix := name[0 : i], name[i : len(name)];
|
|
// lookup package
|
|
if importPath, found := p.packs[packageName]; found {
|
|
name = importPath + suffix;
|
|
} else {
|
|
var invalidPos token.Position;
|
|
p.Error(invalidPos, "package not declared: " + packageName);
|
|
}
|
|
}
|
|
return name;
|
|
}
|
|
|
|
|
|
// Parse parses a set of format productions from source src. Custom
|
|
// formatters may be provided via a map of formatter functions. If
|
|
// there are no errors, the result is a Format and the error is nil.
|
|
// Otherwise the format is nil and a non-empty ErrorList is returned.
|
|
//
|
|
func Parse(src []byte, fmap FormatterMap) (Format, os.Error) {
|
|
// parse source
|
|
var p parser;
|
|
p.init(src);
|
|
p.parseFormat();
|
|
|
|
// add custom formatters, if any
|
|
for name, form := range fmap {
|
|
name = remap(&p, name);
|
|
if t, found := p.rules[name]; !found {
|
|
p.rules[name] = &custom{name, form};
|
|
} else {
|
|
var invalidPos token.Position;
|
|
p.Error(invalidPos, "formatter already declared: " + name);
|
|
}
|
|
}
|
|
|
|
// convert errors list, if any
|
|
if p.errors.Len() > 0 {
|
|
errors := make(ErrorList, p.errors.Len());
|
|
for i := 0; i < p.errors.Len(); i++ {
|
|
errors[i] = p.errors.At(i).(*Error);
|
|
}
|
|
return nil, errors;
|
|
}
|
|
|
|
return p.rules, nil;
|
|
}
|