go/src/cmd/asm/internal/lex/lex.go
Rob Pike cb19a20121 [dev.cc] cmd/asm: rewrite to work with new obj API
Considerable rewriting of the parser and assembler (code generator)
but it's simpler and shorter now. The internal Addr type is gone; so
is the package that held it. Parsing of operands goes directly into
obj.Addrs now.

There is a horrible hack regarding register pairs. It uses the Class
field to store the second register since it needs _some_ place to
put it but none is provided in the API. An alternative would be nice
but this works for now.

Once again creates identical .6 and .8 files as the old assembler.

Change-Id: I8207d6dfdfdb5bbed0bd870cb34ee0fe61c2fbfd
Reviewed-on: https://go-review.googlesource.com/4062
Reviewed-by: Russ Cox <rsc@golang.org>
2015-02-09 18:48:19 +00:00

145 lines
4.3 KiB
Go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package lex implements lexical analysis for the assembler.
package lex
import (
"fmt"
"log"
"os"
"strings"
"text/scanner"
"cmd/internal/obj"
)
// A ScanToken represents an input item. It is a simple wrapping of rune, as
// returned by text/scanner.Scanner, plus a couple of extra values.
type ScanToken rune

const (
	// Asm defines some two-character lexemes. We make up
	// a rune/ScanToken value for them - ugly but simple.
	// The values count down from -1000, so none of them is a valid
	// Unicode code point.
	LSH       ScanToken = -1000 - iota // << Left shift.
	RSH                                // >> Logical right shift.
	ARR                                // -> Used on ARM for shift type 3, arithmetic right shift.
	ROT                                // @> Used on ARM for shift type 4, rotate right.
	macroName                          // name of macro that should not be expanded
)

// String returns a human-readable description of the token, suitable for
// diagnostics. The token classes defined by text/scanner are described in
// words, the two-character lexemes defined above are shown as their quoted
// source spelling, and any other value is treated as a single character and
// shown as a quoted rune.
func (t ScanToken) String() string {
	switch t {
	case scanner.EOF:
		return "EOF"
	case scanner.Ident:
		return "identifier"
	case scanner.Int:
		return "integer constant"
	case scanner.Float:
		return "float constant"
	case scanner.Char:
		return "rune constant"
	case scanner.String:
		return "string constant"
	case scanner.RawString:
		return "raw string constant"
	case scanner.Comment:
		return "comment"
	// The made-up values below are not valid runes; letting them reach the
	// default case would print the Unicode replacement character.
	case LSH:
		return `"<<"`
	case RSH:
		return `">>"`
	case ARR:
		return `"->"`
	case ROT:
		return `"@>"`
	case macroName:
		return "macro name"
	default:
		return fmt.Sprintf("%q", rune(t))
	}
}
var (
// Package-level lexer state. It might be nice if these weren't global.
// linkCtxt is assigned by NewLexer; histLine starts at 1 and is the value
// that HistLine reports.
linkCtxt *obj.Link // The link context for all instructions.
histLine int = 1 // The cumulative count of lines processed.
)
// HistLine returns the cumulative source line number kept by the lexer,
// which callers record in the Prog structure for the linker. The instruction
// being handled always comes from the current lex line, so the running count
// is the right value to report.
// The result is converted to int32 because that is the type ../asm prefers.
func HistLine() int32 {
	line := int32(histLine)
	return line
}
// NewLexer returns a lexer for the named file and the given link context.
// It stores ctxt in the package-level link context, opens the file, and
// pushes a tokenizer for it onto the value returned by NewInput. If the file
// cannot be opened, the failure is reported with log.Fatalf.
func NewLexer(name string, ctxt *obj.Link) TokenReader {
	linkCtxt = ctxt
	in := NewInput(name)
	file, err := os.Open(name)
	if err != nil {
		log.Fatalf("asm: %s\n", err)
	}
	// The same open file is handed to NewTokenizer for both of its
	// remaining arguments.
	tokenizer := NewTokenizer(name, file, file)
	in.Push(tokenizer)
	return in
}
// A TokenReader is like a reader, but returns lex tokens (as ScanToken values) one at a time.
// It also can tell you what the text of the most recently returned token is, and where it was found.
// The other files in this directory each contain an implementation of TokenReader.
// The underlying scanner elides all spaces except newline, so the input looks like a stream of
// tokens; original spacing is lost but we don't need it.
type TokenReader interface {
// Next returns the next token.
Next() ScanToken
// The following methods all refer to the most recent token returned by Next.
// Text returns the original string representation of the token.
Text() string
// File reports the source file name of the token.
File() string
// Line reports the source line number of the token.
Line() int
// SetPos sets the file and line number.
// Note the argument order: the line comes first, then the file name.
SetPos(line int, file string)
// Close does any teardown required.
Close()
}
// A Token is a scan token plus its string value.
// A macro is stored as a sequence of Tokens with spaces stripped.
type Token struct {
// ScanToken is the kind of token; it is embedded, so a Token can be
// compared directly against ScanToken values.
ScanToken
// text is the token's string value, as stored by Make.
text string
}
// Make returns a Token with the given rune (ScanToken) and text representation.
// Assembly source spells '.' and '/' inside symbol names with the stand-in
// characters center dot (U+00B7) and division slash (U+2215); Make converts
// every occurrence of them in text to the real characters.
func Make(token ScanToken, text string) Token {
	const (
		centerDot     = "\u00B7" // stands in for .
		divisionSlash = "\u2215" // stands in for /
	)
	// An identifier that begins with center dot, as in ·x, becomes ""·x
	// before the substitution below.
	if token == scanner.Ident && strings.HasPrefix(text, centerDot) {
		text = `""` + text
	}
	dotted := strings.Replace(text, centerDot, ".", -1)
	tok := Token{ScanToken: token}
	tok.text = strings.Replace(dotted, divisionSlash, "/", -1)
	return tok
}
// String returns the token's text. Because Token embeds ScanToken, this
// method takes precedence over the String method promoted from ScanToken, so
// formatting a Token shows its text rather than a description of its kind.
func (t Token) String() string {
	text := t.text
	return text
}
// A Macro represents the definition of a #defined macro:
// its name, the names of its formal arguments, and its body as a sequence of Tokens.
type Macro struct {
name string // The #define name.
args []string // Formal arguments.
tokens []Token // Body of macro.
}
// Tokenize turns a string into a list of Tokens; used to parse the -D flag and in tests.
// The string is scanned under the file name "command line" until EOF, and each
// token is built with Make from the scanner's kind and text. The result is nil
// when the string contains no tokens.
func Tokenize(str string) []Token {
	var list []Token
	src := NewTokenizer("command line", strings.NewReader(str), nil)
	for tok := src.Next(); tok != scanner.EOF; tok = src.Next() {
		list = append(list, Make(tok, src.Text()))
	}
	return list
}