2015-01-22 10:48:02 -08:00
|
|
|
// Copyright 2015 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
// Package lex implements lexical analysis for the assembler.
|
|
|
|
|
package lex
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"fmt"
|
|
|
|
|
"log"
|
|
|
|
|
"os"
|
|
|
|
|
"strings"
|
|
|
|
|
"text/scanner"
|
|
|
|
|
|
|
|
|
|
"cmd/internal/obj"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// A ScanToken represents an input item. It is a simple wrapping of rune, as
// returned by text/scanner.Scanner, plus a couple of extra values.
type ScanToken rune

const (
	// Asm defines some two-character lexemes. We make up
	// a rune/ScanToken value for them - ugly but simple.
	LSH ScanToken = -1000 - iota // << Left shift.
	RSH                          // >> Logical right shift.
	ARR                          // -> Used on ARM for shift type 3, arithmetic right shift.
	ROT                          // @> Used on ARM for shift type 4, rotate right.
	macroName                    // name of macro that should not be expanded
)

// tokenNames maps the scanner's special token values to human-readable
// descriptions for use in diagnostics.
var tokenNames = map[ScanToken]string{
	scanner.EOF:       "EOF",
	scanner.Ident:     "identifier",
	scanner.Int:       "integer constant",
	scanner.Float:     "float constant",
	scanner.Char:      "rune constant",
	scanner.String:    "string constant",
	scanner.RawString: "raw string constant",
	scanner.Comment:   "comment",
}

// String returns a readable name for the token, falling back to a
// quoted rune for ordinary characters.
func (t ScanToken) String() string {
	if name, ok := tokenNames[t]; ok {
		return name
	}
	return fmt.Sprintf("%q", rune(t))
}
|
|
|
|
|
|
|
|
|
|
// Package-level lexer state shared by the lexing routines in this directory.
var (
	// It might be nice if these weren't global.
	linkCtxt *obj.Link // The link context for all instructions; set by NewLexer.
	histLine int = 1   // The cumulative count of lines processed; exposed via HistLine.
)
|
|
|
|
|
|
|
|
|
|
// HistLine reports the cumulative source line number of the token,
|
|
|
|
|
// for use in the Prog structure for the linker. (It's always handling the
|
|
|
|
|
// instruction from the current lex line.)
|
2015-02-06 11:39:23 -08:00
|
|
|
// It returns int32 because that's what type ../asm prefers.
|
|
|
|
|
func HistLine() int32 {
|
|
|
|
|
return int32(histLine)
|
2015-01-22 10:48:02 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// NewLexer returns a lexer for the named file and the given link context.
|
|
|
|
|
func NewLexer(name string, ctxt *obj.Link) TokenReader {
|
|
|
|
|
linkCtxt = ctxt
|
|
|
|
|
input := NewInput(name)
|
|
|
|
|
fd, err := os.Open(name)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatalf("asm: %s\n", err)
|
|
|
|
|
}
|
|
|
|
|
input.Push(NewTokenizer(name, fd, fd))
|
|
|
|
|
return input
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// The other files in this directory each contain an implementation of TokenReader.

// A TokenReader is like a reader, but returns lex tokens of type Token. It also can tell you what
// the text of the most recently returned token is, and where it was found.
// The underlying scanner elides all spaces except newline, so the input looks like a stream of
// Tokens; original spacing is lost but we don't need it.
type TokenReader interface {
	// Next returns the next token.
	Next() ScanToken
	// The following methods all refer to the most recent token returned by Next.
	// Text returns the original string representation of the token.
	Text() string
	// File reports the source file name of the token.
	File() string
	// Line reports the source line number of the token.
	Line() int
	// SetPos sets the file and line number, overriding the scanner's
	// own position (used for #line directives and macro expansion).
	SetPos(line int, file string)
	// Close does any teardown required.
	Close()
}
|
|
|
|
|
|
|
|
|
|
// A Token is a scan token plus its string value.
// A macro is stored as a sequence of Tokens with spaces stripped.
type Token struct {
	ScanToken        // The token's kind, embedded so Token can be compared against scanner values.
	text      string // The token's text; see Make for the substitutions applied on construction.
}
|
|
|
|
|
|
|
|
|
|
// Make returns a Token with the given rune (ScanToken) and text representation.
|
|
|
|
|
func Make(token ScanToken, text string) Token {
|
2015-01-23 11:24:42 -08:00
|
|
|
// If the symbol starts with center dot, as in ·x, rewrite it as ""·x
|
|
|
|
|
if token == scanner.Ident && strings.HasPrefix(text, "\u00B7") {
|
|
|
|
|
text = `""` + text
|
|
|
|
|
}
|
|
|
|
|
// Substitute the substitutes for . and /.
|
2015-02-03 10:41:16 -08:00
|
|
|
text = strings.Replace(text, "\u00B7", ".", -1)
|
2015-01-23 11:24:42 -08:00
|
|
|
text = strings.Replace(text, "\u2215", "/", -1)
|
2015-01-22 10:48:02 -08:00
|
|
|
return Token{ScanToken: token, text: text}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (l Token) String() string {
|
|
|
|
|
return l.text
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// A Macro represents the definition of a #defined macro.
type Macro struct {
	name   string   // The #define name.
	args   []string // Formal arguments, in declaration order.
	tokens []Token  // Body of macro, stored as tokens with spaces stripped.
}
|
|
|
|
|
|
2015-01-28 11:11:33 -08:00
|
|
|
// Tokenize turns a string into a list of Tokens; used to parse the -D flag and in tests.
|
|
|
|
|
func Tokenize(str string) []Token {
|
2015-01-22 10:48:02 -08:00
|
|
|
t := NewTokenizer("command line", strings.NewReader(str), nil)
|
|
|
|
|
var tokens []Token
|
|
|
|
|
for {
|
|
|
|
|
tok := t.Next()
|
|
|
|
|
if tok == scanner.EOF {
|
|
|
|
|
break
|
|
|
|
|
}
|
2015-01-23 11:24:42 -08:00
|
|
|
tokens = append(tokens, Make(tok, t.Text()))
|
2015-01-22 10:48:02 -08:00
|
|
|
}
|
|
|
|
|
return tokens
|
|
|
|
|
}
|