mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: accept new Go2 number literals
This CL introduces compiler support for the new binary and octal integer
literals, hexadecimal floats, and digit separators for all number literals.
The new Go 2 number literal scanner accepts the following liberal format:
number = [ prefix ] digits [ "." digits ] [ exponent ] [ "i" ] .
prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
digits = { digit | "_" } .
exponent = ( "e" | "E" | "p" | "P" ) [ "+" | "-" ] digits .
If the number starts with "0x" or "0X", digit is any hexadecimal digit;
otherwise, digit is any decimal digit. If the accepted number is not valid,
errors are reported accordingly.
See the new test cases in scanner_test.go for a selection of valid and
invalid numbers and the respective error messages.
R=Go1.13
Updates #12711.
Updates #19308.
Updates #28493.
Updates #29008.
Change-Id: Ic8febc7bd4dc5186b16a8c8897691e81125cf0ca
Reviewed-on: https://go-review.googlesource.com/c/157677
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
parent
7bc2aa670f
commit
ceb849dd97
7 changed files with 487 additions and 88 deletions
|
|
@ -47,6 +47,10 @@ func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mod
|
|||
s.nlsemi = false
|
||||
}
|
||||
|
||||
func (s *scanner) errorf(format string, args ...interface{}) {
|
||||
s.error(fmt.Sprintf(format, args...))
|
||||
}
|
||||
|
||||
// next advances the scanner by reading the next token.
|
||||
//
|
||||
// If a read, source encoding, or lexical error occurs, next calls
|
||||
|
|
@ -149,8 +153,9 @@ redo:
|
|||
|
||||
case '.':
|
||||
c = s.getr()
|
||||
if isDigit(c) {
|
||||
s.unread(1)
|
||||
if isDecimal(c) {
|
||||
s.ungetr()
|
||||
s.unread(1) // correct position of '.' (needed by startLit in number)
|
||||
s.number('.')
|
||||
break
|
||||
}
|
||||
|
|
@ -304,7 +309,7 @@ redo:
|
|||
|
||||
default:
|
||||
s.tok = 0
|
||||
s.error(fmt.Sprintf("invalid character %#U", c))
|
||||
s.errorf("invalid character %#U", c)
|
||||
goto redo
|
||||
}
|
||||
|
||||
|
|
@ -320,11 +325,7 @@ assignop:
|
|||
}
|
||||
|
||||
func isLetter(c rune) bool {
|
||||
return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_'
|
||||
}
|
||||
|
||||
func isDigit(c rune) bool {
|
||||
return '0' <= c && c <= '9'
|
||||
return 'a' <= lower(c) && lower(c) <= 'z' || c == '_'
|
||||
}
|
||||
|
||||
func (s *scanner) ident() {
|
||||
|
|
@ -332,7 +333,7 @@ func (s *scanner) ident() {
|
|||
|
||||
// accelerate common case (7bit ASCII)
|
||||
c := s.getr()
|
||||
for isLetter(c) || isDigit(c) {
|
||||
for isLetter(c) || isDecimal(c) {
|
||||
c = s.getr()
|
||||
}
|
||||
|
||||
|
|
@ -372,10 +373,10 @@ func (s *scanner) isIdentRune(c rune, first bool) bool {
|
|||
// ok
|
||||
case unicode.IsDigit(c):
|
||||
if first {
|
||||
s.error(fmt.Sprintf("identifier cannot begin with digit %#U", c))
|
||||
s.errorf("identifier cannot begin with digit %#U", c)
|
||||
}
|
||||
case c >= utf8.RuneSelf:
|
||||
s.error(fmt.Sprintf("invalid identifier character %#U", c))
|
||||
s.errorf("invalid identifier character %#U", c)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
|
|
@ -401,86 +402,188 @@ func init() {
|
|||
}
|
||||
}
|
||||
|
||||
// lower sets the ASCII case bit ('a' - 'A') in c. The result is the
// lower-case form of c only when c is an ASCII letter; any other rune
// simply comes back with that bit set.
func lower(c rune) rune {
	return c | ('a' - 'A')
}

// isDecimal reports whether c is one of the ASCII digits '0' through '9'.
func isDecimal(c rune) bool {
	return c >= '0' && c <= '9'
}

// isHex reports whether c is an ASCII hexadecimal digit: a decimal digit
// or one of the letters 'a' through 'f' in either case.
func isHex(c rune) bool {
	if isDecimal(c) {
		return true
	}
	l := lower(c)
	return l >= 'a' && l <= 'f'
}
|
||||
|
||||
// digits accepts the sequence { digit | '_' } starting with c0.
|
||||
// If base <= 10, digits accepts any decimal digit but records
|
||||
// the index (relative to the literal start) of a digit >= base
|
||||
// in *invalid, if *invalid < 0.
|
||||
// digits returns the first rune that is not part of the sequence
|
||||
// anymore, and a bitset describing whether the sequence contained
|
||||
// digits (bit 0 is set), or separators '_' (bit 1 is set).
|
||||
func (s *scanner) digits(c0 rune, base int, invalid *int) (c rune, digsep int) {
|
||||
c = c0
|
||||
if base <= 10 {
|
||||
max := rune('0' + base)
|
||||
for isDecimal(c) || c == '_' {
|
||||
ds := 1
|
||||
if c == '_' {
|
||||
ds = 2
|
||||
} else if c >= max && *invalid < 0 {
|
||||
*invalid = int(s.col0 - s.col) // record invalid rune index
|
||||
}
|
||||
digsep |= ds
|
||||
c = s.getr()
|
||||
}
|
||||
} else {
|
||||
for isHex(c) || c == '_' {
|
||||
ds := 1
|
||||
if c == '_' {
|
||||
ds = 2
|
||||
}
|
||||
digsep |= ds
|
||||
c = s.getr()
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (s *scanner) number(c rune) {
|
||||
s.startLit()
|
||||
|
||||
base := 10 // number base
|
||||
prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
|
||||
digsep := 0 // bit 0: digit present, bit 1: '_' present
|
||||
invalid := -1 // index of invalid digit in literal, or < 0
|
||||
|
||||
// integer part
|
||||
var ds int
|
||||
if c != '.' {
|
||||
s.kind = IntLit // until proven otherwise
|
||||
s.kind = IntLit
|
||||
if c == '0' {
|
||||
c = s.getr()
|
||||
if c == 'x' || c == 'X' {
|
||||
// hex
|
||||
switch lower(c) {
|
||||
case 'x':
|
||||
c = s.getr()
|
||||
hasDigit := false
|
||||
for isDigit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
|
||||
c = s.getr()
|
||||
hasDigit = true
|
||||
}
|
||||
if !hasDigit {
|
||||
s.error("malformed hex constant")
|
||||
}
|
||||
goto done
|
||||
}
|
||||
|
||||
// decimal 0, octal, or float
|
||||
has8or9 := false
|
||||
for isDigit(c) {
|
||||
if c > '7' {
|
||||
has8or9 = true
|
||||
}
|
||||
base, prefix = 16, 'x'
|
||||
case 'o':
|
||||
c = s.getr()
|
||||
}
|
||||
if c != '.' && c != 'e' && c != 'E' && c != 'i' {
|
||||
// octal
|
||||
if has8or9 {
|
||||
s.error("malformed octal constant")
|
||||
}
|
||||
goto done
|
||||
}
|
||||
|
||||
} else {
|
||||
// decimal or float
|
||||
for isDigit(c) {
|
||||
base, prefix = 8, 'o'
|
||||
case 'b':
|
||||
c = s.getr()
|
||||
base, prefix = 2, 'b'
|
||||
default:
|
||||
base, prefix = 8, '0'
|
||||
digsep = 1 // leading 0
|
||||
}
|
||||
}
|
||||
c, ds = s.digits(c, base, &invalid)
|
||||
digsep |= ds
|
||||
}
|
||||
|
||||
// float
|
||||
// fractional part
|
||||
if c == '.' {
|
||||
s.kind = FloatLit
|
||||
c = s.getr()
|
||||
for isDigit(c) {
|
||||
c = s.getr()
|
||||
if prefix == 'o' || prefix == 'b' {
|
||||
s.error("invalid radix point in " + litname(prefix))
|
||||
}
|
||||
c, ds = s.digits(s.getr(), base, &invalid)
|
||||
digsep |= ds
|
||||
}
|
||||
|
||||
if digsep&1 == 0 {
|
||||
s.error(litname(prefix) + " has no digits")
|
||||
}
|
||||
|
||||
// exponent
|
||||
if c == 'e' || c == 'E' {
|
||||
s.kind = FloatLit
|
||||
if e := lower(c); e == 'e' || e == 'p' {
|
||||
switch {
|
||||
case e == 'e' && prefix != 0 && prefix != '0':
|
||||
s.errorf("%q exponent requires decimal mantissa", c)
|
||||
case e == 'p' && prefix != 'x':
|
||||
s.errorf("%q exponent requires hexadecimal mantissa", c)
|
||||
}
|
||||
c = s.getr()
|
||||
if c == '-' || c == '+' {
|
||||
s.kind = FloatLit
|
||||
if c == '+' || c == '-' {
|
||||
c = s.getr()
|
||||
}
|
||||
if !isDigit(c) {
|
||||
s.error("malformed floating-point constant exponent")
|
||||
}
|
||||
for isDigit(c) {
|
||||
c = s.getr()
|
||||
c, ds = s.digits(c, 10, nil)
|
||||
digsep |= ds
|
||||
if ds&1 == 0 {
|
||||
s.error("exponent has no digits")
|
||||
}
|
||||
} else if prefix == 'x' && s.kind == FloatLit {
|
||||
s.error("hexadecimal mantissa requires a 'p' exponent")
|
||||
}
|
||||
|
||||
// complex
|
||||
// suffix 'i'
|
||||
if c == 'i' {
|
||||
s.kind = ImagLit
|
||||
s.getr()
|
||||
if prefix != 0 && prefix != '0' {
|
||||
s.error("invalid suffix 'i' on " + litname(prefix))
|
||||
}
|
||||
c = s.getr()
|
||||
}
|
||||
|
||||
done:
|
||||
s.ungetr()
|
||||
|
||||
s.nlsemi = true
|
||||
s.lit = string(s.stopLit())
|
||||
s.tok = _Literal
|
||||
|
||||
if s.kind == IntLit && invalid >= 0 {
|
||||
s.errh(s.line, s.col+uint(invalid), fmt.Sprintf("invalid digit %q in %s", s.lit[invalid], litname(prefix)))
|
||||
}
|
||||
|
||||
if digsep&2 != 0 {
|
||||
if i := invalidSep(s.lit); i >= 0 {
|
||||
s.errh(s.line, s.col+uint(i), "'_' must separate successive digits")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// litname returns the human-readable name of the literal kind selected by
// prefix: 'x' is hexadecimal, 'o' and '0' are octal, 'b' is binary, and
// every other value is reported as decimal.
func litname(prefix rune) string {
	if prefix == 'x' {
		return "hexadecimal literal"
	}
	if prefix == 'o' || prefix == '0' {
		return "octal literal"
	}
	if prefix == 'b' {
		return "binary literal"
	}
	return "decimal literal"
}
|
||||
|
||||
// invalidSep returns the index of the first invalid separator in x, or -1.
|
||||
func invalidSep(x string) int {
|
||||
x1 := ' ' // prefix char, we only care if it's 'x'
|
||||
d := '.' // digit, one of '_', '0' (a digit), or '.' (anything else)
|
||||
i := 0
|
||||
|
||||
// a prefix counts as a digit
|
||||
if len(x) >= 2 && x[0] == '0' {
|
||||
x1 = lower(rune(x[1]))
|
||||
if x1 == 'x' || x1 == 'o' || x1 == 'b' {
|
||||
d = '0'
|
||||
i = 2
|
||||
}
|
||||
}
|
||||
|
||||
// mantissa and exponent
|
||||
for ; i < len(x); i++ {
|
||||
p := d // previous digit
|
||||
d = rune(x[i])
|
||||
switch {
|
||||
case d == '_':
|
||||
if p != '0' {
|
||||
return i
|
||||
}
|
||||
case isDecimal(d) || x1 == 'x' && isHex(d):
|
||||
d = '0'
|
||||
default:
|
||||
if p == '_' {
|
||||
return i - 1
|
||||
}
|
||||
d = '.'
|
||||
}
|
||||
}
|
||||
if d == '_' {
|
||||
return len(x) - 1
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
func (s *scanner) rune() {
|
||||
|
|
@ -713,12 +816,10 @@ func (s *scanner) escape(quote rune) bool {
|
|||
for i := n; i > 0; i-- {
|
||||
d := base
|
||||
switch {
|
||||
case isDigit(c):
|
||||
case isDecimal(c):
|
||||
d = uint32(c) - '0'
|
||||
case 'a' <= c && c <= 'f':
|
||||
d = uint32(c) - ('a' - 10)
|
||||
case 'A' <= c && c <= 'F':
|
||||
d = uint32(c) - ('A' - 10)
|
||||
case 'a' <= lower(c) && lower(c) <= 'f':
|
||||
d = uint32(lower(c)) - ('a' - 10)
|
||||
}
|
||||
if d >= base {
|
||||
if c < 0 {
|
||||
|
|
@ -728,7 +829,7 @@ func (s *scanner) escape(quote rune) bool {
|
|||
if base == 8 {
|
||||
kind = "octal"
|
||||
}
|
||||
s.error(fmt.Sprintf("non-%s character in escape sequence: %c", kind, c))
|
||||
s.errorf("non-%s character in escape sequence: %c", kind, c)
|
||||
s.ungetr()
|
||||
return false
|
||||
}
|
||||
|
|
@ -739,7 +840,7 @@ func (s *scanner) escape(quote rune) bool {
|
|||
s.ungetr()
|
||||
|
||||
if x > max && base == 8 {
|
||||
s.error(fmt.Sprintf("octal escape value > 255: %d", x))
|
||||
s.errorf("octal escape value > 255: %d", x)
|
||||
return false
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue