2019-10-16 18:19:48 +09:00
|
|
|
package scanner
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"io"
|
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
|
|
"github.com/goccy/go-yaml/token"
|
|
|
|
|
"golang.org/x/xerrors"
|
|
|
|
|
)
|
|
|
|
|
|
2019-10-21 12:53:30 +09:00
|
|
|
// IndentState describes how the indentation of the line being scanned
// relates to the indentation recorded for the previous line.
type IndentState int

// Possible IndentState values. The numeric values start at zero and follow
// declaration order.
const (
	// IndentStateEqual means the indent is the same as the previous one.
	IndentStateEqual IndentState = iota
	// IndentStateUp means the indent is deeper than the previous one.
	IndentStateUp
	// IndentStateDown means the indent is shallower than the previous one.
	IndentStateDown
	// IndentStateKeep means the current character is not the first
	// non-space character of its line, so the indent is left as it is.
	IndentStateKeep
)
|
|
|
|
|
|
2019-10-21 12:53:30 +09:00
|
|
|
// Scanner holds the scanner's internal state while processing a given text.
|
|
|
|
|
// It can be allocated as part of another data structure but must be initialized via Init before use.
|
2019-10-16 18:19:48 +09:00
|
|
|
type Scanner struct {
|
2019-11-07 18:01:45 +09:00
|
|
|
source []rune
|
2019-11-07 13:00:00 +09:00
|
|
|
sourcePos int
|
|
|
|
|
sourceSize int
|
|
|
|
|
line int
|
|
|
|
|
column int
|
|
|
|
|
offset int
|
|
|
|
|
prevIndentLevel int
|
|
|
|
|
prevIndentNum int
|
|
|
|
|
prevIndentColumn int
|
|
|
|
|
indentLevel int
|
|
|
|
|
indentNum int
|
|
|
|
|
isFirstCharAtLine bool
|
|
|
|
|
isAnchor bool
|
|
|
|
|
startedFlowSequenceNum int
|
|
|
|
|
startedFlowMapNum int
|
|
|
|
|
indentState IndentState
|
|
|
|
|
savedPos *token.Position
|
2019-10-16 18:19:48 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *Scanner) pos() *token.Position {
|
|
|
|
|
return &token.Position{
|
|
|
|
|
Line: s.line,
|
|
|
|
|
Column: s.column,
|
|
|
|
|
Offset: s.offset,
|
|
|
|
|
IndentNum: s.indentNum,
|
|
|
|
|
IndentLevel: s.indentLevel,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *Scanner) bufferedToken(ctx *Context) *token.Token {
|
|
|
|
|
if s.savedPos != nil {
|
|
|
|
|
tk := ctx.bufferedToken(s.savedPos)
|
|
|
|
|
s.savedPos = nil
|
|
|
|
|
return tk
|
|
|
|
|
}
|
|
|
|
|
trimmedSrc := strings.TrimLeft(string(ctx.buf), " ")
|
2019-11-07 19:16:19 +09:00
|
|
|
size := len([]rune(trimmedSrc))
|
2019-10-16 18:19:48 +09:00
|
|
|
return ctx.bufferedToken(&token.Position{
|
|
|
|
|
Line: s.line,
|
|
|
|
|
Column: s.column - size,
|
|
|
|
|
Offset: s.offset - size,
|
|
|
|
|
IndentNum: s.indentNum,
|
|
|
|
|
IndentLevel: s.indentLevel,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *Scanner) progressColumn(ctx *Context, num int) {
|
|
|
|
|
s.column += num
|
|
|
|
|
s.offset += num
|
|
|
|
|
ctx.progress(num)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *Scanner) progressLine(ctx *Context) {
|
|
|
|
|
s.column = 1
|
|
|
|
|
s.line++
|
|
|
|
|
s.offset++
|
|
|
|
|
s.indentNum = 0
|
|
|
|
|
s.isFirstCharAtLine = true
|
|
|
|
|
s.isAnchor = false
|
|
|
|
|
ctx.progress(1)
|
|
|
|
|
}
|
|
|
|
|
|
2019-11-07 23:54:32 +09:00
|
|
|
func (s *Scanner) isNeededKeepPreviousIndentNum(ctx *Context, c rune) bool {
|
|
|
|
|
if !s.isChangedToIndentStateUp() {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
if ctx.isDocument() {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
if c == '-' && ctx.bufferedSrc() != "" {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
2019-11-07 23:45:39 +09:00
|
|
|
func (s *Scanner) updateIndent(ctx *Context, c rune) {
|
2019-10-16 18:19:48 +09:00
|
|
|
if s.isFirstCharAtLine && c == ' ' {
|
|
|
|
|
s.indentNum++
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
if !s.isFirstCharAtLine {
|
|
|
|
|
s.indentState = IndentStateKeep
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if s.prevIndentNum < s.indentNum {
|
|
|
|
|
s.indentLevel = s.prevIndentLevel + 1
|
|
|
|
|
s.indentState = IndentStateUp
|
|
|
|
|
} else if s.prevIndentNum == s.indentNum {
|
|
|
|
|
s.indentLevel = s.prevIndentLevel
|
|
|
|
|
s.indentState = IndentStateEqual
|
|
|
|
|
} else {
|
|
|
|
|
s.indentState = IndentStateDown
|
2019-10-21 14:54:26 +09:00
|
|
|
if s.prevIndentLevel > 0 {
|
|
|
|
|
s.indentLevel = s.prevIndentLevel - 1
|
|
|
|
|
}
|
2019-10-16 18:19:48 +09:00
|
|
|
}
|
2019-10-21 15:58:46 +09:00
|
|
|
|
2019-10-23 20:22:14 +09:00
|
|
|
if s.prevIndentColumn > 0 {
|
|
|
|
|
if s.prevIndentColumn < s.column {
|
2019-10-21 15:58:46 +09:00
|
|
|
s.indentState = IndentStateUp
|
2019-10-23 20:22:14 +09:00
|
|
|
} else if s.prevIndentColumn == s.column {
|
2019-10-21 15:58:46 +09:00
|
|
|
s.indentState = IndentStateEqual
|
|
|
|
|
} else {
|
|
|
|
|
s.indentState = IndentStateDown
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-11-07 23:45:39 +09:00
|
|
|
s.isFirstCharAtLine = false
|
2019-11-07 23:54:32 +09:00
|
|
|
if s.isNeededKeepPreviousIndentNum(ctx, c) {
|
2019-11-07 23:45:39 +09:00
|
|
|
return
|
|
|
|
|
}
|
2019-10-16 18:19:48 +09:00
|
|
|
s.prevIndentNum = s.indentNum
|
2019-10-23 20:22:14 +09:00
|
|
|
s.prevIndentColumn = 0
|
2019-10-16 18:19:48 +09:00
|
|
|
s.prevIndentLevel = s.indentLevel
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// isChangedToIndentStateDown reports whether the most recent updateIndent
// call left the scanner in IndentStateDown.
func (s *Scanner) isChangedToIndentStateDown() bool {
	return s.indentState == IndentStateDown
}
|
|
|
|
|
|
|
|
|
|
// isChangedToIndentStateUp reports whether the most recent updateIndent call
// left the scanner in IndentStateUp.
func (s *Scanner) isChangedToIndentStateUp() bool {
	return s.indentState == IndentStateUp
}
|
|
|
|
|
|
|
|
|
|
// isChangedToIndentStateEqual reports whether the most recent updateIndent
// call left the scanner in IndentStateEqual.
func (s *Scanner) isChangedToIndentStateEqual() bool {
	return s.indentState == IndentStateEqual
}
|
|
|
|
|
|
|
|
|
|
func (s *Scanner) addBufferedTokenIfExists(ctx *Context) {
|
|
|
|
|
ctx.addToken(s.bufferedToken(ctx))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// breakLiteral delegates to ctx.breakLiteral. scan calls it when a line in
// document mode is indented no deeper than the previous one.
func (s *Scanner) breakLiteral(ctx *Context) {
	ctx.breakLiteral()
}
|
|
|
|
|
|
|
|
|
|
func (s *Scanner) scanQuote(ctx *Context, ch rune) (tk *token.Token, pos int) {
|
|
|
|
|
ctx.addOriginBuf(ch)
|
|
|
|
|
startIndex := ctx.idx + 1
|
|
|
|
|
ctx.progress(1)
|
|
|
|
|
for idx, c := range ctx.src[startIndex:] {
|
|
|
|
|
pos = idx + 1
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
switch c {
|
|
|
|
|
case ch:
|
|
|
|
|
if ctx.previousChar() == '\\' {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
value := ctx.source(startIndex, startIndex+idx)
|
|
|
|
|
switch ch {
|
|
|
|
|
case '\'':
|
|
|
|
|
tk = token.SingleQuote(value, string(ctx.obuf), s.pos())
|
|
|
|
|
case '"':
|
|
|
|
|
tk = token.DoubleQuote(value, string(ctx.obuf), s.pos())
|
|
|
|
|
}
|
2019-11-07 19:16:19 +09:00
|
|
|
pos = len([]rune(value)) + 1
|
2019-10-16 18:19:48 +09:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *Scanner) scanTag(ctx *Context) (tk *token.Token, pos int) {
|
|
|
|
|
ctx.addOriginBuf('!')
|
|
|
|
|
ctx.progress(1) // skip '!' character
|
|
|
|
|
for idx, c := range ctx.src[ctx.idx:] {
|
|
|
|
|
pos = idx + 1
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
switch c {
|
|
|
|
|
case ' ', '\n':
|
|
|
|
|
value := ctx.source(ctx.idx-1, ctx.idx+idx)
|
|
|
|
|
tk = token.Tag(value, string(ctx.obuf), s.pos())
|
2019-11-07 19:16:19 +09:00
|
|
|
pos = len([]rune(value))
|
2019-10-16 18:19:48 +09:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *Scanner) scanComment(ctx *Context) (tk *token.Token, pos int) {
|
|
|
|
|
ctx.addOriginBuf('#')
|
|
|
|
|
ctx.progress(1) // skip '#' character
|
|
|
|
|
for idx, c := range ctx.src[ctx.idx:] {
|
|
|
|
|
pos = idx + 1
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
switch c {
|
|
|
|
|
case '\n':
|
|
|
|
|
if ctx.previousChar() == '\\' {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
value := ctx.source(ctx.idx, ctx.idx+idx)
|
|
|
|
|
tk = token.Comment(value, string(ctx.obuf), s.pos())
|
2019-11-07 19:16:19 +09:00
|
|
|
pos = len([]rune(value)) + 1
|
2019-10-16 18:19:48 +09:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *Scanner) scanLiteral(ctx *Context, c rune) {
|
|
|
|
|
if ctx.isEOS() {
|
|
|
|
|
value := ctx.bufferedSrc()
|
|
|
|
|
ctx.addToken(token.New(value, string(ctx.obuf), s.pos()))
|
2019-11-07 23:45:39 +09:00
|
|
|
ctx.resetBuffer()
|
2019-10-16 18:19:48 +09:00
|
|
|
}
|
|
|
|
|
if c == '\n' {
|
|
|
|
|
if ctx.isLiteral {
|
|
|
|
|
ctx.addBuf(c)
|
|
|
|
|
} else {
|
|
|
|
|
ctx.addBuf(' ')
|
|
|
|
|
}
|
|
|
|
|
s.progressLine(ctx)
|
|
|
|
|
} else if s.isFirstCharAtLine && c == ' ' {
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
} else {
|
|
|
|
|
ctx.addBuf(c)
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
}
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *Scanner) scanLiteralHeader(ctx *Context) (pos int, err error) {
|
|
|
|
|
header := ctx.currentChar()
|
|
|
|
|
ctx.addOriginBuf(header)
|
|
|
|
|
ctx.progress(1) // skip '|' or '<' character
|
|
|
|
|
for idx, c := range ctx.src[ctx.idx:] {
|
|
|
|
|
pos = idx
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
switch c {
|
|
|
|
|
case '\n':
|
|
|
|
|
value := ctx.source(ctx.idx, ctx.idx+idx)
|
|
|
|
|
opt := strings.TrimRight(value, " ")
|
|
|
|
|
switch opt {
|
|
|
|
|
case "", "+", "-",
|
|
|
|
|
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9":
|
|
|
|
|
if header == '|' {
|
|
|
|
|
ctx.addToken(token.Literal("|"+opt, string(ctx.obuf), s.pos()))
|
|
|
|
|
ctx.isLiteral = true
|
|
|
|
|
} else if header == '>' {
|
|
|
|
|
ctx.addToken(token.Folded(">"+opt, string(ctx.obuf), s.pos()))
|
|
|
|
|
ctx.isFolded = true
|
|
|
|
|
}
|
|
|
|
|
ctx.resetBuffer()
|
|
|
|
|
ctx.literalOpt = opt
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
err = xerrors.New("invalid literal header")
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *Scanner) scanNewLine(ctx *Context, c rune) {
|
|
|
|
|
if len(ctx.buf) > 0 && s.savedPos == nil {
|
|
|
|
|
s.savedPos = s.pos()
|
2019-11-07 19:16:19 +09:00
|
|
|
s.savedPos.Column -= len([]rune(ctx.bufferedSrc()))
|
2019-10-16 18:19:48 +09:00
|
|
|
}
|
|
|
|
|
if ctx.isEOS() {
|
|
|
|
|
s.addBufferedTokenIfExists(ctx)
|
2019-10-17 15:44:55 +09:00
|
|
|
} else if s.isAnchor {
|
|
|
|
|
s.addBufferedTokenIfExists(ctx)
|
2019-10-16 18:19:48 +09:00
|
|
|
}
|
|
|
|
|
ctx.addBuf(' ')
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
s.progressLine(ctx)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *Scanner) scan(ctx *Context) (pos int) {
|
|
|
|
|
for ctx.next() {
|
|
|
|
|
pos = ctx.nextPos()
|
|
|
|
|
c := ctx.currentChar()
|
2019-11-07 23:45:39 +09:00
|
|
|
s.updateIndent(ctx, c)
|
|
|
|
|
if ctx.isDocument() {
|
|
|
|
|
if s.isChangedToIndentStateEqual() ||
|
|
|
|
|
s.isChangedToIndentStateDown() {
|
|
|
|
|
s.addBufferedTokenIfExists(ctx)
|
|
|
|
|
s.breakLiteral(ctx)
|
|
|
|
|
} else {
|
|
|
|
|
s.scanLiteral(ctx, c)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
} else if s.isChangedToIndentStateDown() {
|
2019-10-16 18:19:48 +09:00
|
|
|
s.addBufferedTokenIfExists(ctx)
|
2019-10-25 15:03:08 +09:00
|
|
|
} else if s.isChangedToIndentStateEqual() {
|
|
|
|
|
// if first character is \n, buffer expect to raw folded literal
|
|
|
|
|
if len(ctx.obuf) > 0 && ctx.obuf[0] != '\n' {
|
|
|
|
|
// doesn't raw folded literal
|
|
|
|
|
s.addBufferedTokenIfExists(ctx)
|
|
|
|
|
}
|
2019-10-16 18:19:48 +09:00
|
|
|
}
|
|
|
|
|
switch c {
|
|
|
|
|
case '{':
|
2019-11-06 19:28:47 +09:00
|
|
|
if ctx.bufferedSrc() == "" {
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
ctx.addToken(token.MappingStart(string(ctx.obuf), s.pos()))
|
2019-11-07 13:00:00 +09:00
|
|
|
s.startedFlowMapNum++
|
2019-11-06 19:28:47 +09:00
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
return
|
|
|
|
|
}
|
2019-10-16 18:19:48 +09:00
|
|
|
case '}':
|
2019-11-07 13:00:00 +09:00
|
|
|
if ctx.bufferedSrc() == "" || s.startedFlowMapNum > 0 {
|
2019-11-06 19:28:47 +09:00
|
|
|
ctx.addToken(s.bufferedToken(ctx))
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
ctx.addToken(token.MappingEnd(string(ctx.obuf), s.pos()))
|
2019-11-07 13:00:00 +09:00
|
|
|
s.startedFlowMapNum--
|
2019-11-06 19:28:47 +09:00
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
return
|
|
|
|
|
}
|
2019-10-16 18:19:48 +09:00
|
|
|
case '.':
|
|
|
|
|
if s.indentNum == 0 && ctx.repeatNum('.') == 3 {
|
|
|
|
|
ctx.addToken(token.DocumentEnd(s.pos()))
|
|
|
|
|
s.progressColumn(ctx, 3)
|
|
|
|
|
pos += 2
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
case '<':
|
|
|
|
|
if ctx.repeatNum('<') == 2 {
|
2019-10-23 20:22:14 +09:00
|
|
|
s.prevIndentColumn = s.column
|
2019-10-17 01:58:10 +09:00
|
|
|
ctx.addToken(token.MergeKey(string(ctx.obuf)+"<<", s.pos()))
|
2019-10-16 18:19:48 +09:00
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
pos++
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
case '-':
|
|
|
|
|
if s.indentNum == 0 && ctx.repeatNum('-') == 3 {
|
|
|
|
|
s.addBufferedTokenIfExists(ctx)
|
|
|
|
|
ctx.addToken(token.DocumentHeader(s.pos()))
|
|
|
|
|
s.progressColumn(ctx, 3)
|
|
|
|
|
pos += 2
|
|
|
|
|
return
|
|
|
|
|
}
|
2019-10-25 15:47:02 +09:00
|
|
|
if ctx.bufferedSrc() != "" && s.isChangedToIndentStateUp() {
|
2019-10-16 18:19:48 +09:00
|
|
|
// raw folded
|
|
|
|
|
ctx.isRawFolded = true
|
|
|
|
|
ctx.addBuf(c)
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
nc := ctx.nextChar()
|
|
|
|
|
if nc == ' ' {
|
|
|
|
|
s.addBufferedTokenIfExists(ctx)
|
|
|
|
|
ctx.addOriginBuf(c)
|
2019-10-23 20:22:14 +09:00
|
|
|
tk := token.SequenceEntry(string(ctx.obuf), s.pos())
|
|
|
|
|
s.prevIndentColumn = tk.Position.Column
|
|
|
|
|
ctx.addToken(tk)
|
2019-10-16 18:19:48 +09:00
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
case '[':
|
2019-11-06 19:28:47 +09:00
|
|
|
if ctx.bufferedSrc() == "" {
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
ctx.addToken(token.SequenceStart(string(ctx.obuf), s.pos()))
|
2019-11-07 13:00:00 +09:00
|
|
|
s.startedFlowSequenceNum++
|
2019-11-06 19:28:47 +09:00
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
return
|
|
|
|
|
}
|
2019-10-16 18:19:48 +09:00
|
|
|
case ']':
|
2019-11-07 13:00:00 +09:00
|
|
|
if ctx.bufferedSrc() == "" || s.startedFlowSequenceNum > 0 {
|
2019-11-06 19:28:47 +09:00
|
|
|
s.addBufferedTokenIfExists(ctx)
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
ctx.addToken(token.SequenceEnd(string(ctx.obuf), s.pos()))
|
2019-11-07 13:00:00 +09:00
|
|
|
s.startedFlowSequenceNum--
|
2019-11-06 19:28:47 +09:00
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
return
|
|
|
|
|
}
|
2019-10-16 18:19:48 +09:00
|
|
|
case ',':
|
2019-11-07 13:00:00 +09:00
|
|
|
if s.startedFlowSequenceNum > 0 || s.startedFlowMapNum > 0 {
|
2019-11-06 19:28:47 +09:00
|
|
|
s.addBufferedTokenIfExists(ctx)
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
ctx.addToken(token.CollectEntry(string(ctx.obuf), s.pos()))
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
return
|
|
|
|
|
}
|
2019-10-16 18:19:48 +09:00
|
|
|
case ':':
|
|
|
|
|
nc := ctx.nextChar()
|
2019-10-30 16:57:59 +09:00
|
|
|
if nc == ' ' || nc == '\n' || ctx.isNextEOS() {
|
2019-10-16 18:19:48 +09:00
|
|
|
// mapping value
|
2019-10-17 15:44:55 +09:00
|
|
|
tk := s.bufferedToken(ctx)
|
|
|
|
|
if tk != nil {
|
2019-10-23 20:22:14 +09:00
|
|
|
s.prevIndentColumn = tk.Position.Column
|
2019-10-17 15:44:55 +09:00
|
|
|
ctx.addToken(tk)
|
|
|
|
|
}
|
2019-10-16 18:19:48 +09:00
|
|
|
ctx.addToken(token.MappingValue(s.pos()))
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
case '|', '>':
|
|
|
|
|
if ctx.bufferedSrc() == "" {
|
|
|
|
|
progress, err := s.scanLiteralHeader(ctx)
|
|
|
|
|
if err != nil {
|
|
|
|
|
// TODO: returns syntax error object
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
s.progressColumn(ctx, progress)
|
|
|
|
|
s.progressLine(ctx)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
case '!':
|
2019-11-06 19:28:47 +09:00
|
|
|
if ctx.bufferedSrc() == "" {
|
|
|
|
|
token, progress := s.scanTag(ctx)
|
|
|
|
|
ctx.addToken(token)
|
|
|
|
|
s.progressColumn(ctx, progress)
|
|
|
|
|
if c := ctx.previousChar(); c == '\n' {
|
|
|
|
|
s.progressLine(ctx)
|
|
|
|
|
}
|
|
|
|
|
pos += progress
|
|
|
|
|
return
|
2019-10-16 18:19:48 +09:00
|
|
|
}
|
|
|
|
|
case '%':
|
2019-11-06 19:28:47 +09:00
|
|
|
if ctx.bufferedSrc() == "" && s.indentNum == 0 {
|
2019-10-16 18:19:48 +09:00
|
|
|
ctx.addToken(token.Directive(s.pos()))
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
case '?':
|
|
|
|
|
nc := ctx.nextChar()
|
2019-11-06 19:28:47 +09:00
|
|
|
if ctx.bufferedSrc() == "" && nc == ' ' {
|
2019-10-16 18:19:48 +09:00
|
|
|
ctx.addToken(token.Directive(s.pos()))
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
case '&':
|
2019-11-06 19:28:47 +09:00
|
|
|
if ctx.bufferedSrc() == "" {
|
|
|
|
|
s.addBufferedTokenIfExists(ctx)
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
ctx.addToken(token.Anchor(string(ctx.obuf), s.pos()))
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
s.isAnchor = true
|
|
|
|
|
return
|
|
|
|
|
}
|
2019-10-16 18:19:48 +09:00
|
|
|
case '*':
|
2019-11-06 19:28:47 +09:00
|
|
|
if ctx.bufferedSrc() == "" {
|
|
|
|
|
s.addBufferedTokenIfExists(ctx)
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
ctx.addToken(token.Alias(string(ctx.obuf), s.pos()))
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
return
|
|
|
|
|
}
|
2019-10-16 18:19:48 +09:00
|
|
|
case '#':
|
|
|
|
|
s.addBufferedTokenIfExists(ctx)
|
|
|
|
|
token, progress := s.scanComment(ctx)
|
|
|
|
|
ctx.addToken(token)
|
|
|
|
|
s.progressColumn(ctx, progress)
|
|
|
|
|
s.progressLine(ctx)
|
|
|
|
|
pos += progress
|
|
|
|
|
return
|
|
|
|
|
case '\'', '"':
|
2019-11-07 18:08:12 +09:00
|
|
|
if ctx.bufferedSrc() == "" {
|
|
|
|
|
token, progress := s.scanQuote(ctx, c)
|
|
|
|
|
ctx.addToken(token)
|
|
|
|
|
s.progressColumn(ctx, progress)
|
|
|
|
|
pos += progress
|
|
|
|
|
return
|
|
|
|
|
}
|
2019-10-16 18:19:48 +09:00
|
|
|
case '\n':
|
|
|
|
|
s.scanNewLine(ctx, c)
|
|
|
|
|
continue
|
|
|
|
|
case ' ':
|
|
|
|
|
if ctx.isSaveIndentMode() || (!s.isAnchor && !s.isFirstCharAtLine) {
|
|
|
|
|
ctx.addBuf(c)
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if s.isFirstCharAtLine {
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
s.addBufferedTokenIfExists(ctx)
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
s.isAnchor = false
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
ctx.addBuf(c)
|
|
|
|
|
ctx.addOriginBuf(c)
|
|
|
|
|
s.progressColumn(ctx, 1)
|
|
|
|
|
}
|
2019-10-23 03:21:42 +09:00
|
|
|
s.addBufferedTokenIfExists(ctx)
|
2019-10-16 18:19:48 +09:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-21 12:53:30 +09:00
|
|
|
// Init prepares the scanner s to tokenize the text src by setting the scanner at the beginning of src.
|
2019-11-07 18:01:45 +09:00
|
|
|
func (s *Scanner) Init(text string) {
|
|
|
|
|
src := []rune(text)
|
2019-10-16 18:19:48 +09:00
|
|
|
s.source = src
|
|
|
|
|
s.sourcePos = 0
|
|
|
|
|
s.sourceSize = len(src)
|
|
|
|
|
s.line = 1
|
|
|
|
|
s.column = 1
|
|
|
|
|
s.offset = 1
|
|
|
|
|
s.prevIndentLevel = 0
|
|
|
|
|
s.prevIndentNum = 0
|
2019-10-23 20:22:14 +09:00
|
|
|
s.prevIndentColumn = 0
|
2019-10-16 18:19:48 +09:00
|
|
|
s.indentLevel = 0
|
|
|
|
|
s.indentNum = 0
|
|
|
|
|
s.isFirstCharAtLine = true
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-21 12:53:30 +09:00
|
|
|
// Scan scans the next token and returns the token collection. The source end is indicated by io.EOF.
|
2019-10-16 18:19:48 +09:00
|
|
|
func (s *Scanner) Scan() (token.Tokens, error) {
|
|
|
|
|
if s.sourcePos >= s.sourceSize {
|
|
|
|
|
return nil, io.EOF
|
|
|
|
|
}
|
|
|
|
|
ctx := newContext(s.source[s.sourcePos:])
|
|
|
|
|
progress := s.scan(ctx)
|
|
|
|
|
s.sourcePos += progress
|
|
|
|
|
return ctx.tokens, nil
|
|
|
|
|
}
|