2019-10-16 18:19:48 +09:00
package scanner
import (
"io"
"strings"
"github.com/goccy/go-yaml/token"
"golang.org/x/xerrors"
)
2019-10-21 12:53:30 +09:00
// IndentState describes how the indentation of the character currently being
// scanned relates to the indentation recorded for the previous line.
type IndentState int

const (
	// IndentStateEqual means the indent equals the previous indent.
	IndentStateEqual IndentState = iota
	// IndentStateUp means the indent is deeper than the previous indent.
	IndentStateUp
	// IndentStateDown means the indent is shallower than the previous indent.
	IndentStateDown
	// IndentStateKeep means the character does not start a line, so no new
	// indent comparison was made for it.
	IndentStateKeep
)
2019-10-21 12:53:30 +09:00
// Scanner holds the scanner's internal state while processing a given text.
// It can be allocated as part of another data structure but must be initialized via Init before use.
2019-10-16 18:19:48 +09:00
type Scanner struct {
2019-11-07 18:01:45 +09:00
source [ ] rune
2019-11-07 13:00:00 +09:00
sourcePos int
sourceSize int
line int
column int
offset int
prevIndentLevel int
prevIndentNum int
prevIndentColumn int
2019-11-08 01:04:29 +09:00
docStartColumn int
2019-11-07 13:00:00 +09:00
indentLevel int
indentNum int
isFirstCharAtLine bool
isAnchor bool
startedFlowSequenceNum int
startedFlowMapNum int
indentState IndentState
savedPos * token . Position
2019-10-16 18:19:48 +09:00
}
// pos returns a freshly allocated snapshot of the scanner's current line,
// column, offset and indentation.
func (s *Scanner) pos() *token.Position {
	current := token.Position{
		Line:        s.line,
		Column:      s.column,
		Offset:      s.offset,
		IndentNum:   s.indentNum,
		IndentLevel: s.indentLevel,
	}
	return &current
}
func ( s * Scanner ) bufferedToken ( ctx * Context ) * token . Token {
if s . savedPos != nil {
tk := ctx . bufferedToken ( s . savedPos )
s . savedPos = nil
return tk
}
2019-12-29 00:16:35 +09:00
size := len ( ctx . buf )
2019-10-16 18:19:48 +09:00
return ctx . bufferedToken ( & token . Position {
Line : s . line ,
Column : s . column - size ,
Offset : s . offset - size ,
IndentNum : s . indentNum ,
IndentLevel : s . indentLevel ,
} )
}
// progressColumn moves the scanner num characters forward on the current
// line and advances ctx by the same amount.
func (s *Scanner) progressColumn(ctx *Context, num int) {
	s.column, s.offset = s.column+num, s.offset+num
	ctx.progress(num)
}
// progressLine moves the scanner to the first column of the next line,
// clears the per-line state (indent count, anchor flag) and advances ctx by
// the single line-break character.
func (s *Scanner) progressLine(ctx *Context) {
	s.line++
	s.offset++
	s.column = 1
	s.indentNum = 0
	s.isAnchor = false
	s.isFirstCharAtLine = true
	ctx.progress(1)
}
2019-11-07 23:54:32 +09:00
func ( s * Scanner ) isNeededKeepPreviousIndentNum ( ctx * Context , c rune ) bool {
if ! s . isChangedToIndentStateUp ( ) {
return false
}
if ctx . isDocument ( ) {
return true
}
2019-12-28 22:37:50 +09:00
if c == '-' && ctx . existsBuffer ( ) {
2019-11-07 23:54:32 +09:00
return true
}
return false
}
2019-12-22 11:59:49 +09:00
// isNewLineChar reports whether c is a line feed or a carriage return.
func (s *Scanner) isNewLineChar(c rune) bool {
	switch c {
	case '\n', '\r':
		return true
	default:
		return false
	}
}
2019-12-22 17:28:30 +09:00
// newLineCount returns the number of line breaks in src. A carriage return
// immediately followed by a line feed counts as a single break.
func (s *Scanner) newLineCount(src []rune) int {
	total := 0
	afterCR := false
	for _, r := range src {
		if afterCR && r == '\n' {
			// Second half of a CRLF pair: already counted with the CR.
			afterCR = false
			continue
		}
		afterCR = r == '\r'
		if r == '\r' || r == '\n' {
			total++
		}
	}
	return total
}
2019-11-07 23:45:39 +09:00
func ( s * Scanner ) updateIndent ( ctx * Context , c rune ) {
2019-12-22 11:59:49 +09:00
if s . isFirstCharAtLine && s . isNewLineChar ( c ) && ctx . isDocument ( ) {
2019-11-19 18:03:24 +09:00
return
}
2019-10-16 18:19:48 +09:00
if s . isFirstCharAtLine && c == ' ' {
s . indentNum ++
return
}
if ! s . isFirstCharAtLine {
s . indentState = IndentStateKeep
return
}
if s . prevIndentNum < s . indentNum {
s . indentLevel = s . prevIndentLevel + 1
s . indentState = IndentStateUp
} else if s . prevIndentNum == s . indentNum {
s . indentLevel = s . prevIndentLevel
s . indentState = IndentStateEqual
} else {
s . indentState = IndentStateDown
2019-10-21 14:54:26 +09:00
if s . prevIndentLevel > 0 {
s . indentLevel = s . prevIndentLevel - 1
}
2019-10-16 18:19:48 +09:00
}
2019-10-21 15:58:46 +09:00
2019-10-23 20:22:14 +09:00
if s . prevIndentColumn > 0 {
if s . prevIndentColumn < s . column {
2019-10-21 15:58:46 +09:00
s . indentState = IndentStateUp
2019-10-23 20:22:14 +09:00
} else if s . prevIndentColumn == s . column {
2019-10-21 15:58:46 +09:00
s . indentState = IndentStateEqual
} else {
s . indentState = IndentStateDown
}
}
2019-11-07 23:45:39 +09:00
s . isFirstCharAtLine = false
2019-11-07 23:54:32 +09:00
if s . isNeededKeepPreviousIndentNum ( ctx , c ) {
2019-11-07 23:45:39 +09:00
return
}
2019-10-16 18:19:48 +09:00
s . prevIndentNum = s . indentNum
2019-10-23 20:22:14 +09:00
s . prevIndentColumn = 0
2019-10-16 18:19:48 +09:00
s . prevIndentLevel = s . indentLevel
}
// isChangedToIndentStateDown reports whether the latest indent update
// produced IndentStateDown.
func (s *Scanner) isChangedToIndentStateDown() bool {
	state := s.indentState
	return state == IndentStateDown
}
// isChangedToIndentStateUp reports whether the latest indent update produced
// IndentStateUp.
func (s *Scanner) isChangedToIndentStateUp() bool {
	state := s.indentState
	return state == IndentStateUp
}
// isChangedToIndentStateEqual reports whether the latest indent update
// produced IndentStateEqual.
func (s *Scanner) isChangedToIndentStateEqual() bool {
	state := s.indentState
	return state == IndentStateEqual
}
// addBufferedTokenIfExists builds a token from ctx's buffered text and hands
// it to ctx.addToken. The result of bufferedToken is forwarded as is, so
// skipping an absent token is presumably left to addToken.
func (s *Scanner) addBufferedTokenIfExists(ctx *Context) {
	tk := s.bufferedToken(ctx)
	ctx.addToken(tk)
}
func ( s * Scanner ) breakLiteral ( ctx * Context ) {
2019-11-08 01:04:29 +09:00
s . docStartColumn = 0
2019-10-16 18:19:48 +09:00
ctx . breakLiteral ( )
}
2020-05-29 18:09:51 +09:00
func ( s * Scanner ) scanSingleQuote ( ctx * Context ) ( tk * token . Token , pos int ) {
ctx . addOriginBuf ( '\'' )
2019-10-16 18:19:48 +09:00
startIndex := ctx . idx + 1
ctx . progress ( 1 )
2020-05-29 18:09:51 +09:00
src := ctx . src
size := len ( src )
value := [ ] rune { }
for idx := startIndex ; idx < size ; idx ++ {
c := src [ idx ]
2019-10-16 18:19:48 +09:00
pos = idx + 1
ctx . addOriginBuf ( c )
2020-05-29 18:09:51 +09:00
if c != '\'' {
value = append ( value , c )
continue
}
if idx + 1 < len ( ctx . src ) && ctx . src [ idx + 1 ] == '\'' {
// '' handle as ' character
value = append ( value , c )
2020-05-29 18:23:08 +09:00
ctx . addOriginBuf ( c )
2020-05-29 18:09:51 +09:00
idx ++
continue
}
tk = token . SingleQuote ( string ( value ) , string ( ctx . obuf ) , s . pos ( ) )
pos = len ( [ ] rune ( value ) ) + 1
return
}
return
}
func ( s * Scanner ) scanDoubleQuote ( ctx * Context ) ( tk * token . Token , pos int ) {
ctx . addOriginBuf ( '"' )
startIndex := ctx . idx + 1
ctx . progress ( 1 )
src := ctx . src
size := len ( src )
value := [ ] rune { }
for idx := startIndex ; idx < size ; idx ++ {
c := src [ idx ]
pos = idx + 1
ctx . addOriginBuf ( c )
if c == '\\' {
if idx + 1 < size {
nextChar := src [ idx + 1 ]
switch nextChar {
case '"' :
ctx . addOriginBuf ( nextChar )
value = append ( value , nextChar )
idx ++
continue
case '\\' :
ctx . addOriginBuf ( nextChar )
idx ++
}
2019-10-16 18:19:48 +09:00
}
2020-05-29 18:09:51 +09:00
value = append ( value , c )
continue
} else if c != '"' {
value = append ( value , c )
continue
2019-10-16 18:19:48 +09:00
}
2020-05-29 18:09:51 +09:00
tk = token . DoubleQuote ( string ( value ) , string ( ctx . obuf ) , s . pos ( ) )
pos = len ( [ ] rune ( value ) ) + 1
return
2019-10-16 18:19:48 +09:00
}
return
}
2020-05-29 18:09:51 +09:00
// scanQuote scans a quoted scalar: a single-quoted one when ch is ', and a
// double-quoted one for any other ch.
func (s *Scanner) scanQuote(ctx *Context, ch rune) (tk *token.Token, pos int) {
	switch ch {
	case '\'':
		tk, pos = s.scanSingleQuote(ctx)
	default:
		tk, pos = s.scanDoubleQuote(ctx)
	}
	return tk, pos
}
2019-10-16 18:19:48 +09:00
func ( s * Scanner ) scanTag ( ctx * Context ) ( tk * token . Token , pos int ) {
ctx . addOriginBuf ( '!' )
ctx . progress ( 1 ) // skip '!' character
for idx , c := range ctx . src [ ctx . idx : ] {
pos = idx + 1
ctx . addOriginBuf ( c )
switch c {
2019-12-22 11:59:49 +09:00
case ' ' , '\n' , '\r' :
2019-10-16 18:19:48 +09:00
value := ctx . source ( ctx . idx - 1 , ctx . idx + idx )
tk = token . Tag ( value , string ( ctx . obuf ) , s . pos ( ) )
2019-11-07 19:16:19 +09:00
pos = len ( [ ] rune ( value ) )
2019-10-16 18:19:48 +09:00
return
}
}
return
}
func ( s * Scanner ) scanComment ( ctx * Context ) ( tk * token . Token , pos int ) {
ctx . addOriginBuf ( '#' )
ctx . progress ( 1 ) // skip '#' character
for idx , c := range ctx . src [ ctx . idx : ] {
pos = idx + 1
ctx . addOriginBuf ( c )
switch c {
2019-12-22 11:59:49 +09:00
case '\n' , '\r' :
2019-10-16 18:19:48 +09:00
if ctx . previousChar ( ) == '\\' {
continue
}
value := ctx . source ( ctx . idx , ctx . idx + idx )
tk = token . Comment ( value , string ( ctx . obuf ) , s . pos ( ) )
2019-11-07 19:16:19 +09:00
pos = len ( [ ] rune ( value ) ) + 1
2019-10-16 18:19:48 +09:00
return
}
}
return
}
func ( s * Scanner ) scanLiteral ( ctx * Context , c rune ) {
2019-11-08 16:48:54 +09:00
ctx . addOriginBuf ( c )
2019-10-16 18:19:48 +09:00
if ctx . isEOS ( ) {
2020-03-07 12:03:06 +09:00
if c != '\r' && c != '\n' {
ctx . addBuf ( c )
}
2019-10-16 18:19:48 +09:00
value := ctx . bufferedSrc ( )
2020-06-01 12:54:23 +09:00
ctx . addToken ( token . String ( string ( value ) , string ( ctx . obuf ) , s . pos ( ) ) )
2019-11-07 23:45:39 +09:00
ctx . resetBuffer ( )
2019-11-08 16:48:54 +09:00
s . progressColumn ( ctx , 1 )
2019-12-22 11:59:49 +09:00
} else if s . isNewLineChar ( c ) {
2019-10-16 18:19:48 +09:00
if ctx . isLiteral {
ctx . addBuf ( c )
} else {
ctx . addBuf ( ' ' )
}
s . progressLine ( ctx )
} else if s . isFirstCharAtLine && c == ' ' {
2019-11-08 01:04:29 +09:00
if 0 < s . docStartColumn && s . docStartColumn <= s . column {
ctx . addBuf ( c )
}
2019-10-16 18:19:48 +09:00
s . progressColumn ( ctx , 1 )
} else {
2019-11-08 01:04:29 +09:00
if s . docStartColumn == 0 {
s . docStartColumn = s . column
}
2019-10-16 18:19:48 +09:00
ctx . addBuf ( c )
s . progressColumn ( ctx , 1 )
}
}
func ( s * Scanner ) scanLiteralHeader ( ctx * Context ) ( pos int , err error ) {
header := ctx . currentChar ( )
ctx . addOriginBuf ( header )
ctx . progress ( 1 ) // skip '|' or '<' character
for idx , c := range ctx . src [ ctx . idx : ] {
pos = idx
ctx . addOriginBuf ( c )
switch c {
2019-12-22 11:59:49 +09:00
case '\n' , '\r' :
2019-10-16 18:19:48 +09:00
value := ctx . source ( ctx . idx , ctx . idx + idx )
opt := strings . TrimRight ( value , " " )
switch opt {
case "" , "+" , "-" ,
"0" , "1" , "2" , "3" , "4" , "5" , "6" , "7" , "8" , "9" :
if header == '|' {
ctx . addToken ( token . Literal ( "|" + opt , string ( ctx . obuf ) , s . pos ( ) ) )
ctx . isLiteral = true
} else if header == '>' {
ctx . addToken ( token . Folded ( ">" + opt , string ( ctx . obuf ) , s . pos ( ) ) )
ctx . isFolded = true
}
2019-11-08 16:48:54 +09:00
s . indentState = IndentStateKeep
2019-10-16 18:19:48 +09:00
ctx . resetBuffer ( )
ctx . literalOpt = opt
return
}
break
}
}
err = xerrors . New ( "invalid literal header" )
return
}
func ( s * Scanner ) scanNewLine ( ctx * Context , c rune ) {
if len ( ctx . buf ) > 0 && s . savedPos == nil {
s . savedPos = s . pos ( )
2019-12-28 22:37:50 +09:00
s . savedPos . Column -= len ( ctx . bufferedSrc ( ) )
2019-10-16 18:19:48 +09:00
}
2019-12-11 17:17:05 +09:00
// if the following case, origin buffer has unnecessary two spaces.
// So, `removeRightSpaceFromOriginBuf` remove them, also fix column number too.
// ---
// a:[space][space]
// b: c
removedNum := ctx . removeRightSpaceFromBuf ( )
if removedNum > 0 {
s . column -= removedNum
s . offset -= removedNum
2019-12-28 22:20:45 +09:00
if s . savedPos != nil {
s . savedPos . Column -= removedNum
}
2019-12-11 17:17:05 +09:00
}
2019-10-16 18:19:48 +09:00
if ctx . isEOS ( ) {
s . addBufferedTokenIfExists ( ctx )
2019-10-17 15:44:55 +09:00
} else if s . isAnchor {
s . addBufferedTokenIfExists ( ctx )
2019-10-16 18:19:48 +09:00
}
ctx . addBuf ( ' ' )
ctx . addOriginBuf ( c )
2019-12-04 23:58:06 +09:00
ctx . isSingleLine = false
2019-10-16 18:19:48 +09:00
s . progressLine ( ctx )
}
func ( s * Scanner ) scan ( ctx * Context ) ( pos int ) {
for ctx . next ( ) {
pos = ctx . nextPos ( )
c := ctx . currentChar ( )
2019-11-07 23:45:39 +09:00
s . updateIndent ( ctx , c )
if ctx . isDocument ( ) {
if s . isChangedToIndentStateEqual ( ) ||
s . isChangedToIndentStateDown ( ) {
s . addBufferedTokenIfExists ( ctx )
s . breakLiteral ( ctx )
} else {
s . scanLiteral ( ctx , c )
continue
}
} else if s . isChangedToIndentStateDown ( ) {
2019-10-16 18:19:48 +09:00
s . addBufferedTokenIfExists ( ctx )
2019-10-25 15:03:08 +09:00
} else if s . isChangedToIndentStateEqual ( ) {
2019-12-22 11:59:49 +09:00
// if first character is new line character, buffer expect to raw folded literal
2019-12-22 17:28:30 +09:00
if len ( ctx . obuf ) > 0 && s . newLineCount ( ctx . obuf ) <= 1 {
2019-10-25 15:03:08 +09:00
// doesn't raw folded literal
s . addBufferedTokenIfExists ( ctx )
}
2019-10-16 18:19:48 +09:00
}
switch c {
case '{' :
2019-12-28 22:37:50 +09:00
if ! ctx . existsBuffer ( ) {
2019-11-06 19:28:47 +09:00
ctx . addOriginBuf ( c )
ctx . addToken ( token . MappingStart ( string ( ctx . obuf ) , s . pos ( ) ) )
2019-11-07 13:00:00 +09:00
s . startedFlowMapNum ++
2019-11-06 19:28:47 +09:00
s . progressColumn ( ctx , 1 )
return
}
2019-10-16 18:19:48 +09:00
case '}' :
2019-12-28 22:37:50 +09:00
if ! ctx . existsBuffer ( ) || s . startedFlowMapNum > 0 {
2019-11-06 19:28:47 +09:00
ctx . addToken ( s . bufferedToken ( ctx ) )
ctx . addOriginBuf ( c )
ctx . addToken ( token . MappingEnd ( string ( ctx . obuf ) , s . pos ( ) ) )
2019-11-07 13:00:00 +09:00
s . startedFlowMapNum --
2019-11-06 19:28:47 +09:00
s . progressColumn ( ctx , 1 )
return
}
2019-10-16 18:19:48 +09:00
case '.' :
if s . indentNum == 0 && ctx . repeatNum ( '.' ) == 3 {
ctx . addToken ( token . DocumentEnd ( s . pos ( ) ) )
s . progressColumn ( ctx , 3 )
pos += 2
return
}
case '<' :
if ctx . repeatNum ( '<' ) == 2 {
2019-10-23 20:22:14 +09:00
s . prevIndentColumn = s . column
2019-10-17 01:58:10 +09:00
ctx . addToken ( token . MergeKey ( string ( ctx . obuf ) + "<<" , s . pos ( ) ) )
2019-10-16 18:19:48 +09:00
s . progressColumn ( ctx , 1 )
pos ++
return
}
case '-' :
if s . indentNum == 0 && ctx . repeatNum ( '-' ) == 3 {
s . addBufferedTokenIfExists ( ctx )
ctx . addToken ( token . DocumentHeader ( s . pos ( ) ) )
s . progressColumn ( ctx , 3 )
pos += 2
return
}
2019-12-28 22:37:50 +09:00
if ctx . existsBuffer ( ) && s . isChangedToIndentStateUp ( ) {
2019-10-16 18:19:48 +09:00
// raw folded
ctx . isRawFolded = true
ctx . addBuf ( c )
ctx . addOriginBuf ( c )
s . progressColumn ( ctx , 1 )
continue
}
2020-03-07 12:03:06 +09:00
if ctx . existsBuffer ( ) {
2019-12-04 23:58:06 +09:00
// '-' is literal
ctx . addBuf ( c )
ctx . addOriginBuf ( c )
s . progressColumn ( ctx , 1 )
continue
}
2019-10-16 18:19:48 +09:00
nc := ctx . nextChar ( )
2020-03-07 20:17:54 +09:00
if nc == ' ' || s . isNewLineChar ( nc ) {
2019-10-16 18:19:48 +09:00
s . addBufferedTokenIfExists ( ctx )
ctx . addOriginBuf ( c )
2019-10-23 20:22:14 +09:00
tk := token . SequenceEntry ( string ( ctx . obuf ) , s . pos ( ) )
s . prevIndentColumn = tk . Position . Column
ctx . addToken ( tk )
2019-10-16 18:19:48 +09:00
s . progressColumn ( ctx , 1 )
return
}
case '[' :
2019-12-28 22:37:50 +09:00
if ! ctx . existsBuffer ( ) {
2019-11-06 19:28:47 +09:00
ctx . addOriginBuf ( c )
ctx . addToken ( token . SequenceStart ( string ( ctx . obuf ) , s . pos ( ) ) )
2019-11-07 13:00:00 +09:00
s . startedFlowSequenceNum ++
2019-11-06 19:28:47 +09:00
s . progressColumn ( ctx , 1 )
return
}
2019-10-16 18:19:48 +09:00
case ']' :
2019-12-28 22:37:50 +09:00
if ! ctx . existsBuffer ( ) || s . startedFlowSequenceNum > 0 {
2019-11-06 19:28:47 +09:00
s . addBufferedTokenIfExists ( ctx )
ctx . addOriginBuf ( c )
ctx . addToken ( token . SequenceEnd ( string ( ctx . obuf ) , s . pos ( ) ) )
2019-11-07 13:00:00 +09:00
s . startedFlowSequenceNum --
2019-11-06 19:28:47 +09:00
s . progressColumn ( ctx , 1 )
return
}
2019-10-16 18:19:48 +09:00
case ',' :
2019-11-07 13:00:00 +09:00
if s . startedFlowSequenceNum > 0 || s . startedFlowMapNum > 0 {
2019-11-06 19:28:47 +09:00
s . addBufferedTokenIfExists ( ctx )
ctx . addOriginBuf ( c )
ctx . addToken ( token . CollectEntry ( string ( ctx . obuf ) , s . pos ( ) ) )
s . progressColumn ( ctx , 1 )
return
}
2019-10-16 18:19:48 +09:00
case ':' :
nc := ctx . nextChar ( )
2019-12-22 11:59:49 +09:00
if nc == ' ' || s . isNewLineChar ( nc ) || ctx . isNextEOS ( ) {
2019-10-16 18:19:48 +09:00
// mapping value
2019-10-17 15:44:55 +09:00
tk := s . bufferedToken ( ctx )
if tk != nil {
2019-10-23 20:22:14 +09:00
s . prevIndentColumn = tk . Position . Column
2019-10-17 15:44:55 +09:00
ctx . addToken ( tk )
}
2019-10-16 18:19:48 +09:00
ctx . addToken ( token . MappingValue ( s . pos ( ) ) )
s . progressColumn ( ctx , 1 )
return
}
case '|' , '>' :
2019-12-28 22:37:50 +09:00
if ! ctx . existsBuffer ( ) {
2019-10-16 18:19:48 +09:00
progress , err := s . scanLiteralHeader ( ctx )
if err != nil {
// TODO: returns syntax error object
return
}
s . progressColumn ( ctx , progress )
s . progressLine ( ctx )
continue
}
case '!' :
2019-12-28 22:37:50 +09:00
if ! ctx . existsBuffer ( ) {
2019-11-06 19:28:47 +09:00
token , progress := s . scanTag ( ctx )
ctx . addToken ( token )
s . progressColumn ( ctx , progress )
2019-12-22 11:59:49 +09:00
if c := ctx . previousChar ( ) ; s . isNewLineChar ( c ) {
2019-11-06 19:28:47 +09:00
s . progressLine ( ctx )
}
pos += progress
return
2019-10-16 18:19:48 +09:00
}
case '%' :
2019-12-28 22:37:50 +09:00
if ! ctx . existsBuffer ( ) && s . indentNum == 0 {
2019-10-16 18:19:48 +09:00
ctx . addToken ( token . Directive ( s . pos ( ) ) )
s . progressColumn ( ctx , 1 )
return
}
case '?' :
nc := ctx . nextChar ( )
2019-12-28 22:37:50 +09:00
if ! ctx . existsBuffer ( ) && nc == ' ' {
2019-10-16 18:19:48 +09:00
ctx . addToken ( token . Directive ( s . pos ( ) ) )
s . progressColumn ( ctx , 1 )
return
}
case '&' :
2019-12-28 22:37:50 +09:00
if ! ctx . existsBuffer ( ) {
2019-11-06 19:28:47 +09:00
s . addBufferedTokenIfExists ( ctx )
ctx . addOriginBuf ( c )
ctx . addToken ( token . Anchor ( string ( ctx . obuf ) , s . pos ( ) ) )
s . progressColumn ( ctx , 1 )
s . isAnchor = true
return
}
2019-10-16 18:19:48 +09:00
case '*' :
2019-12-28 22:37:50 +09:00
if ! ctx . existsBuffer ( ) {
2019-11-06 19:28:47 +09:00
s . addBufferedTokenIfExists ( ctx )
ctx . addOriginBuf ( c )
ctx . addToken ( token . Alias ( string ( ctx . obuf ) , s . pos ( ) ) )
s . progressColumn ( ctx , 1 )
return
}
2019-10-16 18:19:48 +09:00
case '#' :
2019-12-28 22:37:50 +09:00
if ! ctx . existsBuffer ( ) || ctx . previousChar ( ) == ' ' {
2019-11-12 19:50:58 +09:00
s . addBufferedTokenIfExists ( ctx )
token , progress := s . scanComment ( ctx )
ctx . addToken ( token )
s . progressColumn ( ctx , progress )
s . progressLine ( ctx )
pos += progress
return
}
2019-10-16 18:19:48 +09:00
case '\'' , '"' :
2019-12-28 22:37:50 +09:00
if ! ctx . existsBuffer ( ) {
2019-11-07 18:08:12 +09:00
token , progress := s . scanQuote ( ctx , c )
ctx . addToken ( token )
s . progressColumn ( ctx , progress )
pos += progress
return
}
2019-11-09 17:20:39 +09:00
case '\r' , '\n' :
// There is no problem that we ignore CR which followed by LF and normalize it to LF, because of following YAML1.2 spec.
// > Line breaks inside scalar content must be normalized by the YAML processor. Each such line break must be parsed into a single line feed character.
// > Outside scalar content, YAML allows any line break to be used to terminate lines.
// > -- https://yaml.org/spec/1.2/spec.html
if c == '\r' && ctx . nextChar ( ) == '\n' {
ctx . addOriginBuf ( '\r' )
ctx . progress ( 1 )
c = '\n'
}
2019-10-16 18:19:48 +09:00
s . scanNewLine ( ctx , c )
continue
case ' ' :
if ctx . isSaveIndentMode ( ) || ( ! s . isAnchor && ! s . isFirstCharAtLine ) {
ctx . addBuf ( c )
ctx . addOriginBuf ( c )
s . progressColumn ( ctx , 1 )
continue
}
if s . isFirstCharAtLine {
s . progressColumn ( ctx , 1 )
ctx . addOriginBuf ( c )
continue
}
s . addBufferedTokenIfExists ( ctx )
s . progressColumn ( ctx , 1 )
s . isAnchor = false
return
}
ctx . addBuf ( c )
ctx . addOriginBuf ( c )
s . progressColumn ( ctx , 1 )
}
2019-10-23 03:21:42 +09:00
s . addBufferedTokenIfExists ( ctx )
2019-10-16 18:19:48 +09:00
return
}
2019-10-21 12:53:30 +09:00
// Init prepares the scanner s to tokenize the text src by setting the scanner at the beginning of src.
2019-12-29 11:37:20 +09:00
func ( s * Scanner ) Init ( text string ) {
src := [ ] rune ( text )
2019-10-16 18:19:48 +09:00
s . source = src
s . sourcePos = 0
s . sourceSize = len ( src )
s . line = 1
s . column = 1
s . offset = 1
s . prevIndentLevel = 0
s . prevIndentNum = 0
2019-10-23 20:22:14 +09:00
s . prevIndentColumn = 0
2019-10-16 18:19:48 +09:00
s . indentLevel = 0
s . indentNum = 0
s . isFirstCharAtLine = true
}
2019-10-21 12:53:30 +09:00
// Scan scans the next token and returns the token collection. The source end is indicated by io.EOF.
2019-10-16 18:19:48 +09:00
func ( s * Scanner ) Scan ( ) ( token . Tokens , error ) {
if s . sourcePos >= s . sourceSize {
return nil , io . EOF
}
ctx := newContext ( s . source [ s . sourcePos : ] )
2019-12-29 11:47:34 +09:00
defer ctx . release ( )
2019-10-16 18:19:48 +09:00
progress := s . scan ( ctx )
s . sourcePos += progress
2019-12-29 11:47:34 +09:00
var tokens token . Tokens
tokens = append ( tokens , ctx . tokens ... )
return tokens , nil
2019-10-16 18:19:48 +09:00
}