2019-10-16 18:19:48 +09:00
package scanner
import (
2024-10-28 21:24:15 +09:00
"errors"
2024-10-29 20:00:48 +09:00
"fmt"
2019-10-16 18:19:48 +09:00
"io"
2024-11-03 02:11:50 +09:00
"strconv"
2019-10-16 18:19:48 +09:00
"strings"
2024-07-16 12:30:20 +02:00
"github.com/goccy/go-yaml/token"
2019-10-16 18:19:48 +09:00
)
2019-10-21 12:53:30 +09:00
// IndentState describes how the indentation of the character being scanned
// relates to the previously recorded indentation.
type IndentState int

const (
	// IndentStateEqual means the indent equals the previous indent.
	IndentStateEqual IndentState = iota
	// IndentStateUp means the indent is deeper than the previous indent.
	IndentStateUp
	// IndentStateDown means the indent is shallower than the previous indent.
	IndentStateDown
	// IndentStateKeep means no indent comparison applies to the current
	// character (for example, it is not the first character of its line).
	IndentStateKeep
)
2019-10-21 12:53:30 +09:00
// Scanner holds the scanner's internal state while processing a given text.
// It can be allocated as part of another data structure but must be initialized via Init before use.
2019-10-16 18:19:48 +09:00
type Scanner struct {
2024-10-28 15:59:31 +09:00
source [ ] rune
sourcePos int
sourceSize int
// line number. This number starts from 1.
line int
// column number. This number starts from 1.
column int
// offset represents the offset from the beginning of the source.
offset int
// lastDelimColumn is the last column needed to compare indent is retained.
lastDelimColumn int
// indentNum indicates the number of spaces used for indentation.
indentNum int
// prevLineIndentNum indicates the number of spaces used for indentation at previous line.
prevLineIndentNum int
// indentLevel indicates the level of indent depth. This value does not match the column value.
2019-11-07 13:00:00 +09:00
indentLevel int
isFirstCharAtLine bool
isAnchor bool
startedFlowSequenceNum int
startedFlowMapNum int
indentState IndentState
savedPos * token . Position
2019-10-16 18:19:48 +09:00
}
// pos returns a freshly allocated token.Position describing the scanner's
// current line, column, offset, indent width and indent level.
func (s *Scanner) pos() *token.Position {
	p := new(token.Position)
	p.Line = s.line
	p.Column = s.column
	p.Offset = s.offset
	p.IndentNum = s.indentNum
	p.IndentLevel = s.indentLevel
	return p
}
func ( s * Scanner ) bufferedToken ( ctx * Context ) * token . Token {
if s . savedPos != nil {
tk := ctx . bufferedToken ( s . savedPos )
s . savedPos = nil
return tk
}
2022-12-19 16:04:56 +10:30
line := s . line
column := s . column - len ( ctx . buf )
level := s . indentLevel
2024-10-29 20:00:48 +09:00
if ctx . isDocument ( ) {
2022-12-19 16:04:56 +10:30
line -= s . newLineCount ( ctx . buf )
column = strings . Index ( string ( ctx . obuf ) , string ( ctx . buf ) ) + 1
// Since we are in a literal, folded or raw folded
// we can use the indent level from the last token.
last := ctx . lastToken ( )
if last != nil { // The last token should never be nil here.
level = last . Position . IndentLevel + 1
}
}
2019-10-16 18:19:48 +09:00
return ctx . bufferedToken ( & token . Position {
2022-12-19 16:04:56 +10:30
Line : line ,
Column : column ,
Offset : s . offset - len ( ctx . buf ) ,
2019-10-16 18:19:48 +09:00
IndentNum : s . indentNum ,
2022-12-19 16:04:56 +10:30
IndentLevel : level ,
2019-10-16 18:19:48 +09:00
} )
}
func ( s * Scanner ) progressColumn ( ctx * Context , num int ) {
s . column += num
s . offset += num
2024-10-29 20:00:48 +09:00
s . progress ( ctx , num )
2019-10-16 18:19:48 +09:00
}
func ( s * Scanner ) progressLine ( ctx * Context ) {
2024-10-28 15:59:31 +09:00
s . prevLineIndentNum = s . indentNum
2019-10-16 18:19:48 +09:00
s . column = 1
s . line ++
s . offset ++
s . indentNum = 0
s . isFirstCharAtLine = true
s . isAnchor = false
2024-10-29 20:00:48 +09:00
s . progress ( ctx , 1 )
}
func ( s * Scanner ) progress ( ctx * Context , num int ) {
ctx . progress ( num )
s . sourcePos += num
2019-10-16 18:19:48 +09:00
}
2019-12-22 11:59:49 +09:00
// isNewLineChar reports whether c is a line feed or a carriage return.
func (s *Scanner) isNewLineChar(c rune) bool {
	switch c {
	case '\n', '\r':
		return true
	default:
		return false
	}
}
2019-12-22 17:28:30 +09:00
// newLineCount returns the number of line breaks in src. A CR immediately
// followed by LF counts as a single line break.
func (s *Scanner) newLineCount(src []rune) int {
	var count int
	for i := 0; i < len(src); i++ {
		r := src[i]
		if r != '\r' && r != '\n' {
			continue
		}
		count++
		// Swallow the LF of a CRLF pair so it is not counted twice.
		if r == '\r' && i+1 < len(src) && src[i+1] == '\n' {
			i++
		}
	}
	return count
}
2024-10-28 15:59:31 +09:00
func ( s * Scanner ) updateIndentLevel ( ) {
if s . prevLineIndentNum < s . indentNum {
s . indentLevel ++
} else if s . prevLineIndentNum > s . indentNum {
if s . indentLevel > 0 {
s . indentLevel --
2019-10-21 14:54:26 +09:00
}
2019-10-16 18:19:48 +09:00
}
2024-10-28 15:59:31 +09:00
}
func ( s * Scanner ) updateIndentState ( ctx * Context ) {
if s . lastDelimColumn > 0 {
if s . lastDelimColumn < s . column {
2019-10-21 15:58:46 +09:00
s . indentState = IndentStateUp
2022-01-11 20:37:31 +09:00
} else {
2024-10-28 17:07:44 +09:00
// If lastDelimColumn and s.column are the same,
// treat as Down state since it is the same column as delimiter.
s . indentState = IndentStateDown
2019-10-21 15:58:46 +09:00
}
2022-01-11 20:37:31 +09:00
} else {
2024-10-28 15:59:31 +09:00
s . indentState = s . indentStateFromIndentNumDifference ( )
2022-01-11 20:37:31 +09:00
}
}
2024-10-28 17:07:44 +09:00
// indentStateFromIndentNumDifference maps the relation between this line's
// indent width and the previous line's to Up, Equal or Down.
func (s *Scanner) indentStateFromIndentNumDifference() IndentState {
	if s.indentNum > s.prevLineIndentNum {
		return IndentStateUp
	}
	if s.indentNum == s.prevLineIndentNum {
		return IndentStateEqual
	}
	return IndentStateDown
}
2022-01-11 20:37:31 +09:00
func ( s * Scanner ) updateIndent ( ctx * Context , c rune ) {
2024-11-09 13:00:12 +09:00
if s . isFirstCharAtLine && s . isNewLineChar ( c ) {
2022-01-11 20:37:31 +09:00
return
}
if s . isFirstCharAtLine && c == ' ' {
s . indentNum ++
return
}
if ! s . isFirstCharAtLine {
s . indentState = IndentStateKeep
return
2019-10-21 15:58:46 +09:00
}
2024-10-28 17:07:44 +09:00
s . updateIndentLevel ( )
2022-01-11 20:37:31 +09:00
s . updateIndentState ( ctx )
2019-11-07 23:45:39 +09:00
s . isFirstCharAtLine = false
2019-10-16 18:19:48 +09:00
}
// isChangedToIndentStateDown reports whether the current indent state is Down.
func (s *Scanner) isChangedToIndentStateDown() bool {
	down := s.indentState == IndentStateDown
	return down
}
// isChangedToIndentStateUp reports whether the current indent state is Up.
func (s *Scanner) isChangedToIndentStateUp() bool {
	up := s.indentState == IndentStateUp
	return up
}
// addBufferedTokenIfExists builds a token from the buffered characters and
// hands it to ctx.addToken. The result of bufferedToken is passed through
// unchanged, whatever it is.
func (s *Scanner) addBufferedTokenIfExists(ctx *Context) {
	tk := s.bufferedToken(ctx)
	ctx.addToken(tk)
}
2024-11-03 02:11:50 +09:00
func ( s * Scanner ) breakDocument ( ctx * Context ) {
ctx . breakDocument ( )
2019-10-16 18:19:48 +09:00
}
2024-10-31 22:54:26 +09:00
func ( s * Scanner ) scanSingleQuote ( ctx * Context ) ( * token . Token , error ) {
2020-05-29 18:09:51 +09:00
ctx . addOriginBuf ( '\'' )
2021-02-18 21:18:38 +00:00
srcpos := s . pos ( )
2019-10-16 18:19:48 +09:00
startIndex := ctx . idx + 1
2020-05-29 18:09:51 +09:00
src := ctx . src
size := len ( src )
value := [ ] rune { }
2020-06-15 17:30:27 +09:00
isFirstLineChar := false
2021-03-01 15:09:40 +00:00
isNewLine := false
2024-10-29 20:00:48 +09:00
2020-05-29 18:09:51 +09:00
for idx := startIndex ; idx < size ; idx ++ {
2021-03-01 15:09:40 +00:00
if ! isNewLine {
s . progressColumn ( ctx , 1 )
} else {
isNewLine = false
}
2020-05-29 18:09:51 +09:00
c := src [ idx ]
2019-10-16 18:19:48 +09:00
ctx . addOriginBuf ( c )
2020-06-15 17:30:27 +09:00
if s . isNewLineChar ( c ) {
value = append ( value , ' ' )
isFirstLineChar = true
2021-03-01 15:09:40 +00:00
isNewLine = true
s . progressLine ( ctx )
2020-06-15 17:30:27 +09:00
continue
} else if c == ' ' && isFirstLineChar {
continue
} else if c != '\'' {
2020-05-29 18:09:51 +09:00
value = append ( value , c )
2020-06-15 17:30:27 +09:00
isFirstLineChar = false
2020-05-29 18:09:51 +09:00
continue
}
if idx + 1 < len ( ctx . src ) && ctx . src [ idx + 1 ] == '\'' {
// '' handle as ' character
value = append ( value , c )
2020-05-29 18:23:08 +09:00
ctx . addOriginBuf ( c )
2020-05-29 18:09:51 +09:00
idx ++
2024-10-29 20:00:48 +09:00
s . progressColumn ( ctx , 1 )
2020-05-29 18:09:51 +09:00
continue
}
2021-03-01 15:09:40 +00:00
s . progressColumn ( ctx , 1 )
2024-10-31 22:54:26 +09:00
return token . SingleQuote ( string ( value ) , string ( ctx . obuf ) , srcpos ) , nil
2020-05-29 18:09:51 +09:00
}
2024-10-31 22:54:26 +09:00
s . progressColumn ( ctx , 1 )
return nil , ErrInvalidToken (
"could not find end character of single-quotated text" ,
token . Invalid ( string ( ctx . obuf ) , srcpos ) ,
)
2020-05-29 18:09:51 +09:00
}
2020-06-20 14:10:17 +09:00
// hexToInt converts one hexadecimal digit to its numeric value. Both upper
// and lower case letters are accepted. The input is not validated: any other
// rune is treated as if it were a decimal digit (its distance from '0').
func hexToInt(b rune) int {
	switch {
	case 'A' <= b && b <= 'F':
		return int(b) - 'A' + 10
	case 'a' <= b && b <= 'f':
		return int(b) - 'a' + 10
	default:
		return int(b) - '0'
	}
}
// hexRunesToInt interprets b as a big-endian sequence of hexadecimal digits
// and returns the resulting number. An empty slice yields 0.
func hexRunesToInt(b []rune) int {
	total := 0
	for _, digit := range b {
		// Shift the accumulated value one hex digit left, then add this digit.
		total = total<<4 + hexToInt(digit)
	}
	return total
}
2024-10-31 22:54:26 +09:00
func ( s * Scanner ) scanDoubleQuote ( ctx * Context ) ( * token . Token , error ) {
2020-05-29 18:09:51 +09:00
ctx . addOriginBuf ( '"' )
2021-02-18 21:18:38 +00:00
srcpos := s . pos ( )
2020-05-29 18:09:51 +09:00
startIndex := ctx . idx + 1
src := ctx . src
size := len ( src )
value := [ ] rune { }
2020-06-15 17:30:27 +09:00
isFirstLineChar := false
2021-03-01 15:09:40 +00:00
isNewLine := false
2024-10-29 20:00:48 +09:00
2020-05-29 18:09:51 +09:00
for idx := startIndex ; idx < size ; idx ++ {
2021-03-01 15:09:40 +00:00
if ! isNewLine {
s . progressColumn ( ctx , 1 )
} else {
isNewLine = false
}
2020-05-29 18:09:51 +09:00
c := src [ idx ]
ctx . addOriginBuf ( c )
2020-06-15 17:30:27 +09:00
if s . isNewLineChar ( c ) {
2024-11-12 00:09:28 +09:00
if isFirstLineChar {
if value [ len ( value ) - 1 ] == ' ' {
value [ len ( value ) - 1 ] = '\n'
} else {
value = append ( value , '\n' )
}
} else {
value = append ( value , ' ' )
}
2020-06-15 17:30:27 +09:00
isFirstLineChar = true
2021-03-01 15:09:40 +00:00
isNewLine = true
s . progressLine ( ctx )
2020-06-15 17:30:27 +09:00
continue
} else if c == ' ' && isFirstLineChar {
continue
} else if c == '\\' {
2020-06-20 14:10:17 +09:00
isFirstLineChar = false
2024-07-16 12:30:20 +02:00
if idx + 1 >= size {
value = append ( value , c )
continue
}
nextChar := src [ idx + 1 ]
progress := 0
switch nextChar {
case 'b' :
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , '\b' )
case 'e' :
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , '\x1B' )
case 'f' :
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , '\f' )
case 'n' :
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , '\n' )
case 'r' :
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , '\r' )
case 'v' :
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , '\v' )
case 'L' : // LS (#x2028)
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , [ ] rune { '\xE2' , '\x80' , '\xA8' } ... )
case 'N' : // NEL (#x85)
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , [ ] rune { '\xC2' , '\x85' } ... )
case 'P' : // PS (#x2029)
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , [ ] rune { '\xE2' , '\x80' , '\xA9' } ... )
case '_' : // #xA0
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , [ ] rune { '\xC2' , '\xA0' } ... )
case '"' :
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , nextChar )
case 'x' :
2024-10-29 20:00:48 +09:00
if idx + 3 >= size {
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , nextChar )
} else {
progress = 3
codeNum := hexRunesToInt ( src [ idx + 2 : idx + progress + 1 ] )
value = append ( value , rune ( codeNum ) )
2020-05-29 18:09:51 +09:00
}
2024-07-16 12:30:20 +02:00
case 'u' :
2024-10-29 20:00:48 +09:00
if idx + 5 >= size {
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , nextChar )
} else {
progress = 5
codeNum := hexRunesToInt ( src [ idx + 2 : idx + progress + 1 ] )
value = append ( value , rune ( codeNum ) )
2024-07-16 12:30:20 +02:00
}
case 'U' :
2024-10-29 20:00:48 +09:00
if idx + 9 >= size {
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , nextChar )
} else {
progress = 9
codeNum := hexRunesToInt ( src [ idx + 2 : idx + progress + 1 ] )
value = append ( value , rune ( codeNum ) )
2024-07-16 12:30:20 +02:00
}
case '\\' :
progress = 1
ctx . addOriginBuf ( nextChar )
value = append ( value , c )
2024-11-12 00:09:28 +09:00
case '\n' :
isFirstLineChar = true
isNewLine = true
ctx . addOriginBuf ( nextChar )
s . progressColumn ( ctx , 1 )
s . progressLine ( ctx )
idx ++
continue
case ' ' :
// skip escape character.
2024-07-16 12:30:20 +02:00
default :
value = append ( value , c )
2019-10-16 18:19:48 +09:00
}
2024-07-16 12:30:20 +02:00
idx += progress
s . progressColumn ( ctx , progress )
2020-05-29 18:09:51 +09:00
continue
} else if c != '"' {
value = append ( value , c )
2020-06-15 17:30:27 +09:00
isFirstLineChar = false
2020-05-29 18:09:51 +09:00
continue
2019-10-16 18:19:48 +09:00
}
2021-03-01 15:09:40 +00:00
s . progressColumn ( ctx , 1 )
2024-10-31 22:54:26 +09:00
return token . DoubleQuote ( string ( value ) , string ( ctx . obuf ) , srcpos ) , nil
2019-10-16 18:19:48 +09:00
}
2024-10-31 22:54:26 +09:00
s . progressColumn ( ctx , 1 )
return nil , ErrInvalidToken (
"could not find end character of double-quotated text" ,
token . Invalid ( string ( ctx . obuf ) , srcpos ) ,
)
2019-10-16 18:19:48 +09:00
}
2024-10-31 22:54:26 +09:00
func ( s * Scanner ) scanQuote ( ctx * Context , ch rune ) ( bool , error ) {
2024-10-30 02:18:20 +09:00
if ctx . existsBuffer ( ) {
2024-10-31 22:54:26 +09:00
return false , nil
2024-10-30 02:18:20 +09:00
}
2020-05-29 18:09:51 +09:00
if ch == '\'' {
2024-10-31 22:54:26 +09:00
tk , err := s . scanSingleQuote ( ctx )
if err != nil {
return false , err
}
ctx . addToken ( tk )
2024-10-30 02:18:20 +09:00
} else {
2024-10-31 22:54:26 +09:00
tk , err := s . scanDoubleQuote ( ctx )
if err != nil {
return false , err
}
ctx . addToken ( tk )
2020-05-29 18:09:51 +09:00
}
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-31 22:54:26 +09:00
return true , nil
2024-10-30 02:18:20 +09:00
}
func ( s * Scanner ) scanWhiteSpace ( ctx * Context ) bool {
if ctx . isDocument ( ) {
return false
}
if ! s . isAnchor && ! s . isFirstCharAtLine {
return false
}
if s . isFirstCharAtLine {
s . progressColumn ( ctx , 1 )
ctx . addOriginBuf ( ' ' )
return true
}
s . addBufferedTokenIfExists ( ctx )
s . isAnchor = false
return true
2020-05-29 18:09:51 +09:00
}
2021-03-01 17:32:11 +09:00
// isMergeKey reports whether the current position starts a merge key: exactly
// two '<' characters, optional spaces, and then a ':' that is followed by a
// space or a line break.
func (s *Scanner) isMergeKey(ctx *Context) bool {
	if ctx.repeatNum('<') != 2 {
		return false
	}
	src := ctx.src
	for idx := ctx.idx + 2; idx < len(src); idx++ {
		switch src[idx] {
		case ' ':
			// Spaces between "<<" and ':' are allowed.
		case ':':
			if idx+1 >= len(src) {
				continue
			}
			if nc := src[idx+1]; nc == ' ' || s.isNewLineChar(nc) {
				return true
			}
		default:
			return false
		}
	}
	return false
}
2024-10-30 02:18:20 +09:00
func ( s * Scanner ) scanTag ( ctx * Context ) bool {
if ctx . existsBuffer ( ) {
return false
}
2019-10-16 18:19:48 +09:00
ctx . addOriginBuf ( '!' )
2024-10-29 20:00:48 +09:00
s . progress ( ctx , 1 ) // skip '!' character
2024-10-30 02:18:20 +09:00
var progress int
2019-10-16 18:19:48 +09:00
for idx , c := range ctx . src [ ctx . idx : ] {
2024-10-29 20:00:48 +09:00
progress = idx + 1
2019-10-16 18:19:48 +09:00
ctx . addOriginBuf ( c )
switch c {
2019-12-22 11:59:49 +09:00
case ' ' , '\n' , '\r' :
2019-10-16 18:19:48 +09:00
value := ctx . source ( ctx . idx - 1 , ctx . idx + idx )
2024-10-30 02:18:20 +09:00
ctx . addToken ( token . Tag ( value , string ( ctx . obuf ) , s . pos ( ) ) )
2024-10-29 20:00:48 +09:00
progress = len ( [ ] rune ( value ) )
goto END
2019-10-16 18:19:48 +09:00
}
}
2024-10-29 20:00:48 +09:00
END :
s . progressColumn ( ctx , progress )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
return true
2019-10-16 18:19:48 +09:00
}
2024-10-30 02:18:20 +09:00
func ( s * Scanner ) scanComment ( ctx * Context ) bool {
if ctx . existsBuffer ( ) && ctx . previousChar ( ) != ' ' {
return false
}
s . addBufferedTokenIfExists ( ctx )
2019-10-16 18:19:48 +09:00
ctx . addOriginBuf ( '#' )
2024-10-29 20:00:48 +09:00
s . progress ( ctx , 1 ) // skip '#' character
2019-10-16 18:19:48 +09:00
for idx , c := range ctx . src [ ctx . idx : ] {
ctx . addOriginBuf ( c )
switch c {
2019-12-22 11:59:49 +09:00
case '\n' , '\r' :
2019-10-16 18:19:48 +09:00
if ctx . previousChar ( ) == '\\' {
continue
}
value := ctx . source ( ctx . idx , ctx . idx + idx )
2024-10-29 20:00:48 +09:00
progress := len ( [ ] rune ( value ) )
2024-10-30 02:18:20 +09:00
ctx . addToken ( token . Comment ( value , string ( ctx . obuf ) , s . pos ( ) ) )
2024-10-29 20:00:48 +09:00
s . progressColumn ( ctx , progress )
s . progressLine ( ctx )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
return true
2019-10-16 18:19:48 +09:00
}
}
2023-03-01 16:59:07 +09:00
// document ends with comment.
value := string ( ctx . src [ ctx . idx : ] )
2024-10-30 02:18:20 +09:00
ctx . addToken ( token . Comment ( value , string ( ctx . obuf ) , s . pos ( ) ) )
2024-10-29 20:00:48 +09:00
progress := len ( [ ] rune ( value ) )
s . progressColumn ( ctx , progress )
s . progressLine ( ctx )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
return true
2019-10-16 18:19:48 +09:00
}
2024-11-03 02:11:50 +09:00
func ( s * Scanner ) trimCommentFromDocumentOpt ( text string , header rune ) ( string , error ) {
2021-07-19 18:48:09 +09:00
idx := strings . Index ( text , "#" )
if idx < 0 {
return text , nil
}
if idx == 0 {
2024-10-29 20:00:48 +09:00
return "" , ErrInvalidToken (
2024-11-03 02:11:50 +09:00
fmt . Sprintf ( "invalid document header %s" , text ) ,
2024-10-29 20:00:48 +09:00
token . Invalid ( string ( header ) + text , s . pos ( ) ) ,
)
2021-07-19 18:48:09 +09:00
}
return text [ : idx - 1 ] , nil
}
2024-11-03 02:11:50 +09:00
func ( s * Scanner ) scanDocument ( ctx * Context , c rune ) error {
2019-11-08 16:48:54 +09:00
ctx . addOriginBuf ( c )
2019-10-16 18:19:48 +09:00
if ctx . isEOS ( ) {
2024-11-03 02:11:50 +09:00
ctx . updateDocumentLineIndentColumn ( s . column )
if err := ctx . validateDocumentLineIndentColumn ( ) ; err != nil {
invalidTk := token . Invalid ( string ( ctx . obuf ) , s . pos ( ) )
s . progressColumn ( ctx , 1 )
return ErrInvalidToken ( err . Error ( ) , invalidTk )
}
2024-11-09 13:00:12 +09:00
ctx . addBuf ( c )
2019-10-16 18:19:48 +09:00
value := ctx . bufferedSrc ( )
2020-06-01 12:54:23 +09:00
ctx . addToken ( token . String ( string ( value ) , string ( ctx . obuf ) , s . pos ( ) ) )
2019-11-07 23:45:39 +09:00
ctx . resetBuffer ( )
2019-11-08 16:48:54 +09:00
s . progressColumn ( ctx , 1 )
2019-12-22 11:59:49 +09:00
} else if s . isNewLineChar ( c ) {
2024-11-09 13:00:12 +09:00
ctx . addBuf ( c )
2024-11-03 02:11:50 +09:00
ctx . updateDocumentNewLineState ( )
2019-10-16 18:19:48 +09:00
s . progressLine ( ctx )
} else if s . isFirstCharAtLine && c == ' ' {
2024-11-03 02:11:50 +09:00
ctx . addDocumentIndent ( s . column )
2019-10-16 18:19:48 +09:00
s . progressColumn ( ctx , 1 )
} else {
2024-11-03 02:11:50 +09:00
ctx . updateDocumentLineIndentColumn ( s . column )
2024-11-09 13:00:12 +09:00
if ctx . docFirstLineIndentColumn > 0 {
s . lastDelimColumn = ctx . docFirstLineIndentColumn - 1
}
2024-11-03 02:11:50 +09:00
if err := ctx . validateDocumentLineIndentColumn ( ) ; err != nil {
invalidTk := token . Invalid ( string ( ctx . obuf ) , s . pos ( ) )
s . progressColumn ( ctx , 1 )
return ErrInvalidToken ( err . Error ( ) , invalidTk )
2019-11-08 01:04:29 +09:00
}
2024-11-09 13:00:12 +09:00
ctx . updateDocumentNewLineInFolded ( s . column )
2019-10-16 18:19:48 +09:00
ctx . addBuf ( c )
s . progressColumn ( ctx , 1 )
}
2024-11-03 02:11:50 +09:00
return nil
2019-10-16 18:19:48 +09:00
}
2024-10-28 18:15:16 +09:00
func ( s * Scanner ) scanNewLine ( ctx * Context , c rune ) {
if len ( ctx . buf ) > 0 && s . savedPos == nil {
2024-11-12 16:44:43 +09:00
bufLen := len ( ctx . bufferedSrc ( ) )
2024-10-28 18:15:16 +09:00
s . savedPos = s . pos ( )
2024-11-12 16:44:43 +09:00
s . savedPos . Column -= bufLen
s . savedPos . Offset -= bufLen
2024-10-28 18:15:16 +09:00
}
// if the following case, origin buffer has unnecessary two spaces.
// So, `removeRightSpaceFromOriginBuf` remove them, also fix column number too.
// ---
// a:[space][space]
// b: c
removedNum := ctx . removeRightSpaceFromBuf ( )
if removedNum > 0 {
s . column -= removedNum
s . offset -= removedNum
if s . savedPos != nil {
s . savedPos . Column -= removedNum
}
}
// There is no problem that we ignore CR which followed by LF and normalize it to LF, because of following YAML1.2 spec.
// > Line breaks inside scalar content must be normalized by the YAML processor. Each such line break must be parsed into a single line feed character.
// > Outside scalar content, YAML allows any line break to be used to terminate lines.
// > -- https://yaml.org/spec/1.2/spec.html
if c == '\r' && ctx . nextChar ( ) == '\n' {
ctx . addOriginBuf ( '\r' )
2024-10-29 20:00:48 +09:00
s . progress ( ctx , 1 )
2024-11-12 16:44:43 +09:00
s . offset ++
2024-10-28 18:15:16 +09:00
c = '\n'
}
if ctx . isEOS ( ) {
s . addBufferedTokenIfExists ( ctx )
} else if s . isAnchor {
s . addBufferedTokenIfExists ( ctx )
}
2024-11-09 13:00:12 +09:00
if ctx . existsBuffer ( ) && s . isFirstCharAtLine {
if ctx . buf [ len ( ctx . buf ) - 1 ] == ' ' {
ctx . buf [ len ( ctx . buf ) - 1 ] = '\n'
} else {
ctx . buf = append ( ctx . buf , '\n' )
}
} else {
ctx . addBuf ( ' ' )
}
2024-10-28 18:15:16 +09:00
ctx . addOriginBuf ( c )
s . progressLine ( ctx )
}
2024-11-01 15:03:27 +09:00
// isFlowMode reports whether at least one flow sequence or flow mapping is
// currently open.
func (s *Scanner) isFlowMode() bool {
	return s.startedFlowSequenceNum > 0 || s.startedFlowMapNum > 0
}
2024-10-28 18:15:16 +09:00
func ( s * Scanner ) scanFlowMapStart ( ctx * Context ) bool {
2024-11-01 15:03:27 +09:00
if ctx . existsBuffer ( ) && ! s . isFlowMode ( ) {
2024-10-28 18:15:16 +09:00
return false
}
2024-11-01 15:03:27 +09:00
s . addBufferedTokenIfExists ( ctx )
2024-10-28 18:15:16 +09:00
ctx . addOriginBuf ( '{' )
ctx . addToken ( token . MappingStart ( string ( ctx . obuf ) , s . pos ( ) ) )
s . startedFlowMapNum ++
s . progressColumn ( ctx , 1 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
func ( s * Scanner ) scanFlowMapEnd ( ctx * Context ) bool {
if s . startedFlowMapNum <= 0 {
return false
}
s . addBufferedTokenIfExists ( ctx )
ctx . addOriginBuf ( '}' )
ctx . addToken ( token . MappingEnd ( string ( ctx . obuf ) , s . pos ( ) ) )
s . startedFlowMapNum --
s . progressColumn ( ctx , 1 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
func ( s * Scanner ) scanFlowArrayStart ( ctx * Context ) bool {
2024-11-01 15:03:27 +09:00
if ctx . existsBuffer ( ) && ! s . isFlowMode ( ) {
2024-10-28 18:15:16 +09:00
return false
}
2024-11-01 15:03:27 +09:00
s . addBufferedTokenIfExists ( ctx )
2024-10-28 18:15:16 +09:00
ctx . addOriginBuf ( '[' )
ctx . addToken ( token . SequenceStart ( string ( ctx . obuf ) , s . pos ( ) ) )
s . startedFlowSequenceNum ++
s . progressColumn ( ctx , 1 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
func ( s * Scanner ) scanFlowArrayEnd ( ctx * Context ) bool {
2024-10-31 15:35:43 +09:00
if ctx . existsBuffer ( ) && s . startedFlowSequenceNum <= 0 {
2024-10-28 18:15:16 +09:00
return false
}
s . addBufferedTokenIfExists ( ctx )
ctx . addOriginBuf ( ']' )
ctx . addToken ( token . SequenceEnd ( string ( ctx . obuf ) , s . pos ( ) ) )
s . startedFlowSequenceNum --
s . progressColumn ( ctx , 1 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
func ( s * Scanner ) scanFlowEntry ( ctx * Context , c rune ) bool {
if s . startedFlowSequenceNum <= 0 && s . startedFlowMapNum <= 0 {
return false
}
s . addBufferedTokenIfExists ( ctx )
ctx . addOriginBuf ( c )
ctx . addToken ( token . CollectEntry ( string ( ctx . obuf ) , s . pos ( ) ) )
s . progressColumn ( ctx , 1 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
func ( s * Scanner ) scanMapDelim ( ctx * Context ) bool {
nc := ctx . nextChar ( )
if s . startedFlowMapNum <= 0 && nc != ' ' && ! s . isNewLineChar ( nc ) && ! ctx . isNextEOS ( ) {
return false
}
// mapping value
tk := s . bufferedToken ( ctx )
if tk != nil {
s . lastDelimColumn = tk . Position . Column
ctx . addToken ( tk )
} else if tk := ctx . lastToken ( ) ; tk != nil {
// If the map key is quote, the buffer does not exist because it has already been cut into tokens.
// Therefore, we need to check the last token.
if tk . Indicator == token . QuotedScalarIndicator {
s . lastDelimColumn = tk . Position . Column
}
}
ctx . addToken ( token . MappingValue ( s . pos ( ) ) )
s . progressColumn ( ctx , 1 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
func ( s * Scanner ) scanDocumentStart ( ctx * Context ) bool {
if s . indentNum != 0 {
return false
}
if s . column != 1 {
return false
}
if ctx . repeatNum ( '-' ) != 3 {
return false
}
s . addBufferedTokenIfExists ( ctx )
ctx . addToken ( token . DocumentHeader ( string ( ctx . obuf ) + "---" , s . pos ( ) ) )
s . progressColumn ( ctx , 3 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
func ( s * Scanner ) scanDocumentEnd ( ctx * Context ) bool {
if s . indentNum != 0 {
return false
}
if s . column != 1 {
return false
}
if ctx . repeatNum ( '.' ) != 3 {
return false
}
ctx . addToken ( token . DocumentEnd ( string ( ctx . obuf ) + "..." , s . pos ( ) ) )
s . progressColumn ( ctx , 3 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
func ( s * Scanner ) scanMergeKey ( ctx * Context ) bool {
if ! s . isMergeKey ( ctx ) {
return false
}
s . lastDelimColumn = s . column
ctx . addToken ( token . MergeKey ( string ( ctx . obuf ) + "<<" , s . pos ( ) ) )
2024-10-29 20:00:48 +09:00
s . progressColumn ( ctx , 2 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
func ( s * Scanner ) scanRawFoldedChar ( ctx * Context ) bool {
if ! ctx . existsBuffer ( ) {
return false
}
if ! s . isChangedToIndentStateUp ( ) {
return false
}
2024-11-09 13:00:12 +09:00
ctx . updateDocumentLineIndentColumn ( s . column )
2024-10-28 18:15:16 +09:00
ctx . isRawFolded = true
ctx . addBuf ( '-' )
ctx . addOriginBuf ( '-' )
s . progressColumn ( ctx , 1 )
return true
}
func ( s * Scanner ) scanSequence ( ctx * Context ) bool {
if ctx . existsBuffer ( ) {
return false
}
nc := ctx . nextChar ( )
2024-11-03 14:06:57 +09:00
if nc != 0 && nc != ' ' && ! s . isNewLineChar ( nc ) {
2024-10-28 18:15:16 +09:00
return false
}
s . addBufferedTokenIfExists ( ctx )
ctx . addOriginBuf ( '-' )
tk := token . SequenceEntry ( string ( ctx . obuf ) , s . pos ( ) )
s . lastDelimColumn = tk . Position . Column
ctx . addToken ( tk )
s . progressColumn ( ctx , 1 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
2024-11-03 02:11:50 +09:00
func ( s * Scanner ) scanDocumentHeader ( ctx * Context ) ( bool , error ) {
2024-10-28 18:15:16 +09:00
if ctx . existsBuffer ( ) {
return false , nil
}
2024-11-03 02:11:50 +09:00
if err := s . scanDocumentHeaderOption ( ctx ) ; err != nil {
2024-10-28 18:15:16 +09:00
return false , err
}
2024-11-03 02:11:50 +09:00
ctx . updateDocumentIndentColumn ( )
2024-10-28 18:15:16 +09:00
s . progressLine ( ctx )
return true , nil
}
2024-11-03 02:11:50 +09:00
func ( s * Scanner ) validateDocumentHeaderOption ( opt string ) error {
if len ( opt ) == 0 {
return nil
}
2024-11-05 15:25:58 +09:00
opt = strings . TrimPrefix ( opt , "-" )
opt = strings . TrimPrefix ( opt , "+" )
opt = strings . TrimSuffix ( opt , "-" )
opt = strings . TrimSuffix ( opt , "+" )
2024-11-03 02:11:50 +09:00
if len ( opt ) == 0 {
return nil
}
if _ , err := strconv . ParseInt ( opt , 10 , 64 ) ; err != nil {
return fmt . Errorf ( "invalid header option: %q" , opt )
}
return nil
}
func ( s * Scanner ) scanDocumentHeaderOption ( ctx * Context ) error {
2019-10-16 18:19:48 +09:00
header := ctx . currentChar ( )
ctx . addOriginBuf ( header )
2024-10-29 20:00:48 +09:00
s . progress ( ctx , 1 ) // skip '|' or '>' character
2019-10-16 18:19:48 +09:00
for idx , c := range ctx . src [ ctx . idx : ] {
2024-10-29 20:00:48 +09:00
progress := idx
2019-10-16 18:19:48 +09:00
ctx . addOriginBuf ( c )
switch c {
2019-12-22 11:59:49 +09:00
case '\n' , '\r' :
2019-10-16 18:19:48 +09:00
value := ctx . source ( ctx . idx , ctx . idx + idx )
opt := strings . TrimRight ( value , " " )
2021-07-19 18:48:09 +09:00
orgOptLen := len ( opt )
2024-11-03 02:11:50 +09:00
opt , err := s . trimCommentFromDocumentOpt ( opt , header )
2021-07-19 18:48:09 +09:00
if err != nil {
2024-10-29 20:00:48 +09:00
return err
2021-07-19 18:48:09 +09:00
}
2024-11-03 02:11:50 +09:00
if err := s . validateDocumentHeaderOption ( opt ) ; err != nil {
2024-10-31 22:54:26 +09:00
invalidTk := token . Invalid ( string ( ctx . obuf ) , s . pos ( ) )
2024-10-29 20:00:48 +09:00
s . progressColumn ( ctx , progress )
2024-11-03 02:11:50 +09:00
return ErrInvalidToken ( err . Error ( ) , invalidTk )
}
hasComment := len ( opt ) < orgOptLen
if s . column == 1 {
s . lastDelimColumn = 1
}
if header == '|' {
if hasComment {
commentLen := orgOptLen - len ( opt )
headerPos := strings . Index ( string ( ctx . obuf ) , "|" )
litBuf := ctx . obuf [ : len ( ctx . obuf ) - commentLen - headerPos ]
commentBuf := ctx . obuf [ len ( litBuf ) : ]
ctx . addToken ( token . Literal ( "|" + opt , string ( litBuf ) , s . pos ( ) ) )
s . column += len ( litBuf )
s . offset += len ( litBuf )
commentHeader := strings . Index ( value , "#" )
ctx . addToken ( token . Comment ( string ( value [ commentHeader + 1 : ] ) , string ( commentBuf ) , s . pos ( ) ) )
} else {
ctx . addToken ( token . Literal ( "|" + opt , string ( ctx . obuf ) , s . pos ( ) ) )
}
ctx . isLiteral = true
} else if header == '>' {
if hasComment {
commentLen := orgOptLen - len ( opt )
headerPos := strings . Index ( string ( ctx . obuf ) , ">" )
foldedBuf := ctx . obuf [ : len ( ctx . obuf ) - commentLen - headerPos ]
commentBuf := ctx . obuf [ len ( foldedBuf ) : ]
ctx . addToken ( token . Folded ( ">" + opt , string ( foldedBuf ) , s . pos ( ) ) )
s . column += len ( foldedBuf )
s . offset += len ( foldedBuf )
commentHeader := strings . Index ( value , "#" )
ctx . addToken ( token . Comment ( string ( value [ commentHeader + 1 : ] ) , string ( commentBuf ) , s . pos ( ) ) )
} else {
ctx . addToken ( token . Folded ( ">" + opt , string ( ctx . obuf ) , s . pos ( ) ) )
}
ctx . isFolded = true
2019-10-16 18:19:48 +09:00
}
2024-11-03 02:11:50 +09:00
s . indentState = IndentStateKeep
ctx . resetBuffer ( )
ctx . docOpt = opt
s . progressColumn ( ctx , progress )
return nil
2019-10-16 18:19:48 +09:00
}
}
2024-10-29 20:00:48 +09:00
text := string ( ctx . src [ ctx . idx : ] )
2024-10-31 22:54:26 +09:00
invalidTk := token . Invalid ( string ( ctx . obuf ) , s . pos ( ) )
2024-10-29 20:00:48 +09:00
s . progressColumn ( ctx , len ( text ) )
2024-11-03 02:11:50 +09:00
return ErrInvalidToken ( fmt . Sprintf ( "invalid document header: %q" , text ) , invalidTk )
2019-10-16 18:19:48 +09:00
}
2024-10-28 18:15:16 +09:00
func ( s * Scanner ) scanMapKey ( ctx * Context ) bool {
if ctx . existsBuffer ( ) {
return false
2019-10-16 18:19:48 +09:00
}
2019-12-11 17:17:05 +09:00
2024-10-28 18:15:16 +09:00
nc := ctx . nextChar ( )
if nc != ' ' {
return false
2019-12-11 17:17:05 +09:00
}
2024-10-28 18:15:16 +09:00
ctx . addToken ( token . MappingKey ( s . pos ( ) ) )
s . progressColumn ( ctx , 1 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
func ( s * Scanner ) scanDirective ( ctx * Context ) bool {
if ctx . existsBuffer ( ) {
return false
}
if s . indentNum != 0 {
return false
2024-07-16 06:17:13 -04:00
}
2024-10-28 18:15:16 +09:00
ctx . addToken ( token . Directive ( string ( ctx . obuf ) + "%" , s . pos ( ) ) )
s . progressColumn ( ctx , 1 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
func ( s * Scanner ) scanAnchor ( ctx * Context ) bool {
if ctx . existsBuffer ( ) {
return false
2019-10-16 18:19:48 +09:00
}
2024-10-28 18:15:16 +09:00
s . addBufferedTokenIfExists ( ctx )
ctx . addOriginBuf ( '&' )
ctx . addToken ( token . Anchor ( string ( ctx . obuf ) , s . pos ( ) ) )
s . progressColumn ( ctx , 1 )
s . isAnchor = true
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
}
func ( s * Scanner ) scanAlias ( ctx * Context ) bool {
if ctx . existsBuffer ( ) {
return false
}
s . addBufferedTokenIfExists ( ctx )
ctx . addOriginBuf ( '*' )
ctx . addToken ( token . Alias ( string ( ctx . obuf ) , s . pos ( ) ) )
s . progressColumn ( ctx , 1 )
2024-10-30 02:18:20 +09:00
ctx . clear ( )
2024-10-28 18:15:16 +09:00
return true
2019-10-16 18:19:48 +09:00
}
2024-11-09 20:43:51 +09:00
// scanReservedChar rejects a reserved character (the scanner calls it for
// '@' and '`') when that character would start a new token.
//
// If the context already has buffered text, nil is returned and nothing is
// changed, so the caller can treat c as an ordinary character. Otherwise c is
// appended to both the value buffer and the origin buffer, an invalid-token
// error naming the character is built from the origin buffer and the current
// position, the column advances by one, ctx.clear is called and the error is
// returned.
func (s *Scanner) scanReservedChar(ctx *Context, c rune) error {
	if ctx.existsBuffer() {
		return nil
	}

	ctx.addBuf(c)
	ctx.addOriginBuf(c)

	// ErrInvalidToken is given no format arguments, so the message has to be
	// formatted here; passing the raw format string would leave a literal
	// "%q" in the error text.
	msg := fmt.Sprintf("%q is a reserved character", c)
	err := ErrInvalidToken(msg, token.Invalid(string(ctx.obuf), s.pos()))

	s.progressColumn(ctx, 1)
	ctx.clear()
	return err
}
2024-10-29 20:00:48 +09:00
func ( s * Scanner ) scan ( ctx * Context ) error {
2019-10-16 18:19:48 +09:00
for ctx . next ( ) {
c := ctx . currentChar ( )
2024-10-28 18:15:16 +09:00
// First, change the IndentState.
// If the target character is the first character in a line, IndentState is Up/Down/Equal state.
// The second and subsequent letters are Keep.
2019-11-07 23:45:39 +09:00
s . updateIndent ( ctx , c )
2024-10-28 18:15:16 +09:00
// If IndentState is down, tokens are split, so the buffer accumulated until that point needs to be cutted as a token.
2024-10-28 17:07:44 +09:00
if s . isChangedToIndentStateDown ( ) {
s . addBufferedTokenIfExists ( ctx )
}
2019-11-07 23:45:39 +09:00
if ctx . isDocument ( ) {
2024-10-28 17:07:44 +09:00
if s . isChangedToIndentStateDown ( ) {
2024-11-01 15:03:27 +09:00
if tk := ctx . lastToken ( ) ; tk != nil {
// If literal/folded content is empty, no string token is added.
// Therefore, add an empty string token.
2024-11-02 19:29:48 +09:00
// But if literal/folded token column is 1, it is invalid at down state.
if tk . Position . Column == 1 {
return ErrInvalidToken (
"could not find document" ,
token . Invalid ( string ( ctx . obuf ) , s . pos ( ) ) ,
)
}
2024-11-01 15:03:27 +09:00
if tk . Type != token . StringType {
ctx . addToken ( token . String ( "" , "" , s . pos ( ) ) )
}
}
2024-11-03 02:11:50 +09:00
s . breakDocument ( ctx )
2019-11-07 23:45:39 +09:00
} else {
2024-11-03 02:11:50 +09:00
if err := s . scanDocument ( ctx , c ) ; err != nil {
return err
}
2019-11-07 23:45:39 +09:00
continue
}
2019-10-16 18:19:48 +09:00
}
switch c {
case '{' :
2024-10-28 18:15:16 +09:00
if s . scanFlowMapStart ( ctx ) {
2024-10-30 02:18:20 +09:00
continue
2019-11-06 19:28:47 +09:00
}
2019-10-16 18:19:48 +09:00
case '}' :
2024-10-28 18:15:16 +09:00
if s . scanFlowMapEnd ( ctx ) {
2024-10-30 02:18:20 +09:00
continue
2019-11-06 19:28:47 +09:00
}
2019-10-16 18:19:48 +09:00
case '.' :
2024-10-28 18:15:16 +09:00
if s . scanDocumentEnd ( ctx ) {
2024-10-30 02:18:20 +09:00
continue
2019-10-16 18:19:48 +09:00
}
case '<' :
2024-10-28 18:15:16 +09:00
if s . scanMergeKey ( ctx ) {
2024-10-30 02:18:20 +09:00
continue
2019-10-16 18:19:48 +09:00
}
case '-' :
2024-10-28 18:15:16 +09:00
if s . scanDocumentStart ( ctx ) {
2024-10-30 02:18:20 +09:00
continue
2019-10-16 18:19:48 +09:00
}
2024-10-28 18:15:16 +09:00
if s . scanRawFoldedChar ( ctx ) {
2019-10-16 18:19:48 +09:00
continue
}
2024-10-28 18:15:16 +09:00
if s . scanSequence ( ctx ) {
2019-12-04 23:58:06 +09:00
continue
}
2019-10-16 18:19:48 +09:00
case '[' :
2024-10-28 18:15:16 +09:00
if s . scanFlowArrayStart ( ctx ) {
2024-10-30 02:18:20 +09:00
continue
2019-11-06 19:28:47 +09:00
}
2019-10-16 18:19:48 +09:00
case ']' :
2024-10-28 18:15:16 +09:00
if s . scanFlowArrayEnd ( ctx ) {
2024-10-30 02:18:20 +09:00
continue
2019-11-06 19:28:47 +09:00
}
2019-10-16 18:19:48 +09:00
case ',' :
2024-10-28 18:15:16 +09:00
if s . scanFlowEntry ( ctx , c ) {
2024-10-30 02:18:20 +09:00
continue
2019-11-06 19:28:47 +09:00
}
2019-10-16 18:19:48 +09:00
case ':' :
2024-10-28 18:15:16 +09:00
if s . scanMapDelim ( ctx ) {
2024-10-30 02:18:20 +09:00
continue
2019-10-16 18:19:48 +09:00
}
case '|' , '>' :
2024-11-03 02:11:50 +09:00
scanned , err := s . scanDocumentHeader ( ctx )
2024-10-28 18:15:16 +09:00
if err != nil {
2024-10-29 20:00:48 +09:00
return err
2024-10-28 18:15:16 +09:00
}
if scanned {
2019-10-16 18:19:48 +09:00
continue
}
case '!' :
2024-10-30 02:18:20 +09:00
if s . scanTag ( ctx ) {
continue
2019-10-16 18:19:48 +09:00
}
case '%' :
2024-10-28 18:15:16 +09:00
if s . scanDirective ( ctx ) {
2024-10-30 02:18:20 +09:00
continue
2019-10-16 18:19:48 +09:00
}
case '?' :
2024-10-28 18:15:16 +09:00
if s . scanMapKey ( ctx ) {
2024-10-30 02:18:20 +09:00
continue
2019-10-16 18:19:48 +09:00
}
case '&' :
2024-10-28 18:15:16 +09:00
if s . scanAnchor ( ctx ) {
2024-10-30 02:18:20 +09:00
continue
2019-11-06 19:28:47 +09:00
}
2019-10-16 18:19:48 +09:00
case '*' :
2024-10-28 18:15:16 +09:00
if s . scanAlias ( ctx ) {
2024-10-30 02:18:20 +09:00
continue
2019-11-06 19:28:47 +09:00
}
2019-10-16 18:19:48 +09:00
case '#' :
2024-10-30 02:18:20 +09:00
if s . scanComment ( ctx ) {
continue
2019-11-12 19:50:58 +09:00
}
2019-10-16 18:19:48 +09:00
case '\'' , '"' :
2024-10-31 22:54:26 +09:00
scanned , err := s . scanQuote ( ctx , c )
if err != nil {
return err
}
if scanned {
2024-10-30 02:18:20 +09:00
continue
2019-11-07 18:08:12 +09:00
}
2019-11-09 17:20:39 +09:00
case '\r' , '\n' :
2019-10-16 18:19:48 +09:00
s . scanNewLine ( ctx , c )
continue
case ' ' :
2024-10-30 02:18:20 +09:00
if s . scanWhiteSpace ( ctx ) {
2019-10-16 18:19:48 +09:00
continue
}
2024-11-09 20:43:51 +09:00
case '@' , '`' :
if err := s . scanReservedChar ( ctx , c ) ; err != nil {
return err
}
2019-10-16 18:19:48 +09:00
}
ctx . addBuf ( c )
ctx . addOriginBuf ( c )
s . progressColumn ( ctx , 1 )
}
2019-10-23 03:21:42 +09:00
s . addBufferedTokenIfExists ( ctx )
2024-10-29 20:00:48 +09:00
return nil
2019-10-16 18:19:48 +09:00
}
2019-10-21 12:53:30 +09:00
// Init prepares the scanner s to tokenize the text src by setting the scanner at the beginning of src.
2019-12-29 11:37:20 +09:00
func ( s * Scanner ) Init ( text string ) {
src := [ ] rune ( text )
2019-10-16 18:19:48 +09:00
s . source = src
s . sourcePos = 0
s . sourceSize = len ( src )
s . line = 1
s . column = 1
s . offset = 1
2024-10-28 15:59:31 +09:00
s . prevLineIndentNum = 0
s . lastDelimColumn = 0
2019-10-16 18:19:48 +09:00
s . indentLevel = 0
s . indentNum = 0
s . isFirstCharAtLine = true
}
2019-10-21 12:53:30 +09:00
// Scan scans the next token and returns the token collection. The source end is indicated by io.EOF.
2019-10-16 18:19:48 +09:00
func ( s * Scanner ) Scan ( ) ( token . Tokens , error ) {
if s . sourcePos >= s . sourceSize {
return nil , io . EOF
}
ctx := newContext ( s . source [ s . sourcePos : ] )
2019-12-29 11:47:34 +09:00
defer ctx . release ( )
2024-10-29 20:00:48 +09:00
2019-12-29 11:47:34 +09:00
var tokens token . Tokens
2024-10-29 20:00:48 +09:00
err := s . scan ( ctx )
2019-12-29 11:47:34 +09:00
tokens = append ( tokens , ctx . tokens ... )
2024-10-29 20:00:48 +09:00
if err != nil {
var invalidTokenErr * InvalidTokenError
if errors . As ( err , & invalidTokenErr ) {
tokens = append ( tokens , invalidTokenErr . Token )
}
return tokens , err
}
2019-12-29 11:47:34 +09:00
return tokens , nil
2019-10-16 18:19:48 +09:00
}