2019-10-16 18:19:48 +09:00
package scanner
import (
2024-12-07 14:37:58 +09:00
"errors"
2024-11-03 02:11:50 +09:00
"strconv"
"strings"
2019-12-28 22:20:45 +09:00
"sync"
2019-10-16 18:19:48 +09:00
"github.com/goccy/go-yaml/token"
)
2019-10-21 12:53:30 +09:00
// Context context at scanning
2019-10-16 18:19:48 +09:00
type Context struct {
2024-12-14 20:08:27 +09:00
idx int
size int
notSpaceCharPos int
notSpaceOrgCharPos int
src [ ] rune
buf [ ] rune
obuf [ ] rune
tokens token . Tokens
mstate * MultiLineState
}
// MultiLineState tracks indentation and folding state while a multi-line
// (literal, folded or raw folded) scalar is being scanned.
type MultiLineState struct {
	// opt is the header option string given to setLiteral / setFolded.
	opt string
	// firstLineIndentColumn is the indent column of the first content line;
	// zero means it has not been determined yet.
	firstLineIndentColumn int
	// prevLineIndentColumn is the lineIndentColumn of the previous line.
	prevLineIndentColumn int
	// lineIndentColumn is the indent column of the current line; it is
	// reset to zero on every new line.
	lineIndentColumn int
	// lastNotSpaceOnlyLineIndentColumn is the most recent non-zero
	// lineIndentColumn seen when a line ended.
	lastNotSpaceOnlyLineIndentColumn int
	// spaceOnlyIndentColumn is the column recorded by
	// updateSpaceOnlyIndentColumn before the first-line indent is known.
	spaceOnlyIndentColumn int
	// foldedNewLine is set on every new line and cleared once the line
	// break has been processed.
	foldedNewLine bool
	// isRawFolded is set by setRawFolded.
	isRawFolded bool
	// isLiteral is set by setLiteral.
	isLiteral bool
	// isFolded is set by setFolded.
	isFolded bool
}
// ctxPool recycles Context values; when the pool is empty a new one is
// built with createContext.
var ctxPool = sync.Pool{
	New: func() interface{} { return createContext() },
}
2019-10-16 18:19:48 +09:00
2019-12-28 22:20:45 +09:00
func createContext ( ) * Context {
2019-10-16 18:19:48 +09:00
return & Context {
2024-10-29 20:00:48 +09:00
idx : 0 ,
tokens : token . Tokens { } ,
2019-10-16 18:19:48 +09:00
}
}
2019-12-28 22:20:45 +09:00
func newContext ( src [ ] rune ) * Context {
2024-10-28 11:31:15 +09:00
ctx , _ := ctxPool . Get ( ) . ( * Context )
2019-12-28 22:20:45 +09:00
ctx . reset ( src )
return ctx
}
// release puts c back into ctxPool so that a later newContext call can
// reuse it.
func (c *Context) release() {
	ctxPool.Put(c)
}
2024-10-30 02:18:20 +09:00
func ( c * Context ) clear ( ) {
c . resetBuffer ( )
2024-12-14 20:08:27 +09:00
c . mstate = nil
2024-10-30 02:18:20 +09:00
}
2019-12-28 22:20:45 +09:00
func ( c * Context ) reset ( src [ ] rune ) {
c . idx = 0
c . size = len ( src )
c . src = src
c . tokens = c . tokens [ : 0 ]
c . resetBuffer ( )
2024-12-14 20:08:27 +09:00
c . mstate = nil
2019-12-28 22:20:45 +09:00
}
2019-10-16 18:19:48 +09:00
func ( c * Context ) resetBuffer ( ) {
c . buf = c . buf [ : 0 ]
c . obuf = c . obuf [ : 0 ]
2019-12-28 22:20:45 +09:00
c . notSpaceCharPos = 0
c . notSpaceOrgCharPos = 0
2019-10-16 18:19:48 +09:00
}
2024-12-14 20:08:27 +09:00
func ( c * Context ) breakMultiLine ( ) {
c . mstate = nil
2024-11-03 02:11:50 +09:00
}
2024-12-14 20:08:27 +09:00
// getMultiLineState returns the active MultiLineState, or nil when c is not
// in multi-line mode.
func (c *Context) getMultiLineState() *MultiLineState {
	return c.mstate
}
func ( c * Context ) setLiteral ( lastDelimColumn int , opt string ) {
mstate := & MultiLineState {
isLiteral : true ,
opt : opt ,
}
indent := firstLineIndentColumnByOpt ( opt )
2024-11-03 02:11:50 +09:00
if indent > 0 {
2024-12-14 20:08:27 +09:00
mstate . firstLineIndentColumn = lastDelimColumn + indent
2024-11-03 02:11:50 +09:00
}
2024-12-14 20:08:27 +09:00
c . mstate = mstate
2024-11-03 02:11:50 +09:00
}
2024-12-14 20:08:27 +09:00
// setFolded switches c into folded multi-line mode with header option opt.
// When opt carries a positive indent number, the first-line indent column is
// fixed up front as lastDelimColumn plus that number.
func (c *Context) setFolded(lastDelimColumn int, opt string) {
	state := &MultiLineState{opt: opt, isFolded: true}
	if n := firstLineIndentColumnByOpt(opt); n > 0 {
		state.firstLineIndentColumn = lastDelimColumn + n
	}
	c.mstate = state
}
// setRawFolded switches c into raw folded multi-line mode and records column
// as both the first-line and the current-line indent column.
func (c *Context) setRawFolded(column int) {
	c.mstate = &MultiLineState{isRawFolded: true}
	c.mstate.updateIndentColumn(column)
}
// firstLineIndentColumnByOpt extracts the indent number from a multi-line
// header option. One leading and one trailing "-" / "+" are stripped first;
// what remains is parsed as a base-10 integer.
//
// It returns 0 when no number can be parsed. That includes a digit string too
// large for an int: strconv reports a range error together with a saturated
// value, and that value must not leak out as an indent width.
func firstLineIndentColumnByOpt(opt string) int {
	opt = strings.TrimPrefix(opt, "-")
	opt = strings.TrimPrefix(opt, "+")
	opt = strings.TrimSuffix(opt, "-")
	opt = strings.TrimSuffix(opt, "+")
	n, err := strconv.Atoi(opt)
	if err != nil {
		return 0
	}
	return n
}
2024-12-14 20:08:27 +09:00
// lastDelimColumn returns firstLineIndentColumn minus one, or 0 while the
// first-line indent column is still unknown.
func (s *MultiLineState) lastDelimColumn() int {
	if col := s.firstLineIndentColumn; col != 0 {
		return col - 1
	}
	return 0
}
func ( s * MultiLineState ) updateIndentColumn ( column int ) {
if s . firstLineIndentColumn == 0 {
s . firstLineIndentColumn = column
2024-11-03 02:11:50 +09:00
}
2024-12-14 20:08:27 +09:00
if s . lineIndentColumn == 0 {
s . lineIndentColumn = column
2024-11-03 02:11:50 +09:00
}
}
2024-12-14 20:08:27 +09:00
func ( s * MultiLineState ) updateSpaceOnlyIndentColumn ( column int ) {
if s . firstLineIndentColumn != 0 {
2024-12-07 14:37:58 +09:00
return
}
2024-12-14 20:08:27 +09:00
s . spaceOnlyIndentColumn = column
2024-12-07 14:37:58 +09:00
}
2024-12-14 20:08:27 +09:00
func ( s * MultiLineState ) validateIndentAfterSpaceOnly ( column int ) error {
if s . firstLineIndentColumn != 0 {
2024-12-07 14:37:58 +09:00
return nil
}
2024-12-14 20:08:27 +09:00
if s . spaceOnlyIndentColumn > column {
2024-12-07 14:37:58 +09:00
return errors . New ( "invalid number of indent is specified after space only" )
}
return nil
}
2024-12-14 20:08:27 +09:00
func ( s * MultiLineState ) validateIndentColumn ( ) error {
if firstLineIndentColumnByOpt ( s . opt ) == 0 {
2024-11-03 02:11:50 +09:00
return nil
}
2024-12-14 20:08:27 +09:00
if s . firstLineIndentColumn > s . lineIndentColumn {
2024-12-14 22:57:55 +09:00
return errors . New ( "invalid number of indent is specified in the multi-line header" )
2024-11-03 02:11:50 +09:00
}
return nil
}
2024-12-14 20:08:27 +09:00
func ( s * MultiLineState ) updateNewLineState ( ) {
s . prevLineIndentColumn = s . lineIndentColumn
if s . lineIndentColumn != 0 {
s . lastNotSpaceOnlyLineIndentColumn = s . lineIndentColumn
}
s . foldedNewLine = true
s . lineIndentColumn = 0
2024-11-03 02:11:50 +09:00
}
2024-12-14 20:08:27 +09:00
func ( s * MultiLineState ) isIndentColumn ( column int ) bool {
if s . firstLineIndentColumn == 0 {
2024-11-26 22:41:11 +09:00
return column == 1
}
2024-12-14 20:08:27 +09:00
return s . firstLineIndentColumn > column
2024-11-26 22:41:11 +09:00
}
2024-12-14 20:08:27 +09:00
func ( s * MultiLineState ) addIndent ( ctx * Context , column int ) {
if s . firstLineIndentColumn == 0 {
2024-11-03 02:11:50 +09:00
return
}
2024-12-14 20:08:27 +09:00
// If the first line of the document has already been evaluated, the number is treated as the threshold, since the `firstLineIndentColumn` is a positive number.
if column < s . firstLineIndentColumn {
return
}
// `c.foldedNewLine` is a variable that is set to true for every newline.
if ! s . isLiteral && s . foldedNewLine {
s . foldedNewLine = false
2024-11-03 02:11:50 +09:00
}
2024-12-14 20:08:27 +09:00
// Since addBuf ignore space character, add to the buffer directly.
ctx . buf = append ( ctx . buf , ' ' )
ctx . notSpaceCharPos = len ( ctx . buf )
2024-11-03 02:11:50 +09:00
}
2024-12-14 20:08:27 +09:00
// updateNewLineInFolded if Folded or RawFolded context and the content on the current line starts at the same column as the previous line,
2024-11-09 13:00:12 +09:00
// treat the new-line-char as a space.
2024-12-14 20:08:27 +09:00
func ( s * MultiLineState ) updateNewLineInFolded ( ctx * Context , column int ) {
if s . isLiteral {
2024-11-03 02:11:50 +09:00
return
}
2024-11-09 13:00:12 +09:00
// Folded or RawFolded.
2024-12-14 20:08:27 +09:00
if ! s . foldedNewLine {
2024-11-03 02:11:50 +09:00
return
}
2024-12-14 20:08:27 +09:00
var (
lastChar rune
prevLastChar rune
)
if len ( ctx . buf ) != 0 {
lastChar = ctx . buf [ len ( ctx . buf ) - 1 ]
}
if len ( ctx . buf ) > 1 {
prevLastChar = ctx . buf [ len ( ctx . buf ) - 2 ]
}
if s . lineIndentColumn == s . prevLineIndentColumn {
// ---
// >
// a
// b
if lastChar == '\n' {
ctx . buf [ len ( ctx . buf ) - 1 ] = ' '
}
} else if s . prevLineIndentColumn == 0 && s . lastNotSpaceOnlyLineIndentColumn == column {
// if previous line is indent-space and new-line-char only, prevLineIndentColumn is zero.
// In this case, last new-line-char is removed.
// ---
// >
// a
//
// b
if lastChar == '\n' && prevLastChar == '\n' {
ctx . buf = ctx . buf [ : len ( ctx . buf ) - 1 ]
ctx . notSpaceCharPos = len ( ctx . buf )
2024-11-09 13:00:12 +09:00
}
2024-11-03 02:11:50 +09:00
}
2024-12-14 20:08:27 +09:00
s . foldedNewLine = false
}
// hasTrimAllEndNewlineOpt reports whether all trailing newlines are to be
// removed: always in raw folded mode, otherwise when the header option
// starts or ends with "-".
func (s *MultiLineState) hasTrimAllEndNewlineOpt() bool {
	if s.isRawFolded {
		return true
	}
	return strings.HasPrefix(s.opt, "-") || strings.HasSuffix(s.opt, "-")
}
func ( s * MultiLineState ) hasKeepAllEndNewlineOpt ( ) bool {
return strings . HasPrefix ( s . opt , "+" ) || strings . HasSuffix ( s . opt , "+" )
2019-10-16 18:19:48 +09:00
}
// addToken appends tk to the collected tokens; a nil token is ignored.
func (c *Context) addToken(tk *token.Token) {
	if tk != nil {
		c.tokens = append(c.tokens, tk)
	}
}
func ( c * Context ) addBuf ( r rune ) {
2024-11-13 16:21:12 +09:00
if len ( c . buf ) == 0 && ( r == ' ' || r == '\t' ) {
2019-12-28 22:20:45 +09:00
return
}
2019-10-16 18:19:48 +09:00
c . buf = append ( c . buf , r )
2020-06-17 11:38:36 +09:00
if r != ' ' && r != '\t' {
2019-12-28 22:20:45 +09:00
c . notSpaceCharPos = len ( c . buf )
}
2019-10-16 18:19:48 +09:00
}
2024-11-26 22:41:11 +09:00
// addBufWithTab is the variant of addBuf that treats a tab like any other
// content character: only a space is dropped from an empty buffer, and only
// a space leaves notSpaceCharPos untouched.
func (c *Context) addBufWithTab(r rune) {
	space := r == ' '
	if space && len(c.buf) == 0 {
		return
	}
	c.buf = append(c.buf, r)
	if !space {
		c.notSpaceCharPos = len(c.buf)
	}
}
2019-10-16 18:19:48 +09:00
func ( c * Context ) addOriginBuf ( r rune ) {
c . obuf = append ( c . obuf , r )
2020-06-17 11:38:36 +09:00
if r != ' ' && r != '\t' {
2019-12-28 22:20:45 +09:00
c . notSpaceOrgCharPos = len ( c . obuf )
}
2019-10-16 18:19:48 +09:00
}
2024-11-13 13:09:30 +09:00
func ( c * Context ) removeRightSpaceFromBuf ( ) {
2019-12-28 22:20:45 +09:00
trimmedBuf := c . obuf [ : c . notSpaceOrgCharPos ]
buflen := len ( trimmedBuf )
2019-12-11 17:17:05 +09:00
diff := len ( c . obuf ) - buflen
if diff > 0 {
c . obuf = c . obuf [ : buflen ]
2019-12-28 22:37:50 +09:00
c . buf = c . bufferedSrc ( )
2019-12-11 17:17:05 +09:00
}
}
2019-10-16 18:19:48 +09:00
// isEOS reports whether the read position is at or beyond the last rune of
// the source.
func (c *Context) isEOS() bool {
	last := len(c.src) - 1
	return c.idx >= last
}
2019-10-30 16:57:59 +09:00
func ( c * Context ) isNextEOS ( ) bool {
2024-10-28 18:59:54 +03:00
return len ( c . src ) <= c . idx + 1
2019-10-30 16:57:59 +09:00
}
2019-10-16 18:19:48 +09:00
// next reports whether there is still a rune to read at the current
// position.
func (c *Context) next() bool {
	return c.size > c.idx
}
func ( c * Context ) source ( s , e int ) string {
2019-11-07 17:18:17 +09:00
return string ( c . src [ s : e ] )
2019-10-16 18:19:48 +09:00
}
func ( c * Context ) previousChar ( ) rune {
if c . idx > 0 {
2019-11-07 17:18:17 +09:00
return c . src [ c . idx - 1 ]
2019-10-16 18:19:48 +09:00
}
return rune ( 0 )
}
func ( c * Context ) currentChar ( ) rune {
2022-12-02 04:02:53 +09:00
if c . size > c . idx {
return c . src [ c . idx ]
}
return rune ( 0 )
2019-10-16 18:19:48 +09:00
}
2019-10-21 12:53:30 +09:00
func ( c * Context ) nextChar ( ) rune {
if c . size > c . idx + 1 {
2019-11-07 17:18:17 +09:00
return c . src [ c . idx + 1 ]
2019-10-16 18:19:48 +09:00
}
return rune ( 0 )
}
func ( c * Context ) repeatNum ( r rune ) int {
cnt := 0
for i := c . idx ; i < c . size ; i ++ {
2019-11-07 17:18:17 +09:00
if c . src [ i ] == r {
2019-10-16 18:19:48 +09:00
cnt ++
} else {
break
}
}
return cnt
}
// progress moves the read position forward by num runes.
func (c *Context) progress(num int) {
	c.idx = c.idx + num
}
2019-12-28 22:37:50 +09:00
// existsBuffer reports whether bufferedSrc currently yields any runes.
func (c *Context) existsBuffer() bool {
	return len(c.bufferedSrc()) > 0
}
2024-12-14 20:08:27 +09:00
// isMultiLine reports whether a multi-line state is currently set on c.
func (c *Context) isMultiLine() bool {
	state := c.mstate
	return state != nil
}
2019-12-28 22:37:50 +09:00
func ( c * Context ) bufferedSrc ( ) [ ] rune {
2019-12-28 22:20:45 +09:00
src := c . buf [ : c . notSpaceCharPos ]
2024-12-14 20:08:27 +09:00
if c . isMultiLine ( ) {
mstate := c . getMultiLineState ( )
2024-11-06 12:48:45 +09:00
// remove end '\n' character and trailing empty lines.
2024-01-26 16:45:04 +09:00
// https://yaml.org/spec/1.2.2/#8112-block-chomping-indicator
2024-12-14 20:08:27 +09:00
if mstate . hasTrimAllEndNewlineOpt ( ) {
2024-11-06 12:48:45 +09:00
// If the '-' flag is specified, all trailing newline characters will be removed.
src = [ ] rune ( strings . TrimRight ( string ( src ) , "\n" ) )
2024-12-14 20:08:27 +09:00
} else if ! mstate . hasKeepAllEndNewlineOpt ( ) {
2024-11-06 12:48:45 +09:00
// Normally, all but one of the trailing newline characters are removed.
var newLineCharCount int
for i := len ( src ) - 1 ; i >= 0 ; i -- {
if src [ i ] == '\n' {
newLineCharCount ++
continue
}
break
2024-01-26 16:45:04 +09:00
}
2024-11-06 12:48:45 +09:00
removedNewLineCharCount := newLineCharCount - 1
for removedNewLineCharCount > 0 {
src = [ ] rune ( strings . TrimSuffix ( string ( src ) , "\n" ) )
removedNewLineCharCount --
2024-11-03 02:11:50 +09:00
}
}
2024-11-06 12:48:45 +09:00
// If the text ends with a space character, remove all of them.
2024-12-14 20:08:27 +09:00
if mstate . hasTrimAllEndNewlineOpt ( ) {
2024-11-17 23:58:35 +09:00
src = [ ] rune ( strings . TrimRight ( string ( src ) , " " ) )
}
2024-11-09 13:00:12 +09:00
if string ( src ) == "\n" {
// If the content consists only of a newline,
// it can be considered as the document ending without any specified value,
// so it is treated as an empty string.
src = [ ] rune { }
}
2024-12-14 20:08:27 +09:00
if mstate . hasKeepAllEndNewlineOpt ( ) && len ( src ) == 0 {
2024-11-17 23:58:35 +09:00
src = [ ] rune { '\n' }
}
2019-11-08 16:48:54 +09:00
}
2019-12-28 22:37:50 +09:00
return src
2019-10-16 18:19:48 +09:00
}
func ( c * Context ) bufferedToken ( pos * token . Position ) * token . Token {
if c . idx == 0 {
return nil
}
source := c . bufferedSrc ( )
if len ( source ) == 0 {
2024-11-17 23:58:35 +09:00
c . buf = c . buf [ : 0 ] // clear value's buffer only.
2019-10-16 18:19:48 +09:00
return nil
}
2020-06-01 12:54:23 +09:00
var tk * token . Token
2024-12-14 20:08:27 +09:00
if c . isMultiLine ( ) {
2020-06-01 12:54:23 +09:00
tk = token . String ( string ( source ) , string ( c . obuf ) , pos )
} else {
tk = token . New ( string ( source ) , string ( c . obuf ) , pos )
}
2024-11-30 14:47:52 +09:00
c . setTokenTypeByPrevTag ( tk )
2019-12-28 22:20:45 +09:00
c . resetBuffer ( )
2019-10-16 18:19:48 +09:00
return tk
}
2022-12-02 04:02:53 +09:00
2024-11-30 14:47:52 +09:00
// setTokenTypeByPrevTag forces tk to the string type when the most recently
// added token is a tag whose value is not present in
// token.ReservedTagKeywordMap. In every other case tk is left untouched.
func (c *Context) setTokenTypeByPrevTag(tk *token.Token) {
	prev := c.lastToken()
	if prev == nil || prev.Type != token.TagType {
		return
	}
	keyword := token.ReservedTagKeyword(prev.Value)
	if _, reserved := token.ReservedTagKeywordMap[keyword]; reserved {
		return
	}
	tk.Type = token.StringType
}
2022-12-02 04:02:53 +09:00
// lastToken returns the most recently added token, or nil when none has
// been added yet.
func (c *Context) lastToken() *token.Token {
	n := len(c.tokens)
	if n == 0 {
		return nil
	}
	return c.tokens[n-1]
}