2019-10-16 18:19:48 +09:00
package scanner
import (
2024-11-03 02:11:50 +09:00
"fmt"
"strconv"
"strings"
2019-12-28 22:20:45 +09:00
"sync"
2019-10-16 18:19:48 +09:00
"github.com/goccy/go-yaml/token"
)
2019-10-21 12:53:30 +09:00
// Context context at scanning
2019-10-16 18:19:48 +09:00
type Context struct {
2024-11-03 02:11:50 +09:00
idx int
size int
notSpaceCharPos int
notSpaceOrgCharPos int
src [ ] rune
buf [ ] rune
obuf [ ] rune
tokens token . Tokens
isRawFolded bool
isLiteral bool
isFolded bool
docOpt string
docFirstLineIndentColumn int
docPrevLineIndentColumn int
docLineIndentColumn int
docFoldedNewLine bool
2019-12-28 22:20:45 +09:00
}
// ctxPool recycles *Context values. When it has nothing to hand out it builds
// a new one with createContext.
var ctxPool = sync.Pool{
	New: func() interface{} { return createContext() },
}
2019-10-16 18:19:48 +09:00
2019-12-28 22:20:45 +09:00
func createContext ( ) * Context {
2019-10-16 18:19:48 +09:00
return & Context {
2024-10-29 20:00:48 +09:00
idx : 0 ,
tokens : token . Tokens { } ,
2019-10-16 18:19:48 +09:00
}
}
2019-12-28 22:20:45 +09:00
func newContext ( src [ ] rune ) * Context {
2024-10-28 11:31:15 +09:00
ctx , _ := ctxPool . Get ( ) . ( * Context )
2019-12-28 22:20:45 +09:00
ctx . reset ( src )
return ctx
}
// release hands c back to ctxPool so that a later newContext call can reuse
// it. The caller should not touch c after releasing it.
func (c *Context) release() {
	ctxPool.Put(c)
}
2024-10-30 02:18:20 +09:00
func ( c * Context ) clear ( ) {
c . resetBuffer ( )
c . isRawFolded = false
c . isLiteral = false
c . isFolded = false
2024-11-03 02:11:50 +09:00
c . docOpt = ""
c . docFirstLineIndentColumn = 0
c . docLineIndentColumn = 0
c . docPrevLineIndentColumn = 0
c . docFoldedNewLine = false
2024-10-30 02:18:20 +09:00
}
2019-12-28 22:20:45 +09:00
func ( c * Context ) reset ( src [ ] rune ) {
c . idx = 0
c . size = len ( src )
c . src = src
c . tokens = c . tokens [ : 0 ]
c . resetBuffer ( )
2022-11-14 19:19:25 +09:00
c . isRawFolded = false
c . isLiteral = false
c . isFolded = false
2024-11-03 02:11:50 +09:00
c . docOpt = ""
2019-12-28 22:20:45 +09:00
}
2019-10-16 18:19:48 +09:00
func ( c * Context ) resetBuffer ( ) {
c . buf = c . buf [ : 0 ]
c . obuf = c . obuf [ : 0 ]
2019-12-28 22:20:45 +09:00
c . notSpaceCharPos = 0
c . notSpaceOrgCharPos = 0
2019-10-16 18:19:48 +09:00
}
2024-11-03 02:11:50 +09:00
func ( c * Context ) breakDocument ( ) {
2019-10-16 18:19:48 +09:00
c . isLiteral = false
c . isRawFolded = false
c . isFolded = false
2024-11-03 02:11:50 +09:00
c . docOpt = ""
c . docFirstLineIndentColumn = 0
c . docLineIndentColumn = 0
c . docPrevLineIndentColumn = 0
c . docFoldedNewLine = false
}
// updateDocumentIndentColumn sets docFirstLineIndentColumn to the indent
// number found in docOpt plus one. It does nothing when docOpt carries no
// positive number.
func (c *Context) updateDocumentIndentColumn() {
	if n := c.docFirstLineIndentColumnByDocOpt(); n > 0 {
		c.docFirstLineIndentColumn = n + 1
	}
}
func ( c * Context ) docFirstLineIndentColumnByDocOpt ( ) int {
2024-11-05 15:25:58 +09:00
opt := c . docOpt
opt = strings . TrimPrefix ( opt , "-" )
opt = strings . TrimPrefix ( opt , "+" )
opt = strings . TrimSuffix ( opt , "-" )
opt = strings . TrimSuffix ( opt , "+" )
i , _ := strconv . ParseInt ( opt , 10 , 64 )
2024-11-03 02:11:50 +09:00
return int ( i )
}
// updateDocumentLineIndentColumn records column as the first-line indent and
// as the current-line indent, each only if it has not been set yet (is 0).
func (c *Context) updateDocumentLineIndentColumn(column int) {
	firstUnset := c.docFirstLineIndentColumn == 0
	if firstUnset {
		c.docFirstLineIndentColumn = column
	}
	lineUnset := c.docLineIndentColumn == 0
	if lineUnset {
		c.docLineIndentColumn = column
	}
}
// validateDocumentLineIndentColumn returns an error when docOpt carries an
// indent number and the current line is indented less than
// docFirstLineIndentColumn. Without an indent number it always returns nil.
func (c *Context) validateDocumentLineIndentColumn() error {
	hasIndentOpt := c.docFirstLineIndentColumnByDocOpt() != 0
	if hasIndentOpt && c.docLineIndentColumn < c.docFirstLineIndentColumn {
		return fmt.Errorf("invalid number of indent is specified in the document header")
	}
	return nil
}
// updateDocumentNewLineState records a line break inside a document: the
// current line's indent becomes the previous line's indent, the current one
// is cleared, and docFoldedNewLine is raised.
func (c *Context) updateDocumentNewLineState() {
	c.docPrevLineIndentColumn, c.docLineIndentColumn = c.docLineIndentColumn, 0
	c.docFoldedNewLine = true
}
// addDocumentIndent appends one space of document indentation to buf when
// column is at or beyond the first-line indent column.
//
// Nothing happens while docFirstLineIndentColumn is still 0 (first line not
// evaluated yet) or when column is below that threshold.
func (c *Context) addDocumentIndent(column int) {
	if c.docFirstLineIndentColumn == 0 {
		return
	}
	// Once the first line of the document has been evaluated,
	// docFirstLineIndentColumn is positive and acts as the threshold.
	if column < c.docFirstLineIndentColumn {
		return
	}
	// In the folded state a new-line character is normally treated as a
	// space, but when the indent differs from the first line's indent the
	// new-line character is kept as is. The space already added to buf for
	// that new line therefore has to be replaced.
	// docFoldedNewLine is set to true on every new line.
	if c.isFolded && c.docFoldedNewLine {
		// buf can be empty here; only rewrite when there is a last rune.
		if last := len(c.buf) - 1; last >= 0 {
			c.buf[last] = '\n'
		}
		c.docFoldedNewLine = false
	}
	// addBuf drops a space when buf is empty, so append to buf directly.
	c.buf = append(c.buf, ' ')
}
func ( c * Context ) addDocumentNewLineInFolded ( column int ) {
if ! c . isFolded {
return
}
if ! c . docFoldedNewLine {
return
}
if c . docFirstLineIndentColumn == c . docLineIndentColumn &&
c . docLineIndentColumn == c . docPrevLineIndentColumn {
// use space as a new line delimiter.
return
}
c . buf [ len ( c . buf ) - 1 ] = '\n'
c . docFoldedNewLine = false
2019-10-16 18:19:48 +09:00
}
// addToken appends tk to the collected tokens; a nil tk is ignored.
func (c *Context) addToken(tk *token.Token) {
	if tk != nil {
		c.tokens = append(c.tokens, tk)
	}
}
func ( c * Context ) addBuf ( r rune ) {
2019-12-28 22:20:45 +09:00
if len ( c . buf ) == 0 && r == ' ' {
return
}
2019-10-16 18:19:48 +09:00
c . buf = append ( c . buf , r )
2020-06-17 11:38:36 +09:00
if r != ' ' && r != '\t' {
2019-12-28 22:20:45 +09:00
c . notSpaceCharPos = len ( c . buf )
}
2019-10-16 18:19:48 +09:00
}
func ( c * Context ) addOriginBuf ( r rune ) {
c . obuf = append ( c . obuf , r )
2020-06-17 11:38:36 +09:00
if r != ' ' && r != '\t' {
2019-12-28 22:20:45 +09:00
c . notSpaceOrgCharPos = len ( c . obuf )
}
2019-10-16 18:19:48 +09:00
}
2019-12-11 17:17:05 +09:00
func ( c * Context ) removeRightSpaceFromBuf ( ) int {
2019-12-28 22:20:45 +09:00
trimmedBuf := c . obuf [ : c . notSpaceOrgCharPos ]
buflen := len ( trimmedBuf )
2019-12-11 17:17:05 +09:00
diff := len ( c . obuf ) - buflen
if diff > 0 {
c . obuf = c . obuf [ : buflen ]
2019-12-28 22:37:50 +09:00
c . buf = c . bufferedSrc ( )
2019-12-11 17:17:05 +09:00
}
return diff
}
2019-11-07 23:45:39 +09:00
// isDocument reports whether any of the document-mode flags (literal, folded,
// raw folded) is set.
func (c *Context) isDocument() bool {
	if c.isLiteral {
		return true
	}
	if c.isFolded {
		return true
	}
	return c.isRawFolded
}
2019-10-16 18:19:48 +09:00
// isEOS reports whether idx is at or past the last rune of src.
func (c *Context) isEOS() bool {
	lastIdx := len(c.src) - 1
	return c.idx >= lastIdx
}
2019-10-30 16:57:59 +09:00
func ( c * Context ) isNextEOS ( ) bool {
2024-10-28 18:59:54 +03:00
return len ( c . src ) <= c . idx + 1
2019-10-30 16:57:59 +09:00
}
2019-10-16 18:19:48 +09:00
// next reports whether idx still points inside the source, i.e. there is a
// rune left to examine.
func (c *Context) next() bool {
	return c.size > c.idx
}
func ( c * Context ) source ( s , e int ) string {
2019-11-07 17:18:17 +09:00
return string ( c . src [ s : e ] )
2019-10-16 18:19:48 +09:00
}
func ( c * Context ) previousChar ( ) rune {
if c . idx > 0 {
2019-11-07 17:18:17 +09:00
return c . src [ c . idx - 1 ]
2019-10-16 18:19:48 +09:00
}
return rune ( 0 )
}
func ( c * Context ) currentChar ( ) rune {
2022-12-02 04:02:53 +09:00
if c . size > c . idx {
return c . src [ c . idx ]
}
return rune ( 0 )
2019-10-16 18:19:48 +09:00
}
2019-10-21 12:53:30 +09:00
func ( c * Context ) nextChar ( ) rune {
if c . size > c . idx + 1 {
2019-11-07 17:18:17 +09:00
return c . src [ c . idx + 1 ]
2019-10-16 18:19:48 +09:00
}
return rune ( 0 )
}
func ( c * Context ) repeatNum ( r rune ) int {
cnt := 0
for i := c . idx ; i < c . size ; i ++ {
2019-11-07 17:18:17 +09:00
if c . src [ i ] == r {
2019-10-16 18:19:48 +09:00
cnt ++
} else {
break
}
}
return cnt
}
// progress moves the read index by num positions.
func (c *Context) progress(num int) {
	c.idx = c.idx + num
}
2019-12-28 22:37:50 +09:00
// existsBuffer reports whether bufferedSrc currently yields at least one
// rune.
func (c *Context) existsBuffer() bool {
	pending := c.bufferedSrc()
	return len(pending) > 0
}
func ( c * Context ) bufferedSrc ( ) [ ] rune {
2019-12-28 22:20:45 +09:00
src := c . buf [ : c . notSpaceCharPos ]
2024-11-06 12:48:45 +09:00
if c . isDocument ( ) {
// remove end '\n' character and trailing empty lines.
2024-01-26 16:45:04 +09:00
// https://yaml.org/spec/1.2.2/#8112-block-chomping-indicator
2024-11-06 12:48:45 +09:00
if c . hasTrimAllEndNewlineOpt ( ) {
// If the '-' flag is specified, all trailing newline characters will be removed.
src = [ ] rune ( strings . TrimRight ( string ( src ) , "\n" ) )
} else {
// Normally, all but one of the trailing newline characters are removed.
var newLineCharCount int
for i := len ( src ) - 1 ; i >= 0 ; i -- {
if src [ i ] == '\n' {
newLineCharCount ++
continue
}
break
2024-01-26 16:45:04 +09:00
}
2024-11-06 12:48:45 +09:00
removedNewLineCharCount := newLineCharCount - 1
for removedNewLineCharCount > 0 {
src = [ ] rune ( strings . TrimSuffix ( string ( src ) , "\n" ) )
removedNewLineCharCount --
2024-11-03 02:11:50 +09:00
}
}
2024-11-06 12:48:45 +09:00
// If the text ends with a space character, remove all of them.
src = [ ] rune ( strings . TrimRight ( string ( src ) , " " ) )
2019-11-08 16:48:54 +09:00
}
2019-12-28 22:37:50 +09:00
return src
2019-10-16 18:19:48 +09:00
}
2024-11-06 12:48:45 +09:00
// hasTrimAllEndNewlineOpt reports whether docOpt begins or ends with '-'.
func (c *Context) hasTrimAllEndNewlineOpt() bool {
	opt := c.docOpt
	if strings.HasPrefix(opt, "-") {
		return true
	}
	return strings.HasSuffix(opt, "-")
}
2019-10-16 18:19:48 +09:00
func ( c * Context ) bufferedToken ( pos * token . Position ) * token . Token {
if c . idx == 0 {
return nil
}
source := c . bufferedSrc ( )
if len ( source ) == 0 {
return nil
}
2020-06-01 12:54:23 +09:00
var tk * token . Token
if c . isDocument ( ) {
tk = token . String ( string ( source ) , string ( c . obuf ) , pos )
} else {
tk = token . New ( string ( source ) , string ( c . obuf ) , pos )
}
2019-12-28 22:20:45 +09:00
c . resetBuffer ( )
2019-10-16 18:19:48 +09:00
return tk
}
2022-12-02 04:02:53 +09:00
// lastToken returns the most recently added token, or nil when no token has
// been added yet.
func (c *Context) lastToken() *token.Token {
	n := len(c.tokens)
	if n == 0 {
		return nil
	}
	return c.tokens[n-1]
}