mirror of
https://github.com/goccy/go-yaml.git
synced 2025-12-08 06:09:57 +00:00
452 lines
9.8 KiB
Go
452 lines
9.8 KiB
Go
package scanner
|
|
|
|
import (
|
|
"errors"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/goccy/go-yaml/token"
|
|
)
|
|
|
|
// Context context at scanning
|
|
type Context struct {
|
|
idx int
|
|
size int
|
|
notSpaceCharPos int
|
|
notSpaceOrgCharPos int
|
|
src []rune
|
|
buf []rune
|
|
obuf []rune
|
|
tokens token.Tokens
|
|
mstate *MultiLineState
|
|
}
|
|
|
|
type MultiLineState struct {
|
|
opt string
|
|
firstLineIndentColumn int
|
|
prevLineIndentColumn int
|
|
lineIndentColumn int
|
|
lastNotSpaceOnlyLineIndentColumn int
|
|
spaceOnlyIndentColumn int
|
|
foldedNewLine bool
|
|
isRawFolded bool
|
|
isLiteral bool
|
|
isFolded bool
|
|
}
|
|
|
|
var (
|
|
ctxPool = sync.Pool{
|
|
New: func() interface{} {
|
|
return createContext()
|
|
},
|
|
}
|
|
)
|
|
|
|
func createContext() *Context {
|
|
return &Context{
|
|
idx: 0,
|
|
tokens: token.Tokens{},
|
|
}
|
|
}
|
|
|
|
func newContext(src []rune) *Context {
|
|
ctx, _ := ctxPool.Get().(*Context)
|
|
ctx.reset(src)
|
|
return ctx
|
|
}
|
|
|
|
func (c *Context) release() {
|
|
ctxPool.Put(c)
|
|
}
|
|
|
|
func (c *Context) clear() {
|
|
c.resetBuffer()
|
|
c.mstate = nil
|
|
}
|
|
|
|
func (c *Context) reset(src []rune) {
|
|
c.idx = 0
|
|
c.size = len(src)
|
|
c.src = src
|
|
c.tokens = c.tokens[:0]
|
|
c.resetBuffer()
|
|
c.mstate = nil
|
|
}
|
|
|
|
func (c *Context) resetBuffer() {
|
|
c.buf = c.buf[:0]
|
|
c.obuf = c.obuf[:0]
|
|
c.notSpaceCharPos = 0
|
|
c.notSpaceOrgCharPos = 0
|
|
}
|
|
|
|
func (c *Context) breakMultiLine() {
|
|
c.mstate = nil
|
|
}
|
|
|
|
func (c *Context) getMultiLineState() *MultiLineState {
|
|
return c.mstate
|
|
}
|
|
|
|
func (c *Context) setLiteral(lastDelimColumn int, opt string) {
|
|
mstate := &MultiLineState{
|
|
isLiteral: true,
|
|
opt: opt,
|
|
}
|
|
indent := firstLineIndentColumnByOpt(opt)
|
|
if indent > 0 {
|
|
mstate.firstLineIndentColumn = lastDelimColumn + indent
|
|
}
|
|
c.mstate = mstate
|
|
}
|
|
|
|
func (c *Context) setFolded(lastDelimColumn int, opt string) {
|
|
mstate := &MultiLineState{
|
|
isFolded: true,
|
|
opt: opt,
|
|
}
|
|
indent := firstLineIndentColumnByOpt(opt)
|
|
if indent > 0 {
|
|
mstate.firstLineIndentColumn = lastDelimColumn + indent
|
|
}
|
|
c.mstate = mstate
|
|
}
|
|
|
|
func (c *Context) setRawFolded(column int) {
|
|
mstate := &MultiLineState{
|
|
isRawFolded: true,
|
|
}
|
|
mstate.updateIndentColumn(column)
|
|
c.mstate = mstate
|
|
}
|
|
|
|
func firstLineIndentColumnByOpt(opt string) int {
|
|
opt = strings.TrimPrefix(opt, "-")
|
|
opt = strings.TrimPrefix(opt, "+")
|
|
opt = strings.TrimSuffix(opt, "-")
|
|
opt = strings.TrimSuffix(opt, "+")
|
|
i, _ := strconv.ParseInt(opt, 10, 64)
|
|
return int(i)
|
|
}
|
|
|
|
func (s *MultiLineState) lastDelimColumn() int {
|
|
if s.firstLineIndentColumn == 0 {
|
|
return 0
|
|
}
|
|
return s.firstLineIndentColumn - 1
|
|
}
|
|
|
|
func (s *MultiLineState) updateIndentColumn(column int) {
|
|
if s.firstLineIndentColumn == 0 {
|
|
s.firstLineIndentColumn = column
|
|
}
|
|
if s.lineIndentColumn == 0 {
|
|
s.lineIndentColumn = column
|
|
}
|
|
}
|
|
|
|
func (s *MultiLineState) updateSpaceOnlyIndentColumn(column int) {
|
|
if s.firstLineIndentColumn != 0 {
|
|
return
|
|
}
|
|
s.spaceOnlyIndentColumn = column
|
|
}
|
|
|
|
func (s *MultiLineState) validateIndentAfterSpaceOnly(column int) error {
|
|
if s.firstLineIndentColumn != 0 {
|
|
return nil
|
|
}
|
|
if s.spaceOnlyIndentColumn > column {
|
|
return errors.New("invalid number of indent is specified after space only")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *MultiLineState) validateIndentColumn() error {
|
|
if firstLineIndentColumnByOpt(s.opt) == 0 {
|
|
return nil
|
|
}
|
|
if s.firstLineIndentColumn > s.lineIndentColumn {
|
|
return errors.New("invalid number of indent is specified in the multi-line header")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *MultiLineState) updateNewLineState() {
|
|
s.prevLineIndentColumn = s.lineIndentColumn
|
|
if s.lineIndentColumn != 0 {
|
|
s.lastNotSpaceOnlyLineIndentColumn = s.lineIndentColumn
|
|
}
|
|
s.foldedNewLine = true
|
|
s.lineIndentColumn = 0
|
|
}
|
|
|
|
func (s *MultiLineState) isIndentColumn(column int) bool {
|
|
if s.firstLineIndentColumn == 0 {
|
|
return column == 1
|
|
}
|
|
return s.firstLineIndentColumn > column
|
|
}
|
|
|
|
func (s *MultiLineState) addIndent(ctx *Context, column int) {
|
|
if s.firstLineIndentColumn == 0 {
|
|
return
|
|
}
|
|
|
|
// If the first line of the document has already been evaluated, the number is treated as the threshold, since the `firstLineIndentColumn` is a positive number.
|
|
if column < s.firstLineIndentColumn {
|
|
return
|
|
}
|
|
|
|
// `c.foldedNewLine` is a variable that is set to true for every newline.
|
|
if !s.isLiteral && s.foldedNewLine {
|
|
s.foldedNewLine = false
|
|
}
|
|
// Since addBuf ignore space character, add to the buffer directly.
|
|
ctx.buf = append(ctx.buf, ' ')
|
|
ctx.notSpaceCharPos = len(ctx.buf)
|
|
}
|
|
|
|
// updateNewLineInFolded if Folded or RawFolded context and the content on the current line starts at the same column as the previous line,
|
|
// treat the new-line-char as a space.
|
|
func (s *MultiLineState) updateNewLineInFolded(ctx *Context, column int) {
|
|
if s.isLiteral {
|
|
return
|
|
}
|
|
|
|
// Folded or RawFolded.
|
|
|
|
if !s.foldedNewLine {
|
|
return
|
|
}
|
|
var (
|
|
lastChar rune
|
|
prevLastChar rune
|
|
)
|
|
if len(ctx.buf) != 0 {
|
|
lastChar = ctx.buf[len(ctx.buf)-1]
|
|
}
|
|
if len(ctx.buf) > 1 {
|
|
prevLastChar = ctx.buf[len(ctx.buf)-2]
|
|
}
|
|
if s.lineIndentColumn == s.prevLineIndentColumn {
|
|
// ---
|
|
// >
|
|
// a
|
|
// b
|
|
if lastChar == '\n' {
|
|
ctx.buf[len(ctx.buf)-1] = ' '
|
|
}
|
|
} else if s.prevLineIndentColumn == 0 && s.lastNotSpaceOnlyLineIndentColumn == column {
|
|
// if previous line is indent-space and new-line-char only, prevLineIndentColumn is zero.
|
|
// In this case, last new-line-char is removed.
|
|
// ---
|
|
// >
|
|
// a
|
|
//
|
|
// b
|
|
if lastChar == '\n' && prevLastChar == '\n' {
|
|
ctx.buf = ctx.buf[:len(ctx.buf)-1]
|
|
ctx.notSpaceCharPos = len(ctx.buf)
|
|
}
|
|
}
|
|
s.foldedNewLine = false
|
|
}
|
|
|
|
func (s *MultiLineState) hasTrimAllEndNewlineOpt() bool {
|
|
return strings.HasPrefix(s.opt, "-") || strings.HasSuffix(s.opt, "-") || s.isRawFolded
|
|
}
|
|
|
|
func (s *MultiLineState) hasKeepAllEndNewlineOpt() bool {
|
|
return strings.HasPrefix(s.opt, "+") || strings.HasSuffix(s.opt, "+")
|
|
}
|
|
|
|
func (c *Context) addToken(tk *token.Token) {
|
|
if tk == nil {
|
|
return
|
|
}
|
|
c.tokens = append(c.tokens, tk)
|
|
}
|
|
|
|
func (c *Context) addBuf(r rune) {
|
|
if len(c.buf) == 0 && (r == ' ' || r == '\t') {
|
|
return
|
|
}
|
|
c.buf = append(c.buf, r)
|
|
if r != ' ' && r != '\t' {
|
|
c.notSpaceCharPos = len(c.buf)
|
|
}
|
|
}
|
|
|
|
func (c *Context) addBufWithTab(r rune) {
|
|
if len(c.buf) == 0 && r == ' ' {
|
|
return
|
|
}
|
|
c.buf = append(c.buf, r)
|
|
if r != ' ' {
|
|
c.notSpaceCharPos = len(c.buf)
|
|
}
|
|
}
|
|
|
|
func (c *Context) addOriginBuf(r rune) {
|
|
c.obuf = append(c.obuf, r)
|
|
if r != ' ' && r != '\t' {
|
|
c.notSpaceOrgCharPos = len(c.obuf)
|
|
}
|
|
}
|
|
|
|
func (c *Context) removeRightSpaceFromBuf() {
|
|
trimmedBuf := c.obuf[:c.notSpaceOrgCharPos]
|
|
buflen := len(trimmedBuf)
|
|
diff := len(c.obuf) - buflen
|
|
if diff > 0 {
|
|
c.obuf = c.obuf[:buflen]
|
|
c.buf = c.bufferedSrc()
|
|
}
|
|
}
|
|
|
|
func (c *Context) isEOS() bool {
|
|
return len(c.src)-1 <= c.idx
|
|
}
|
|
|
|
func (c *Context) isNextEOS() bool {
|
|
return len(c.src) <= c.idx+1
|
|
}
|
|
|
|
func (c *Context) next() bool {
|
|
return c.idx < c.size
|
|
}
|
|
|
|
func (c *Context) source(s, e int) string {
|
|
return string(c.src[s:e])
|
|
}
|
|
|
|
func (c *Context) previousChar() rune {
|
|
if c.idx > 0 {
|
|
return c.src[c.idx-1]
|
|
}
|
|
return rune(0)
|
|
}
|
|
|
|
func (c *Context) currentChar() rune {
|
|
if c.size > c.idx {
|
|
return c.src[c.idx]
|
|
}
|
|
return rune(0)
|
|
}
|
|
|
|
func (c *Context) nextChar() rune {
|
|
if c.size > c.idx+1 {
|
|
return c.src[c.idx+1]
|
|
}
|
|
return rune(0)
|
|
}
|
|
|
|
func (c *Context) repeatNum(r rune) int {
|
|
cnt := 0
|
|
for i := c.idx; i < c.size; i++ {
|
|
if c.src[i] == r {
|
|
cnt++
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
return cnt
|
|
}
|
|
|
|
func (c *Context) progress(num int) {
|
|
c.idx += num
|
|
}
|
|
|
|
func (c *Context) existsBuffer() bool {
|
|
return len(c.bufferedSrc()) != 0
|
|
}
|
|
|
|
func (c *Context) isMultiLine() bool {
|
|
return c.mstate != nil
|
|
}
|
|
|
|
func (c *Context) bufferedSrc() []rune {
|
|
src := c.buf[:c.notSpaceCharPos]
|
|
if c.isMultiLine() {
|
|
mstate := c.getMultiLineState()
|
|
// remove end '\n' character and trailing empty lines.
|
|
// https://yaml.org/spec/1.2.2/#8112-block-chomping-indicator
|
|
if mstate.hasTrimAllEndNewlineOpt() {
|
|
// If the '-' flag is specified, all trailing newline characters will be removed.
|
|
src = []rune(strings.TrimRight(string(src), "\n"))
|
|
} else if !mstate.hasKeepAllEndNewlineOpt() {
|
|
// Normally, all but one of the trailing newline characters are removed.
|
|
var newLineCharCount int
|
|
for i := len(src) - 1; i >= 0; i-- {
|
|
if src[i] == '\n' {
|
|
newLineCharCount++
|
|
continue
|
|
}
|
|
break
|
|
}
|
|
removedNewLineCharCount := newLineCharCount - 1
|
|
for removedNewLineCharCount > 0 {
|
|
src = []rune(strings.TrimSuffix(string(src), "\n"))
|
|
removedNewLineCharCount--
|
|
}
|
|
}
|
|
|
|
// If the text ends with a space character, remove all of them.
|
|
if mstate.hasTrimAllEndNewlineOpt() {
|
|
src = []rune(strings.TrimRight(string(src), " "))
|
|
}
|
|
if string(src) == "\n" {
|
|
// If the content consists only of a newline,
|
|
// it can be considered as the document ending without any specified value,
|
|
// so it is treated as an empty string.
|
|
src = []rune{}
|
|
}
|
|
if mstate.hasKeepAllEndNewlineOpt() && len(src) == 0 {
|
|
src = []rune{'\n'}
|
|
}
|
|
}
|
|
return src
|
|
}
|
|
|
|
func (c *Context) bufferedToken(pos *token.Position) *token.Token {
|
|
if c.idx == 0 {
|
|
return nil
|
|
}
|
|
source := c.bufferedSrc()
|
|
if len(source) == 0 {
|
|
c.buf = c.buf[:0] // clear value's buffer only.
|
|
return nil
|
|
}
|
|
var tk *token.Token
|
|
if c.isMultiLine() {
|
|
tk = token.String(string(source), string(c.obuf), pos)
|
|
} else {
|
|
tk = token.New(string(source), string(c.obuf), pos)
|
|
}
|
|
c.setTokenTypeByPrevTag(tk)
|
|
c.resetBuffer()
|
|
return tk
|
|
}
|
|
|
|
func (c *Context) setTokenTypeByPrevTag(tk *token.Token) {
|
|
lastTk := c.lastToken()
|
|
if lastTk == nil {
|
|
return
|
|
}
|
|
if lastTk.Type != token.TagType {
|
|
return
|
|
}
|
|
tag := token.ReservedTagKeyword(lastTk.Value)
|
|
if _, exists := token.ReservedTagKeywordMap[tag]; !exists {
|
|
tk.Type = token.StringType
|
|
}
|
|
}
|
|
|
|
func (c *Context) lastToken() *token.Token {
|
|
if len(c.tokens) != 0 {
|
|
return c.tokens[len(c.tokens)-1]
|
|
}
|
|
return nil
|
|
}
|