mirror of
				https://github.com/goccy/go-yaml.git
				synced 2025-10-30 21:01:06 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			452 lines
		
	
	
	
		
			9.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			452 lines
		
	
	
	
		
			9.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package scanner
 | |
| 
 | |
| import (
 | |
| 	"errors"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 	"sync"
 | |
| 
 | |
| 	"github.com/goccy/go-yaml/token"
 | |
| )
 | |
| 
 | |
| // Context context at scanning
 | |
| type Context struct {
 | |
| 	idx                int
 | |
| 	size               int
 | |
| 	notSpaceCharPos    int
 | |
| 	notSpaceOrgCharPos int
 | |
| 	src                []rune
 | |
| 	buf                []rune
 | |
| 	obuf               []rune
 | |
| 	tokens             token.Tokens
 | |
| 	mstate             *MultiLineState
 | |
| }
 | |
| 
 | |
| type MultiLineState struct {
 | |
| 	opt                              string
 | |
| 	firstLineIndentColumn            int
 | |
| 	prevLineIndentColumn             int
 | |
| 	lineIndentColumn                 int
 | |
| 	lastNotSpaceOnlyLineIndentColumn int
 | |
| 	spaceOnlyIndentColumn            int
 | |
| 	foldedNewLine                    bool
 | |
| 	isRawFolded                      bool
 | |
| 	isLiteral                        bool
 | |
| 	isFolded                         bool
 | |
| }
 | |
| 
 | |
| var (
 | |
| 	ctxPool = sync.Pool{
 | |
| 		New: func() interface{} {
 | |
| 			return createContext()
 | |
| 		},
 | |
| 	}
 | |
| )
 | |
| 
 | |
| func createContext() *Context {
 | |
| 	return &Context{
 | |
| 		idx:    0,
 | |
| 		tokens: token.Tokens{},
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func newContext(src []rune) *Context {
 | |
| 	ctx, _ := ctxPool.Get().(*Context)
 | |
| 	ctx.reset(src)
 | |
| 	return ctx
 | |
| }
 | |
| 
 | |
| func (c *Context) release() {
 | |
| 	ctxPool.Put(c)
 | |
| }
 | |
| 
 | |
| func (c *Context) clear() {
 | |
| 	c.resetBuffer()
 | |
| 	c.mstate = nil
 | |
| }
 | |
| 
 | |
| func (c *Context) reset(src []rune) {
 | |
| 	c.idx = 0
 | |
| 	c.size = len(src)
 | |
| 	c.src = src
 | |
| 	c.tokens = c.tokens[:0]
 | |
| 	c.resetBuffer()
 | |
| 	c.mstate = nil
 | |
| }
 | |
| 
 | |
| func (c *Context) resetBuffer() {
 | |
| 	c.buf = c.buf[:0]
 | |
| 	c.obuf = c.obuf[:0]
 | |
| 	c.notSpaceCharPos = 0
 | |
| 	c.notSpaceOrgCharPos = 0
 | |
| }
 | |
| 
 | |
| func (c *Context) breakMultiLine() {
 | |
| 	c.mstate = nil
 | |
| }
 | |
| 
 | |
| func (c *Context) getMultiLineState() *MultiLineState {
 | |
| 	return c.mstate
 | |
| }
 | |
| 
 | |
| func (c *Context) setLiteral(lastDelimColumn int, opt string) {
 | |
| 	mstate := &MultiLineState{
 | |
| 		isLiteral: true,
 | |
| 		opt:       opt,
 | |
| 	}
 | |
| 	indent := firstLineIndentColumnByOpt(opt)
 | |
| 	if indent > 0 {
 | |
| 		mstate.firstLineIndentColumn = lastDelimColumn + indent
 | |
| 	}
 | |
| 	c.mstate = mstate
 | |
| }
 | |
| 
 | |
| func (c *Context) setFolded(lastDelimColumn int, opt string) {
 | |
| 	mstate := &MultiLineState{
 | |
| 		isFolded: true,
 | |
| 		opt:      opt,
 | |
| 	}
 | |
| 	indent := firstLineIndentColumnByOpt(opt)
 | |
| 	if indent > 0 {
 | |
| 		mstate.firstLineIndentColumn = lastDelimColumn + indent
 | |
| 	}
 | |
| 	c.mstate = mstate
 | |
| }
 | |
| 
 | |
| func (c *Context) setRawFolded(column int) {
 | |
| 	mstate := &MultiLineState{
 | |
| 		isRawFolded: true,
 | |
| 	}
 | |
| 	mstate.updateIndentColumn(column)
 | |
| 	c.mstate = mstate
 | |
| }
 | |
| 
 | |
| func firstLineIndentColumnByOpt(opt string) int {
 | |
| 	opt = strings.TrimPrefix(opt, "-")
 | |
| 	opt = strings.TrimPrefix(opt, "+")
 | |
| 	opt = strings.TrimSuffix(opt, "-")
 | |
| 	opt = strings.TrimSuffix(opt, "+")
 | |
| 	i, _ := strconv.ParseInt(opt, 10, 64)
 | |
| 	return int(i)
 | |
| }
 | |
| 
 | |
| func (s *MultiLineState) lastDelimColumn() int {
 | |
| 	if s.firstLineIndentColumn == 0 {
 | |
| 		return 0
 | |
| 	}
 | |
| 	return s.firstLineIndentColumn - 1
 | |
| }
 | |
| 
 | |
| func (s *MultiLineState) updateIndentColumn(column int) {
 | |
| 	if s.firstLineIndentColumn == 0 {
 | |
| 		s.firstLineIndentColumn = column
 | |
| 	}
 | |
| 	if s.lineIndentColumn == 0 {
 | |
| 		s.lineIndentColumn = column
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (s *MultiLineState) updateSpaceOnlyIndentColumn(column int) {
 | |
| 	if s.firstLineIndentColumn != 0 {
 | |
| 		return
 | |
| 	}
 | |
| 	s.spaceOnlyIndentColumn = column
 | |
| }
 | |
| 
 | |
| func (s *MultiLineState) validateIndentAfterSpaceOnly(column int) error {
 | |
| 	if s.firstLineIndentColumn != 0 {
 | |
| 		return nil
 | |
| 	}
 | |
| 	if s.spaceOnlyIndentColumn > column {
 | |
| 		return errors.New("invalid number of indent is specified after space only")
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (s *MultiLineState) validateIndentColumn() error {
 | |
| 	if firstLineIndentColumnByOpt(s.opt) == 0 {
 | |
| 		return nil
 | |
| 	}
 | |
| 	if s.firstLineIndentColumn > s.lineIndentColumn {
 | |
| 		return errors.New("invalid number of indent is specified in the multi-line header")
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (s *MultiLineState) updateNewLineState() {
 | |
| 	s.prevLineIndentColumn = s.lineIndentColumn
 | |
| 	if s.lineIndentColumn != 0 {
 | |
| 		s.lastNotSpaceOnlyLineIndentColumn = s.lineIndentColumn
 | |
| 	}
 | |
| 	s.foldedNewLine = true
 | |
| 	s.lineIndentColumn = 0
 | |
| }
 | |
| 
 | |
| func (s *MultiLineState) isIndentColumn(column int) bool {
 | |
| 	if s.firstLineIndentColumn == 0 {
 | |
| 		return column == 1
 | |
| 	}
 | |
| 	return s.firstLineIndentColumn > column
 | |
| }
 | |
| 
 | |
| func (s *MultiLineState) addIndent(ctx *Context, column int) {
 | |
| 	if s.firstLineIndentColumn == 0 {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	// If the first line of the document has already been evaluated, the number is treated as the threshold, since the `firstLineIndentColumn` is a positive number.
 | |
| 	if column < s.firstLineIndentColumn {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	// `c.foldedNewLine` is a variable that is set to true for every newline.
 | |
| 	if !s.isLiteral && s.foldedNewLine {
 | |
| 		s.foldedNewLine = false
 | |
| 	}
 | |
| 	// Since addBuf ignore space character, add to the buffer directly.
 | |
| 	ctx.buf = append(ctx.buf, ' ')
 | |
| 	ctx.notSpaceCharPos = len(ctx.buf)
 | |
| }
 | |
| 
 | |
| // updateNewLineInFolded if Folded or RawFolded context and the content on the current line starts at the same column as the previous line,
 | |
| // treat the new-line-char as a space.
 | |
| func (s *MultiLineState) updateNewLineInFolded(ctx *Context, column int) {
 | |
| 	if s.isLiteral {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	// Folded or RawFolded.
 | |
| 
 | |
| 	if !s.foldedNewLine {
 | |
| 		return
 | |
| 	}
 | |
| 	var (
 | |
| 		lastChar     rune
 | |
| 		prevLastChar rune
 | |
| 	)
 | |
| 	if len(ctx.buf) != 0 {
 | |
| 		lastChar = ctx.buf[len(ctx.buf)-1]
 | |
| 	}
 | |
| 	if len(ctx.buf) > 1 {
 | |
| 		prevLastChar = ctx.buf[len(ctx.buf)-2]
 | |
| 	}
 | |
| 	if s.lineIndentColumn == s.prevLineIndentColumn {
 | |
| 		// ---
 | |
| 		// >
 | |
| 		//  a
 | |
| 		//  b
 | |
| 		if lastChar == '\n' {
 | |
| 			ctx.buf[len(ctx.buf)-1] = ' '
 | |
| 		}
 | |
| 	} else if s.prevLineIndentColumn == 0 && s.lastNotSpaceOnlyLineIndentColumn == column {
 | |
| 		// if previous line is indent-space and new-line-char only, prevLineIndentColumn is zero.
 | |
| 		// In this case, last new-line-char is removed.
 | |
| 		// ---
 | |
| 		// >
 | |
| 		//  a
 | |
| 		//
 | |
| 		//  b
 | |
| 		if lastChar == '\n' && prevLastChar == '\n' {
 | |
| 			ctx.buf = ctx.buf[:len(ctx.buf)-1]
 | |
| 			ctx.notSpaceCharPos = len(ctx.buf)
 | |
| 		}
 | |
| 	}
 | |
| 	s.foldedNewLine = false
 | |
| }
 | |
| 
 | |
| func (s *MultiLineState) hasTrimAllEndNewlineOpt() bool {
 | |
| 	return strings.HasPrefix(s.opt, "-") || strings.HasSuffix(s.opt, "-") || s.isRawFolded
 | |
| }
 | |
| 
 | |
| func (s *MultiLineState) hasKeepAllEndNewlineOpt() bool {
 | |
| 	return strings.HasPrefix(s.opt, "+") || strings.HasSuffix(s.opt, "+")
 | |
| }
 | |
| 
 | |
| func (c *Context) addToken(tk *token.Token) {
 | |
| 	if tk == nil {
 | |
| 		return
 | |
| 	}
 | |
| 	c.tokens = append(c.tokens, tk)
 | |
| }
 | |
| 
 | |
| func (c *Context) addBuf(r rune) {
 | |
| 	if len(c.buf) == 0 && (r == ' ' || r == '\t') {
 | |
| 		return
 | |
| 	}
 | |
| 	c.buf = append(c.buf, r)
 | |
| 	if r != ' ' && r != '\t' {
 | |
| 		c.notSpaceCharPos = len(c.buf)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (c *Context) addBufWithTab(r rune) {
 | |
| 	if len(c.buf) == 0 && r == ' ' {
 | |
| 		return
 | |
| 	}
 | |
| 	c.buf = append(c.buf, r)
 | |
| 	if r != ' ' {
 | |
| 		c.notSpaceCharPos = len(c.buf)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (c *Context) addOriginBuf(r rune) {
 | |
| 	c.obuf = append(c.obuf, r)
 | |
| 	if r != ' ' && r != '\t' {
 | |
| 		c.notSpaceOrgCharPos = len(c.obuf)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (c *Context) removeRightSpaceFromBuf() {
 | |
| 	trimmedBuf := c.obuf[:c.notSpaceOrgCharPos]
 | |
| 	buflen := len(trimmedBuf)
 | |
| 	diff := len(c.obuf) - buflen
 | |
| 	if diff > 0 {
 | |
| 		c.obuf = c.obuf[:buflen]
 | |
| 		c.buf = c.bufferedSrc()
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (c *Context) isEOS() bool {
 | |
| 	return len(c.src)-1 <= c.idx
 | |
| }
 | |
| 
 | |
| func (c *Context) isNextEOS() bool {
 | |
| 	return len(c.src) <= c.idx+1
 | |
| }
 | |
| 
 | |
| func (c *Context) next() bool {
 | |
| 	return c.idx < c.size
 | |
| }
 | |
| 
 | |
| func (c *Context) source(s, e int) string {
 | |
| 	return string(c.src[s:e])
 | |
| }
 | |
| 
 | |
| func (c *Context) previousChar() rune {
 | |
| 	if c.idx > 0 {
 | |
| 		return c.src[c.idx-1]
 | |
| 	}
 | |
| 	return rune(0)
 | |
| }
 | |
| 
 | |
| func (c *Context) currentChar() rune {
 | |
| 	if c.size > c.idx {
 | |
| 		return c.src[c.idx]
 | |
| 	}
 | |
| 	return rune(0)
 | |
| }
 | |
| 
 | |
| func (c *Context) nextChar() rune {
 | |
| 	if c.size > c.idx+1 {
 | |
| 		return c.src[c.idx+1]
 | |
| 	}
 | |
| 	return rune(0)
 | |
| }
 | |
| 
 | |
| func (c *Context) repeatNum(r rune) int {
 | |
| 	cnt := 0
 | |
| 	for i := c.idx; i < c.size; i++ {
 | |
| 		if c.src[i] == r {
 | |
| 			cnt++
 | |
| 		} else {
 | |
| 			break
 | |
| 		}
 | |
| 	}
 | |
| 	return cnt
 | |
| }
 | |
| 
 | |
| func (c *Context) progress(num int) {
 | |
| 	c.idx += num
 | |
| }
 | |
| 
 | |
| func (c *Context) existsBuffer() bool {
 | |
| 	return len(c.bufferedSrc()) != 0
 | |
| }
 | |
| 
 | |
| func (c *Context) isMultiLine() bool {
 | |
| 	return c.mstate != nil
 | |
| }
 | |
| 
 | |
| func (c *Context) bufferedSrc() []rune {
 | |
| 	src := c.buf[:c.notSpaceCharPos]
 | |
| 	if c.isMultiLine() {
 | |
| 		mstate := c.getMultiLineState()
 | |
| 		// remove end '\n' character and trailing empty lines.
 | |
| 		// https://yaml.org/spec/1.2.2/#8112-block-chomping-indicator
 | |
| 		if mstate.hasTrimAllEndNewlineOpt() {
 | |
| 			// If the '-' flag is specified, all trailing newline characters will be removed.
 | |
| 			src = []rune(strings.TrimRight(string(src), "\n"))
 | |
| 		} else if !mstate.hasKeepAllEndNewlineOpt() {
 | |
| 			// Normally, all but one of the trailing newline characters are removed.
 | |
| 			var newLineCharCount int
 | |
| 			for i := len(src) - 1; i >= 0; i-- {
 | |
| 				if src[i] == '\n' {
 | |
| 					newLineCharCount++
 | |
| 					continue
 | |
| 				}
 | |
| 				break
 | |
| 			}
 | |
| 			removedNewLineCharCount := newLineCharCount - 1
 | |
| 			for removedNewLineCharCount > 0 {
 | |
| 				src = []rune(strings.TrimSuffix(string(src), "\n"))
 | |
| 				removedNewLineCharCount--
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		// If the text ends with a space character, remove all of them.
 | |
| 		if mstate.hasTrimAllEndNewlineOpt() {
 | |
| 			src = []rune(strings.TrimRight(string(src), " "))
 | |
| 		}
 | |
| 		if string(src) == "\n" {
 | |
| 			// If the content consists only of a newline,
 | |
| 			// it can be considered as the document ending without any specified value,
 | |
| 			// so it is treated as an empty string.
 | |
| 			src = []rune{}
 | |
| 		}
 | |
| 		if mstate.hasKeepAllEndNewlineOpt() && len(src) == 0 {
 | |
| 			src = []rune{'\n'}
 | |
| 		}
 | |
| 	}
 | |
| 	return src
 | |
| }
 | |
| 
 | |
| func (c *Context) bufferedToken(pos *token.Position) *token.Token {
 | |
| 	if c.idx == 0 {
 | |
| 		return nil
 | |
| 	}
 | |
| 	source := c.bufferedSrc()
 | |
| 	if len(source) == 0 {
 | |
| 		c.buf = c.buf[:0] // clear value's buffer only.
 | |
| 		return nil
 | |
| 	}
 | |
| 	var tk *token.Token
 | |
| 	if c.isMultiLine() {
 | |
| 		tk = token.String(string(source), string(c.obuf), pos)
 | |
| 	} else {
 | |
| 		tk = token.New(string(source), string(c.obuf), pos)
 | |
| 	}
 | |
| 	c.setTokenTypeByPrevTag(tk)
 | |
| 	c.resetBuffer()
 | |
| 	return tk
 | |
| }
 | |
| 
 | |
| func (c *Context) setTokenTypeByPrevTag(tk *token.Token) {
 | |
| 	lastTk := c.lastToken()
 | |
| 	if lastTk == nil {
 | |
| 		return
 | |
| 	}
 | |
| 	if lastTk.Type != token.TagType {
 | |
| 		return
 | |
| 	}
 | |
| 	tag := token.ReservedTagKeyword(lastTk.Value)
 | |
| 	if _, exists := token.ReservedTagKeywordMap[tag]; !exists {
 | |
| 		tk.Type = token.StringType
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (c *Context) lastToken() *token.Token {
 | |
| 	if len(c.tokens) != 0 {
 | |
| 		return c.tokens[len(c.tokens)-1]
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | 
