go-yaml/scanner/context.go

453 lines
9.8 KiB
Go
Raw Normal View History

2019-10-16 18:19:48 +09:00
package scanner
import (
"errors"
"strconv"
"strings"
"sync"
2019-10-16 18:19:48 +09:00
"github.com/goccy/go-yaml/token"
)
2019-10-21 12:53:30 +09:00
// Context context at scanning
2019-10-16 18:19:48 +09:00
type Context struct {
idx int
size int
notSpaceCharPos int
notSpaceOrgCharPos int
src []rune
buf []rune
obuf []rune
tokens token.Tokens
mstate *MultiLineState
}
// MultiLineState tracks indentation and folding while a multi-line scalar
// (literal, folded or raw-folded) is being scanned.
type MultiLineState struct {
	// opt is the header option string given to setLiteral / setFolded; it may
	// carry '-' / '+' markers and an indentation number.
	opt string
	// firstLineIndentColumn is the indent column of the first content line.
	// Zero means it has not been determined yet.
	firstLineIndentColumn int
	// prevLineIndentColumn is lineIndentColumn as it was when the previous
	// newline was processed.
	prevLineIndentColumn int
	// lineIndentColumn is the indent column of the current line; it is reset
	// to zero on every newline.
	lineIndentColumn int
	// lastNotSpaceOnlyLineIndentColumn is the most recent non-zero
	// lineIndentColumn seen at a newline.
	lastNotSpaceOnlyLineIndentColumn int
	// spaceOnlyIndentColumn is the column recorded by
	// updateSpaceOnlyIndentColumn while firstLineIndentColumn is still zero.
	spaceOnlyIndentColumn int
	// foldedNewLine is set on every newline and cleared once that newline has
	// been handled by addIndent or updateNewLineInFolded.
	foldedNewLine bool
	// isRawFolded is set by setRawFolded.
	isRawFolded bool
	// isLiteral is set by setLiteral.
	isLiteral bool
	// isFolded is set by setFolded.
	isFolded bool
}
// ctxPool recycles Context values between scans. When the pool has nothing to
// hand out, a fresh Context is built with createContext.
var ctxPool = sync.Pool{
	New: func() interface{} { return createContext() },
}
2019-10-16 18:19:48 +09:00
func createContext() *Context {
2019-10-16 18:19:48 +09:00
return &Context{
idx: 0,
tokens: token.Tokens{},
2019-10-16 18:19:48 +09:00
}
}
func newContext(src []rune) *Context {
2024-10-28 11:31:15 +09:00
ctx, _ := ctxPool.Get().(*Context)
ctx.reset(src)
return ctx
}
// release hands c back to ctxPool so a later newContext call can reuse it.
// Nothing is cleared here: src, tokens and the buffers stay referenced until
// the context is reset again.
func (c *Context) release() {
	ctxPool.Put(c)
}
// clear drops the multi-line state and empties both buffers. The source, the
// read position and the collected tokens are left as they are.
func (c *Context) clear() {
	c.mstate = nil
	c.resetBuffer()
}
// reset prepares c to scan src from the beginning: the cursor returns to 0,
// the token list and buffers are truncated (their capacity is kept) and any
// multi-line state is dropped.
func (c *Context) reset(src []rune) {
	c.src, c.size = src, len(src)
	c.idx = 0
	c.tokens = c.tokens[:0]
	c.mstate = nil
	c.resetBuffer()
}
2019-10-16 18:19:48 +09:00
func (c *Context) resetBuffer() {
c.buf = c.buf[:0]
c.obuf = c.obuf[:0]
c.notSpaceCharPos = 0
c.notSpaceOrgCharPos = 0
2019-10-16 18:19:48 +09:00
}
// breakMultiLine leaves multi-line mode by discarding the current
// MultiLineState. The buffers are not touched.
func (c *Context) breakMultiLine() {
	c.mstate = nil
}
// getMultiLineState returns the active multi-line state, or nil when the
// context is not inside a multi-line scalar.
func (c *Context) getMultiLineState() *MultiLineState {
	return c.mstate
}
// setLiteral enters literal multi-line mode with the header option opt.
//
// When opt carries a positive indentation number, the first-line indent column
// is fixed up front to lastDelimColumn plus that number. Otherwise it stays
// zero and is filled in by the first updateIndentColumn call.
func (c *Context) setLiteral(lastDelimColumn int, opt string) {
	state := &MultiLineState{opt: opt, isLiteral: true}
	if n := firstLineIndentColumnByOpt(opt); n > 0 {
		state.firstLineIndentColumn = lastDelimColumn + n
	}
	c.mstate = state
}
// setFolded enters folded multi-line mode with the header option opt.
//
// When opt carries a positive indentation number, the first-line indent column
// is fixed up front to lastDelimColumn plus that number. Otherwise it stays
// zero and is filled in by the first updateIndentColumn call.
func (c *Context) setFolded(lastDelimColumn int, opt string) {
	state := &MultiLineState{opt: opt, isFolded: true}
	if n := firstLineIndentColumnByOpt(opt); n > 0 {
		state.firstLineIndentColumn = lastDelimColumn + n
	}
	c.mstate = state
}
// setRawFolded enters raw-folded multi-line mode and records column as the
// indent column of both the first line and the current line.
func (c *Context) setRawFolded(column int) {
	c.mstate = &MultiLineState{isRawFolded: true}
	c.mstate.updateIndentColumn(column)
}
// firstLineIndentColumnByOpt extracts the indentation number from a
// multi-line header option such as "2", "2-", "-2" or "+2".
//
// One leading and one trailing '-' / '+' marker are stripped and the rest is
// parsed as a decimal integer. It returns 0 when no number is present or when
// the remainder cannot be parsed, including values that do not fit in an int,
// so callers never receive a clamped, overflow-prone value.
func firstLineIndentColumnByOpt(opt string) int {
	opt = strings.TrimPrefix(opt, "-")
	opt = strings.TrimPrefix(opt, "+")
	opt = strings.TrimSuffix(opt, "-")
	opt = strings.TrimSuffix(opt, "+")
	if opt == "" {
		// Header without an indentation number.
		return 0
	}
	n, err := strconv.Atoi(opt)
	if err != nil {
		// Not a number or out of range: treat as "no indentation specified"
		// rather than passing on the saturated value strconv returns.
		return 0
	}
	return n
}
// lastDelimColumn returns the column just before the first-line indent column,
// or 0 while the first-line indent column is still undetermined.
func (s *MultiLineState) lastDelimColumn() int {
	if col := s.firstLineIndentColumn; col != 0 {
		return col - 1
	}
	return 0
}
// updateIndentColumn records column as the indent column of the current line
// and, if none has been recorded yet, of the first line. Values that are
// already non-zero are never overwritten.
func (s *MultiLineState) updateIndentColumn(column int) {
	firstUnset := s.firstLineIndentColumn == 0
	lineUnset := s.lineIndentColumn == 0
	if firstUnset {
		s.firstLineIndentColumn = column
	}
	if lineUnset {
		s.lineIndentColumn = column
	}
}
// updateSpaceOnlyIndentColumn remembers column as the space-only indent
// column. It does nothing once the first-line indent column is known.
func (s *MultiLineState) updateSpaceOnlyIndentColumn(column int) {
	if s.firstLineIndentColumn == 0 {
		s.spaceOnlyIndentColumn = column
	}
}
// validateIndentAfterSpaceOnly returns an error when the first-line indent
// column is still undetermined and the recorded space-only indent column is
// greater than column. In every other case it returns nil.
func (s *MultiLineState) validateIndentAfterSpaceOnly(column int) error {
	if s.firstLineIndentColumn == 0 && s.spaceOnlyIndentColumn > column {
		return errors.New("invalid number of indent is specified after space only")
	}
	return nil
}
// validateIndentColumn checks the current line against the indentation given
// in the header. The check applies only when the header option yields a
// non-zero indentation number; it then fails if the current line's indent
// column is smaller than the first-line indent column.
func (s *MultiLineState) validateIndentColumn() error {
	headerIndent := firstLineIndentColumnByOpt(s.opt)
	if headerIndent != 0 && s.firstLineIndentColumn > s.lineIndentColumn {
		return errors.New("invalid number of indent is specified in the multi line header")
	}
	return nil
}
// updateNewLineState rolls the per-line state over at a newline: the current
// line's indent column becomes the previous one (and, when non-zero, the last
// non-space-only one), the current indent column is cleared and the newline is
// flagged as pending for folding.
func (s *MultiLineState) updateNewLineState() {
	current := s.lineIndentColumn
	s.prevLineIndentColumn = current
	if current != 0 {
		s.lastNotSpaceOnlyLineIndentColumn = current
	}
	s.lineIndentColumn = 0
	s.foldedNewLine = true
}
// isIndentColumn reports whether column lies in the indentation area: before
// the first-line indent column when that is known, otherwise only column 1.
func (s *MultiLineState) isIndentColumn(column int) bool {
	if first := s.firstLineIndentColumn; first != 0 {
		return column < first
	}
	return column == 1
}
// addIndent appends one space to ctx.buf for an indentation character found at
// or beyond the first-line indent column. It does nothing while the first-line
// indent column is undetermined or when column lies before it.
func (s *MultiLineState) addIndent(ctx *Context, column int) {
	// A non-zero firstLineIndentColumn acts as the threshold: only columns at
	// or past it count as content.
	first := s.firstLineIndentColumn
	if first == 0 || column < first {
		return
	}
	// s.foldedNewLine is raised on every newline; outside literal mode the
	// pending newline is considered handled from here on.
	if !s.isLiteral {
		s.foldedNewLine = false
	}
	// addBuf drops leading spaces, so write to the buffer directly and mark
	// the space as part of the value.
	ctx.buf = append(ctx.buf, ' ')
	ctx.notSpaceCharPos = len(ctx.buf)
}
// updateNewLineInFolded applies line folding to the newline that ends the
// previous line. It acts only in folded or raw-folded mode and only while a
// newline is pending (foldedNewLine); the pending flag is cleared afterwards.
//
// If the current line starts at the same column as the previous one, the
// trailing new-line-char in ctx.buf is turned into a space.
func (s *MultiLineState) updateNewLineInFolded(ctx *Context, column int) {
	// Literal scalars never fold, and each newline is folded at most once.
	if s.isLiteral || !s.foldedNewLine {
		return
	}

	n := len(ctx.buf)
	endsWithNewline := n > 0 && ctx.buf[n-1] == '\n'
	endsWithTwoNewlines := n > 1 && endsWithNewline && ctx.buf[n-2] == '\n'

	switch {
	case s.lineIndentColumn == s.prevLineIndentColumn:
		// ---
		// >
		//  a
		//  b
		if endsWithNewline {
			ctx.buf[n-1] = ' '
		}
	case s.prevLineIndentColumn == 0 && s.lastNotSpaceOnlyLineIndentColumn == column:
		// The previous line held only indentation and a new-line-char, which
		// leaves prevLineIndentColumn at zero. Drop the last new-line-char.
		// ---
		// >
		//  a
		//
		//  b
		if endsWithTwoNewlines {
			ctx.buf = ctx.buf[:n-1]
			ctx.notSpaceCharPos = len(ctx.buf)
		}
	}
	s.foldedNewLine = false
}
// hasTrimAllEndNewlineOpt reports whether all trailing newlines are to be
// removed: the scalar is raw-folded, or opt starts or ends with '-'.
func (s *MultiLineState) hasTrimAllEndNewlineOpt() bool {
	if s.isRawFolded {
		return true
	}
	return strings.HasPrefix(s.opt, "-") || strings.HasSuffix(s.opt, "-")
}
func (s *MultiLineState) hasKeepAllEndNewlineOpt() bool {
return strings.HasPrefix(s.opt, "+") || strings.HasSuffix(s.opt, "+")
2019-10-16 18:19:48 +09:00
}
// addToken appends tk to the collected tokens. A nil token is ignored.
func (c *Context) addToken(tk *token.Token) {
	if tk != nil {
		c.tokens = append(c.tokens, tk)
	}
}
func (c *Context) addBuf(r rune) {
if len(c.buf) == 0 && (r == ' ' || r == '\t') {
return
}
2019-10-16 18:19:48 +09:00
c.buf = append(c.buf, r)
if r != ' ' && r != '\t' {
c.notSpaceCharPos = len(c.buf)
}
2019-10-16 18:19:48 +09:00
}
// addBufWithTab is the variant of addBuf that treats a tab as ordinary
// content: only spaces are dropped while the buffer is empty, and any rune
// other than a space advances notSpaceCharPos.
func (c *Context) addBufWithTab(r rune) {
	isSpace := r == ' '
	if isSpace && len(c.buf) == 0 {
		return
	}
	c.buf = append(c.buf, r)
	if !isSpace {
		c.notSpaceCharPos = len(c.buf)
	}
}
2019-10-16 18:19:48 +09:00
func (c *Context) addOriginBuf(r rune) {
c.obuf = append(c.obuf, r)
if r != ' ' && r != '\t' {
c.notSpaceOrgCharPos = len(c.obuf)
}
2019-10-16 18:19:48 +09:00
}
// removeRightSpaceFromBuf cuts the origin buffer back to notSpaceOrgCharPos.
// When that actually shortens it, the value buffer is replaced by the result
// of bufferedSrc as well. Otherwise nothing changes.
func (c *Context) removeRightSpaceFromBuf() {
	trimmed := c.obuf[:c.notSpaceOrgCharPos]
	if len(trimmed) >= len(c.obuf) {
		return
	}
	c.obuf = trimmed
	c.buf = c.bufferedSrc()
}
2019-10-16 18:19:48 +09:00
// isEOS reports whether the cursor is at or beyond the last index of src.
//
// NOTE(review): this condition is algebraically the same as the one in
// isNextEOS (len-1 <= idx vs. len <= idx+1); confirm that is intended.
func (c *Context) isEOS() bool {
	return c.idx >= len(c.src)-1
}
2019-10-30 16:57:59 +09:00
func (c *Context) isNextEOS() bool {
return len(c.src) <= c.idx+1
2019-10-30 16:57:59 +09:00
}
2019-10-16 18:19:48 +09:00
// next reports whether the cursor still points inside the source, i.e. there
// is a rune left to read at idx.
func (c *Context) next() bool {
	return c.size > c.idx
}
func (c *Context) source(s, e int) string {
2019-11-07 17:18:17 +09:00
return string(c.src[s:e])
2019-10-16 18:19:48 +09:00
}
func (c *Context) previousChar() rune {
if c.idx > 0 {
2019-11-07 17:18:17 +09:00
return c.src[c.idx-1]
2019-10-16 18:19:48 +09:00
}
return rune(0)
}
func (c *Context) currentChar() rune {
2022-12-02 04:02:53 +09:00
if c.size > c.idx {
return c.src[c.idx]
}
return rune(0)
2019-10-16 18:19:48 +09:00
}
2019-10-21 12:53:30 +09:00
func (c *Context) nextChar() rune {
if c.size > c.idx+1 {
2019-11-07 17:18:17 +09:00
return c.src[c.idx+1]
2019-10-16 18:19:48 +09:00
}
return rune(0)
}
func (c *Context) repeatNum(r rune) int {
cnt := 0
for i := c.idx; i < c.size; i++ {
2019-11-07 17:18:17 +09:00
if c.src[i] == r {
2019-10-16 18:19:48 +09:00
cnt++
} else {
break
}
}
return cnt
}
// progress moves the cursor forward by num positions. No bounds check is
// performed.
func (c *Context) progress(num int) {
	c.idx = c.idx + num
}
// existsBuffer reports whether bufferedSrc currently yields any content.
func (c *Context) existsBuffer() bool {
	buffered := c.bufferedSrc()
	return len(buffered) > 0
}
// isMultiLine reports whether a multi-line scalar is being scanned, i.e. a
// MultiLineState is attached to the context.
func (c *Context) isMultiLine() bool {
	return nil != c.mstate
}
func (c *Context) bufferedSrc() []rune {
src := c.buf[:c.notSpaceCharPos]
if c.isMultiLine() {
mstate := c.getMultiLineState()
// remove end '\n' character and trailing empty lines.
// https://yaml.org/spec/1.2.2/#8112-block-chomping-indicator
if mstate.hasTrimAllEndNewlineOpt() {
// If the '-' flag is specified, all trailing newline characters will be removed.
src = []rune(strings.TrimRight(string(src), "\n"))
} else if !mstate.hasKeepAllEndNewlineOpt() {
// Normally, all but one of the trailing newline characters are removed.
var newLineCharCount int
for i := len(src) - 1; i >= 0; i-- {
if src[i] == '\n' {
newLineCharCount++
continue
}
break
}
removedNewLineCharCount := newLineCharCount - 1
for removedNewLineCharCount > 0 {
src = []rune(strings.TrimSuffix(string(src), "\n"))
removedNewLineCharCount--
}
}
// If the text ends with a space character, remove all of them.
if mstate.hasTrimAllEndNewlineOpt() {
src = []rune(strings.TrimRight(string(src), " "))
}
if string(src) == "\n" {
// If the content consists only of a newline,
// it can be considered as the document ending without any specified value,
// so it is treated as an empty string.
src = []rune{}
}
if mstate.hasKeepAllEndNewlineOpt() && len(src) == 0 {
src = []rune{'\n'}
}
2019-11-08 16:48:54 +09:00
}
return src
2019-10-16 18:19:48 +09:00
}
func (c *Context) bufferedToken(pos *token.Position) *token.Token {
if c.idx == 0 {
return nil
}
source := c.bufferedSrc()
if len(source) == 0 {
c.buf = c.buf[:0] // clear value's buffer only.
2019-10-16 18:19:48 +09:00
return nil
}
var tk *token.Token
if c.isMultiLine() {
tk = token.String(string(source), string(c.obuf), pos)
} else {
tk = token.New(string(source), string(c.obuf), pos)
}
c.setTokenTypeByPrevTag(tk)
c.resetBuffer()
2019-10-16 18:19:48 +09:00
return tk
}
2022-12-02 04:02:53 +09:00
// setTokenTypeByPrevTag forces tk to be a string token when the most recently
// added token is a tag whose value is not found in
// token.ReservedTagKeywordMap. In every other case tk is left unchanged.
func (c *Context) setTokenTypeByPrevTag(tk *token.Token) {
	prev := c.lastToken()
	if prev == nil || prev.Type != token.TagType {
		return
	}
	keyword := token.ReservedTagKeyword(prev.Value)
	if _, reserved := token.ReservedTagKeywordMap[keyword]; !reserved {
		tk.Type = token.StringType
	}
}
2022-12-02 04:02:53 +09:00
// lastToken returns the most recently added token, or nil when no token has
// been collected yet.
func (c *Context) lastToken() *token.Token {
	n := len(c.tokens)
	if n == 0 {
		return nil
	}
	return c.tokens[n-1]
}