Normalize CR and CRLF in multi-line strings (#754)

This commit is contained in:
Shuhei Kitagawa 2025-11-29 03:32:30 +01:00 committed by GitHub
parent 90e8525591
commit f4d13479ba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 62 additions and 2 deletions

View file

@ -954,6 +954,14 @@ merge:
source: "v: |\n hello\n ...\n world\n",
value: map[string]string{"v": "hello\n...\nworld\n"},
},
{
source: "v: |\r\n hello\r\n ...\r\n world\r\n",
value: map[string]string{"v": "hello\n...\nworld\n"},
},
{
source: "v: |\r hello\r ...\r world\r",
value: map[string]string{"v": "hello\n...\nworld\n"},
},
{
source: "a: !!binary gIGC\n",
value: map[string]string{"a": "\x80\x81\x82"},
@ -970,6 +978,22 @@ merge:
},
},
},
{
source: "v:\r\n- A\r\n- |-\r\n B\r\n C\r\n",
value: map[string][]string{
"v": {
"A", "B\nC",
},
},
},
{
source: "v:\r- A\r- |-\r B\r C\r",
value: map[string][]string{
"v": {
"A", "B\nC",
},
},
},
{
source: "v:\n- A\n- |-\n B\n C\n\n\n",
value: map[string][]string{
@ -986,6 +1010,22 @@ merge:
},
},
},
{
source: "v:\r\n- A\r\n- >-\r\n B\r\n C\r\n",
value: map[string][]string{
"v": {
"A", "B C",
},
},
},
{
source: "v:\r- A\r- >-\r B\r C\r",
value: map[string][]string{
"v": {
"A", "B C",
},
},
},
{
source: "v:\n- A\n- >-\n B\n C\n\n\n",
value: map[string][]string{

View file

@ -777,6 +777,15 @@ func (s *Scanner) scanComment(ctx *Context) bool {
func (s *Scanner) scanMultiLine(ctx *Context, c rune) error {
state := ctx.getMultiLineState()
ctx.addOriginBuf(c)
// normalize CR and CRLF to LF
if c == '\r' {
if ctx.nextChar() == '\n' {
ctx.addOriginBuf('\n')
s.progress(ctx, 1)
s.offset++
}
c = '\n'
}
if ctx.isEOS() {
if s.isFirstCharAtLine && c == ' ' {
state.addIndent(ctx, s.column)
@ -1148,14 +1157,25 @@ func (s *Scanner) scanMultiLineHeaderOption(ctx *Context) error {
s.progress(ctx, 1) // skip '|' or '>' character
var progress int
var crlf bool
for idx, c := range ctx.src[ctx.idx:] {
progress = idx
ctx.addOriginBuf(c)
if s.isNewLineChar(c) {
nextIdx := ctx.idx + idx + 1
if c == '\r' && nextIdx < len(ctx.src) && ctx.src[nextIdx] == '\n' {
crlf = true
continue // process \n in the next iteration
}
break
}
}
value := strings.TrimRight(ctx.source(ctx.idx, ctx.idx+progress), " ")
endPos := ctx.idx + progress
if crlf {
// Drop the '\r' of the "\r\n" pair so it does not end up in the header value
endPos = endPos - 1
}
value := strings.TrimRight(ctx.source(ctx.idx, endPos), " ")
commentValueIndex := strings.Index(value, "#")
opt := value
if commentValueIndex > 0 {
@ -1189,7 +1209,7 @@ func (s *Scanner) scanMultiLineHeaderOption(ctx *Context) error {
ctx.setFolded(s.lastDelimColumn, opt)
}
if commentIndex > 0 {
comment := string(value[commentValueIndex+1:])
comment := value[commentValueIndex+1:]
s.offset += len(headerBuf)
s.column += len(headerBuf)
ctx.addToken(token.Comment(comment, string(ctx.obuf[len(headerBuf):]), s.pos()))