From f4d13479ba3d44541a65d0dffeab5e3fa805413a Mon Sep 17 00:00:00 2001 From: Shuhei Kitagawa Date: Sat, 29 Nov 2025 03:32:30 +0100 Subject: [PATCH] Normalize CR and CRLF in multi-line strings (#754) --- decode_test.go | 40 ++++++++++++++++++++++++++++++++++++++++ scanner/scanner.go | 24 ++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/decode_test.go b/decode_test.go index 3df06e3..60ba511 100644 --- a/decode_test.go +++ b/decode_test.go @@ -954,6 +954,14 @@ merge: source: "v: |\n hello\n ...\n world\n", value: map[string]string{"v": "hello\n...\nworld\n"}, }, + { + source: "v: |\r\n hello\r\n ...\r\n world\r\n", + value: map[string]string{"v": "hello\n...\nworld\n"}, + }, + { + source: "v: |\r hello\r ...\r world\r", + value: map[string]string{"v": "hello\n...\nworld\n"}, + }, { source: "a: !!binary gIGC\n", value: map[string]string{"a": "\x80\x81\x82"}, @@ -970,6 +978,22 @@ merge: }, }, }, + { + source: "v:\r\n- A\r\n- |-\r\n B\r\n C\r\n", + value: map[string][]string{ + "v": { + "A", "B\nC", + }, + }, + }, + { + source: "v:\r- A\r- |-\r B\r C\r", + value: map[string][]string{ + "v": { + "A", "B\nC", + }, + }, + }, { source: "v:\n- A\n- |-\n B\n C\n\n\n", value: map[string][]string{ @@ -986,6 +1010,22 @@ merge: }, }, }, + { + source: "v:\r\n- A\r\n- >-\r\n B\r\n C\r\n", + value: map[string][]string{ + "v": { + "A", "B C", + }, + }, + }, + { + source: "v:\r- A\r- >-\r B\r C\r", + value: map[string][]string{ + "v": { + "A", "B C", + }, + }, + }, { source: "v:\n- A\n- >-\n B\n C\n\n\n", value: map[string][]string{ diff --git a/scanner/scanner.go b/scanner/scanner.go index 13a7ecc..799f469 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -777,6 +777,15 @@ func (s *Scanner) scanComment(ctx *Context) bool { func (s *Scanner) scanMultiLine(ctx *Context, c rune) error { state := ctx.getMultiLineState() ctx.addOriginBuf(c) + // normalize CR and CRLF to LF + if c == '\r' { + if ctx.nextChar() == '\n' { + ctx.addOriginBuf('\n') + s.progress(ctx, 1) + s.offset++ + } + c = '\n' + } if ctx.isEOS() { if s.isFirstCharAtLine && c == ' ' { state.addIndent(ctx, s.column) @@ -1148,14 +1157,25 @@ func (s *Scanner) scanMultiLineHeaderOption(ctx *Context) error { s.progress(ctx, 1) // skip '|' or '>' character var progress int + var crlf bool for idx, c := range ctx.src[ctx.idx:] { progress = idx ctx.addOriginBuf(c) if s.isNewLineChar(c) { + nextIdx := ctx.idx + idx + 1 + if c == '\r' && nextIdx < len(ctx.src) && ctx.src[nextIdx] == '\n' { + crlf = true + continue // process \n in the next iteration + } break } } - value := strings.TrimRight(ctx.source(ctx.idx, ctx.idx+progress), " ") + endPos := ctx.idx + progress + if crlf { + // Exclude \r + endPos = endPos - 1 + } + value := strings.TrimRight(ctx.source(ctx.idx, endPos), " ") commentValueIndex := strings.Index(value, "#") opt := value if commentValueIndex > 0 { @@ -1189,7 +1209,7 @@ func (s *Scanner) scanMultiLineHeaderOption(ctx *Context) error { ctx.setFolded(s.lastDelimColumn, opt) } if commentIndex > 0 { - comment := string(value[commentValueIndex+1:]) + comment := value[commentValueIndex+1:] s.offset += len(headerBuf) s.column += len(headerBuf) ctx.addToken(token.Comment(comment, string(ctx.obuf[len(headerBuf):]), s.pos()))