mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
encoding/csv: add FieldPos method
This enables a consumer of a CSV to find out the position of a CSV field without implementing an intermediate buffer. This is useful to produce good higher level error messages when the CSV syntax is OK but the field values don't match expectations. This also changes the existing semantics of the `ParseError.Column` field to bring it in line with precedent elsewhere in the Go standard library (notably go/token.Position) - the column is now 1-based and indicates a byte count rather than a rune count, and the error position reporting at the end of a last line without a newline is now fixed. This change has some impact on performance: ``` name old time/op new time/op delta Read-8 2.14µs ± 0% 2.15µs ± 0% ~ (p=0.056 n=5+5) ReadWithFieldsPerRecord-8 2.15µs ± 2% 2.15µs ± 1% ~ (p=0.151 n=5+5) ReadWithoutFieldsPerRecord-8 2.15µs ± 0% 2.15µs ± 0% +0.37% (p=0.024 n=5+5) ReadLargeFields-8 3.55µs ± 2% 3.59µs ± 0% ~ (p=0.206 n=5+5) ReadReuseRecord-8 1.18µs ± 1% 1.22µs ± 1% +2.93% (p=0.008 n=5+5) ReadReuseRecordWithFieldsPerRecord-8 1.18µs ± 0% 1.21µs ± 0% +2.54% (p=0.008 n=5+5) ReadReuseRecordWithoutFieldsPerRecord-8 1.18µs ± 0% 1.22µs ± 1% +3.66% (p=0.008 n=5+5) ReadReuseRecordLargeFields-8 2.53µs ± 1% 2.57µs ± 1% +1.70% (p=0.008 n=5+5) Write-8 1.02µs ± 1% 1.01µs ± 0% -1.18% (p=0.016 n=5+4) ``` Fixes #44221. Change-Id: Id37c50fc396024eef406c5bad45380ecd414f5ea Reviewed-on: https://go-review.googlesource.com/c/go/+/291290 Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org> Trust: Paul Jolly <paul@myitcv.org.uk>
This commit is contained in:
parent
2c05ba4ae0
commit
6d95e5a4ff
2 changed files with 587 additions and 397 deletions
|
|
@ -66,7 +66,7 @@ import (
|
||||||
type ParseError struct {
|
type ParseError struct {
|
||||||
StartLine int // Line where the record starts
|
StartLine int // Line where the record starts
|
||||||
Line int // Line where the error occurred
|
Line int // Line where the error occurred
|
||||||
Column int // Column (rune index) where the error occurred
|
Column int // Column (1-based byte index) where the error occurred
|
||||||
Err error // The actual error
|
Err error // The actual error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -162,6 +162,10 @@ type Reader struct {
|
||||||
// The i'th field ends at offset fieldIndexes[i] in recordBuffer.
|
// The i'th field ends at offset fieldIndexes[i] in recordBuffer.
|
||||||
fieldIndexes []int
|
fieldIndexes []int
|
||||||
|
|
||||||
|
// fieldPositions is an index of field positions for the
|
||||||
|
// last record returned by Read.
|
||||||
|
fieldPositions []position
|
||||||
|
|
||||||
// lastRecord is a record cache and only used when ReuseRecord == true.
|
// lastRecord is a record cache and only used when ReuseRecord == true.
|
||||||
lastRecord []string
|
lastRecord []string
|
||||||
}
|
}
|
||||||
|
|
@ -192,6 +196,25 @@ func (r *Reader) Read() (record []string, err error) {
|
||||||
return record, err
|
return record, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FieldPos returns the line and column corresponding to
|
||||||
|
// the start of the field with the given index in the slice most recently
|
||||||
|
// returned by Read. Numbering of lines and columns starts at 1;
|
||||||
|
// columns are counted in bytes, not runes.
|
||||||
|
//
|
||||||
|
// If this is called with an out-of-bounds index, it panics.
|
||||||
|
func (r *Reader) FieldPos(field int) (line, column int) {
|
||||||
|
if field < 0 || field >= len(r.fieldPositions) {
|
||||||
|
panic("out of range index passed to FieldPos")
|
||||||
|
}
|
||||||
|
p := &r.fieldPositions[field]
|
||||||
|
return p.line, p.col
|
||||||
|
}
|
||||||
|
|
||||||
|
// pos holds the position of a field in the current line.
|
||||||
|
type position struct {
|
||||||
|
line, col int
|
||||||
|
}
|
||||||
|
|
||||||
// ReadAll reads all the remaining records from r.
|
// ReadAll reads all the remaining records from r.
|
||||||
// Each record is a slice of fields.
|
// Each record is a slice of fields.
|
||||||
// A successful call returns err == nil, not err == io.EOF. Because ReadAll is
|
// A successful call returns err == nil, not err == io.EOF. Because ReadAll is
|
||||||
|
|
@ -260,7 +283,7 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read line (automatically skipping past empty lines and any comments).
|
// Read line (automatically skipping past empty lines and any comments).
|
||||||
var line, fullLine []byte
|
var line []byte
|
||||||
var errRead error
|
var errRead error
|
||||||
for errRead == nil {
|
for errRead == nil {
|
||||||
line, errRead = r.readLine()
|
line, errRead = r.readLine()
|
||||||
|
|
@ -272,7 +295,6 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
|
||||||
line = nil
|
line = nil
|
||||||
continue // Skip empty lines
|
continue // Skip empty lines
|
||||||
}
|
}
|
||||||
fullLine = line
|
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if errRead == io.EOF {
|
if errRead == io.EOF {
|
||||||
|
|
@ -286,10 +308,20 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
|
||||||
recLine := r.numLine // Starting line for record
|
recLine := r.numLine // Starting line for record
|
||||||
r.recordBuffer = r.recordBuffer[:0]
|
r.recordBuffer = r.recordBuffer[:0]
|
||||||
r.fieldIndexes = r.fieldIndexes[:0]
|
r.fieldIndexes = r.fieldIndexes[:0]
|
||||||
|
r.fieldPositions = r.fieldPositions[:0]
|
||||||
|
pos := position{line: r.numLine, col: 1}
|
||||||
parseField:
|
parseField:
|
||||||
for {
|
for {
|
||||||
if r.TrimLeadingSpace {
|
if r.TrimLeadingSpace {
|
||||||
line = bytes.TrimLeftFunc(line, unicode.IsSpace)
|
i := bytes.IndexFunc(line, func(r rune) bool {
|
||||||
|
return !unicode.IsSpace(r)
|
||||||
|
})
|
||||||
|
if i < 0 {
|
||||||
|
i = len(line)
|
||||||
|
pos.col -= lengthNL(line)
|
||||||
|
}
|
||||||
|
line = line[i:]
|
||||||
|
pos.col += i
|
||||||
}
|
}
|
||||||
if len(line) == 0 || line[0] != '"' {
|
if len(line) == 0 || line[0] != '"' {
|
||||||
// Non-quoted string field
|
// Non-quoted string field
|
||||||
|
|
@ -303,48 +335,56 @@ parseField:
|
||||||
// Check to make sure a quote does not appear in field.
|
// Check to make sure a quote does not appear in field.
|
||||||
if !r.LazyQuotes {
|
if !r.LazyQuotes {
|
||||||
if j := bytes.IndexByte(field, '"'); j >= 0 {
|
if j := bytes.IndexByte(field, '"'); j >= 0 {
|
||||||
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
|
col := pos.col + j
|
||||||
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
|
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
|
||||||
break parseField
|
break parseField
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
r.recordBuffer = append(r.recordBuffer, field...)
|
r.recordBuffer = append(r.recordBuffer, field...)
|
||||||
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
||||||
|
r.fieldPositions = append(r.fieldPositions, pos)
|
||||||
if i >= 0 {
|
if i >= 0 {
|
||||||
line = line[i+commaLen:]
|
line = line[i+commaLen:]
|
||||||
|
pos.col += i + commaLen
|
||||||
continue parseField
|
continue parseField
|
||||||
}
|
}
|
||||||
break parseField
|
break parseField
|
||||||
} else {
|
} else {
|
||||||
// Quoted string field
|
// Quoted string field
|
||||||
|
fieldPos := pos
|
||||||
line = line[quoteLen:]
|
line = line[quoteLen:]
|
||||||
|
pos.col += quoteLen
|
||||||
for {
|
for {
|
||||||
i := bytes.IndexByte(line, '"')
|
i := bytes.IndexByte(line, '"')
|
||||||
if i >= 0 {
|
if i >= 0 {
|
||||||
// Hit next quote.
|
// Hit next quote.
|
||||||
r.recordBuffer = append(r.recordBuffer, line[:i]...)
|
r.recordBuffer = append(r.recordBuffer, line[:i]...)
|
||||||
line = line[i+quoteLen:]
|
line = line[i+quoteLen:]
|
||||||
|
pos.col += i + quoteLen
|
||||||
switch rn := nextRune(line); {
|
switch rn := nextRune(line); {
|
||||||
case rn == '"':
|
case rn == '"':
|
||||||
// `""` sequence (append quote).
|
// `""` sequence (append quote).
|
||||||
r.recordBuffer = append(r.recordBuffer, '"')
|
r.recordBuffer = append(r.recordBuffer, '"')
|
||||||
line = line[quoteLen:]
|
line = line[quoteLen:]
|
||||||
|
pos.col += quoteLen
|
||||||
case rn == r.Comma:
|
case rn == r.Comma:
|
||||||
// `",` sequence (end of field).
|
// `",` sequence (end of field).
|
||||||
line = line[commaLen:]
|
line = line[commaLen:]
|
||||||
|
pos.col += commaLen
|
||||||
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
||||||
|
r.fieldPositions = append(r.fieldPositions, fieldPos)
|
||||||
continue parseField
|
continue parseField
|
||||||
case lengthNL(line) == len(line):
|
case lengthNL(line) == len(line):
|
||||||
// `"\n` sequence (end of line).
|
// `"\n` sequence (end of line).
|
||||||
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
||||||
|
r.fieldPositions = append(r.fieldPositions, fieldPos)
|
||||||
break parseField
|
break parseField
|
||||||
case r.LazyQuotes:
|
case r.LazyQuotes:
|
||||||
// `"` sequence (bare quote).
|
// `"` sequence (bare quote).
|
||||||
r.recordBuffer = append(r.recordBuffer, '"')
|
r.recordBuffer = append(r.recordBuffer, '"')
|
||||||
default:
|
default:
|
||||||
// `"*` sequence (invalid non-escaped quote).
|
// `"*` sequence (invalid non-escaped quote).
|
||||||
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])
|
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: pos.col - quoteLen, Err: ErrQuote}
|
||||||
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
|
|
||||||
break parseField
|
break parseField
|
||||||
}
|
}
|
||||||
} else if len(line) > 0 {
|
} else if len(line) > 0 {
|
||||||
|
|
@ -353,19 +393,23 @@ parseField:
|
||||||
if errRead != nil {
|
if errRead != nil {
|
||||||
break parseField
|
break parseField
|
||||||
}
|
}
|
||||||
|
pos.col += len(line)
|
||||||
line, errRead = r.readLine()
|
line, errRead = r.readLine()
|
||||||
|
if len(line) > 0 {
|
||||||
|
pos.line++
|
||||||
|
pos.col = 1
|
||||||
|
}
|
||||||
if errRead == io.EOF {
|
if errRead == io.EOF {
|
||||||
errRead = nil
|
errRead = nil
|
||||||
}
|
}
|
||||||
fullLine = line
|
|
||||||
} else {
|
} else {
|
||||||
// Abrupt end of file (EOF or error).
|
// Abrupt end of file (EOF or error).
|
||||||
if !r.LazyQuotes && errRead == nil {
|
if !r.LazyQuotes && errRead == nil {
|
||||||
col := utf8.RuneCount(fullLine)
|
err = &ParseError{StartLine: recLine, Line: pos.line, Column: pos.col, Err: ErrQuote}
|
||||||
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
|
|
||||||
break parseField
|
break parseField
|
||||||
}
|
}
|
||||||
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
||||||
|
r.fieldPositions = append(r.fieldPositions, fieldPos)
|
||||||
break parseField
|
break parseField
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -392,7 +436,12 @@ parseField:
|
||||||
// Check or update the expected fields per record.
|
// Check or update the expected fields per record.
|
||||||
if r.FieldsPerRecord > 0 {
|
if r.FieldsPerRecord > 0 {
|
||||||
if len(dst) != r.FieldsPerRecord && err == nil {
|
if len(dst) != r.FieldsPerRecord && err == nil {
|
||||||
err = &ParseError{StartLine: recLine, Line: recLine, Err: ErrFieldCount}
|
err = &ParseError{
|
||||||
|
StartLine: recLine,
|
||||||
|
Line: recLine,
|
||||||
|
Column: 1,
|
||||||
|
Err: ErrFieldCount,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if r.FieldsPerRecord == 0 {
|
} else if r.FieldsPerRecord == 0 {
|
||||||
r.FieldsPerRecord = len(dst)
|
r.FieldsPerRecord = len(dst)
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@
|
||||||
package csv
|
package csv
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
@ -12,12 +14,12 @@ import (
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestRead(t *testing.T) {
|
type readTest struct {
|
||||||
tests := []struct {
|
|
||||||
Name string
|
Name string
|
||||||
Input string
|
Input string
|
||||||
Output [][]string
|
Output [][]string
|
||||||
Error error
|
Positions [][][2]int
|
||||||
|
Errors []error
|
||||||
|
|
||||||
// These fields are copied into the Reader
|
// These fields are copied into the Reader
|
||||||
Comma rune
|
Comma rune
|
||||||
|
|
@ -27,25 +29,32 @@ func TestRead(t *testing.T) {
|
||||||
LazyQuotes bool
|
LazyQuotes bool
|
||||||
TrimLeadingSpace bool
|
TrimLeadingSpace bool
|
||||||
ReuseRecord bool
|
ReuseRecord bool
|
||||||
}{{
|
}
|
||||||
|
|
||||||
|
// In these tests, the §, ¶ and ∑ characters in readTest.Input are used to denote
|
||||||
|
// the start of a field, a record boundary and the position of an error respectively.
|
||||||
|
// They are removed before parsing and are used to verify the position
|
||||||
|
// information reported by FieldPos.
|
||||||
|
|
||||||
|
var readTests = []readTest{{
|
||||||
Name: "Simple",
|
Name: "Simple",
|
||||||
Input: "a,b,c\n",
|
Input: "§a,§b,§c\n",
|
||||||
Output: [][]string{{"a", "b", "c"}},
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "CRLF",
|
Name: "CRLF",
|
||||||
Input: "a,b\r\nc,d\r\n",
|
Input: "§a,§b\r\n¶§c,§d\r\n",
|
||||||
Output: [][]string{{"a", "b"}, {"c", "d"}},
|
Output: [][]string{{"a", "b"}, {"c", "d"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "BareCR",
|
Name: "BareCR",
|
||||||
Input: "a,b\rc,d\r\n",
|
Input: "§a,§b\rc,§d\r\n",
|
||||||
Output: [][]string{{"a", "b\rc", "d"}},
|
Output: [][]string{{"a", "b\rc", "d"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "RFC4180test",
|
Name: "RFC4180test",
|
||||||
Input: `#field1,field2,field3
|
Input: `§#field1,§field2,§field3
|
||||||
"aaa","bb
|
¶§"aaa",§"bb
|
||||||
b","ccc"
|
b",§"ccc"
|
||||||
"a,a","b""bb","ccc"
|
¶§"a,a",§"b""bb",§"ccc"
|
||||||
zzz,yyy,xxx
|
¶§zzz,§yyy,§xxx
|
||||||
`,
|
`,
|
||||||
Output: [][]string{
|
Output: [][]string{
|
||||||
{"#field1", "field2", "field3"},
|
{"#field1", "field2", "field3"},
|
||||||
|
|
@ -55,147 +64,156 @@ zzz,yyy,xxx
|
||||||
},
|
},
|
||||||
UseFieldsPerRecord: true,
|
UseFieldsPerRecord: true,
|
||||||
FieldsPerRecord: 0,
|
FieldsPerRecord: 0,
|
||||||
}, {
|
}, {
|
||||||
Name: "NoEOLTest",
|
Name: "NoEOLTest",
|
||||||
Input: "a,b,c",
|
Input: "§a,§b,§c",
|
||||||
Output: [][]string{{"a", "b", "c"}},
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "Semicolon",
|
Name: "Semicolon",
|
||||||
Input: "a;b;c\n",
|
Input: "§a;§b;§c\n",
|
||||||
Output: [][]string{{"a", "b", "c"}},
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
Comma: ';',
|
Comma: ';',
|
||||||
}, {
|
}, {
|
||||||
Name: "MultiLine",
|
Name: "MultiLine",
|
||||||
Input: `"two
|
Input: `§"two
|
||||||
line","one line","three
|
line",§"one line",§"three
|
||||||
line
|
line
|
||||||
field"`,
|
field"`,
|
||||||
Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
|
Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "BlankLine",
|
Name: "BlankLine",
|
||||||
Input: "a,b,c\n\nd,e,f\n\n",
|
Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
|
||||||
Output: [][]string{
|
Output: [][]string{
|
||||||
{"a", "b", "c"},
|
{"a", "b", "c"},
|
||||||
{"d", "e", "f"},
|
{"d", "e", "f"},
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
Name: "BlankLineFieldCount",
|
Name: "BlankLineFieldCount",
|
||||||
Input: "a,b,c\n\nd,e,f\n\n",
|
Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
|
||||||
Output: [][]string{
|
Output: [][]string{
|
||||||
{"a", "b", "c"},
|
{"a", "b", "c"},
|
||||||
{"d", "e", "f"},
|
{"d", "e", "f"},
|
||||||
},
|
},
|
||||||
UseFieldsPerRecord: true,
|
UseFieldsPerRecord: true,
|
||||||
FieldsPerRecord: 0,
|
FieldsPerRecord: 0,
|
||||||
}, {
|
}, {
|
||||||
Name: "TrimSpace",
|
Name: "TrimSpace",
|
||||||
Input: " a, b, c\n",
|
Input: " §a, §b, §c\n",
|
||||||
Output: [][]string{{"a", "b", "c"}},
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
TrimLeadingSpace: true,
|
TrimLeadingSpace: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "LeadingSpace",
|
Name: "LeadingSpace",
|
||||||
Input: " a, b, c\n",
|
Input: "§ a,§ b,§ c\n",
|
||||||
Output: [][]string{{" a", " b", " c"}},
|
Output: [][]string{{" a", " b", " c"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "Comment",
|
Name: "Comment",
|
||||||
Input: "#1,2,3\na,b,c\n#comment",
|
Input: "#1,2,3\n§a,§b,§c\n#comment",
|
||||||
Output: [][]string{{"a", "b", "c"}},
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
Comment: '#',
|
Comment: '#',
|
||||||
}, {
|
}, {
|
||||||
Name: "NoComment",
|
Name: "NoComment",
|
||||||
Input: "#1,2,3\na,b,c",
|
Input: "§#1,§2,§3\n¶§a,§b,§c",
|
||||||
Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
|
Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "LazyQuotes",
|
Name: "LazyQuotes",
|
||||||
Input: `a "word","1"2",a","b`,
|
Input: `§a "word",§"1"2",§a",§"b`,
|
||||||
Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
|
Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
|
||||||
LazyQuotes: true,
|
LazyQuotes: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "BareQuotes",
|
Name: "BareQuotes",
|
||||||
Input: `a "word","1"2",a"`,
|
Input: `§a "word",§"1"2",§a"`,
|
||||||
Output: [][]string{{`a "word"`, `1"2`, `a"`}},
|
Output: [][]string{{`a "word"`, `1"2`, `a"`}},
|
||||||
LazyQuotes: true,
|
LazyQuotes: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "BareDoubleQuotes",
|
Name: "BareDoubleQuotes",
|
||||||
Input: `a""b,c`,
|
Input: `§a""b,§c`,
|
||||||
Output: [][]string{{`a""b`, `c`}},
|
Output: [][]string{{`a""b`, `c`}},
|
||||||
LazyQuotes: true,
|
LazyQuotes: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "BadDoubleQuotes",
|
Name: "BadDoubleQuotes",
|
||||||
Input: `a""b,c`,
|
Input: `§a∑""b,c`,
|
||||||
Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
|
Errors: []error{&ParseError{Err: ErrBareQuote}},
|
||||||
}, {
|
}, {
|
||||||
Name: "TrimQuote",
|
Name: "TrimQuote",
|
||||||
Input: ` "a"," b",c`,
|
Input: ` §"a",§" b",§c`,
|
||||||
Output: [][]string{{"a", " b", "c"}},
|
Output: [][]string{{"a", " b", "c"}},
|
||||||
TrimLeadingSpace: true,
|
TrimLeadingSpace: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "BadBareQuote",
|
Name: "BadBareQuote",
|
||||||
Input: `a "word","b"`,
|
Input: `§a ∑"word","b"`,
|
||||||
Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote},
|
Errors: []error{&ParseError{Err: ErrBareQuote}},
|
||||||
}, {
|
}, {
|
||||||
Name: "BadTrailingQuote",
|
Name: "BadTrailingQuote",
|
||||||
Input: `"a word",b"`,
|
Input: `§"a word",b∑"`,
|
||||||
Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote},
|
Errors: []error{&ParseError{Err: ErrBareQuote}},
|
||||||
}, {
|
}, {
|
||||||
Name: "ExtraneousQuote",
|
Name: "ExtraneousQuote",
|
||||||
Input: `"a "word","b"`,
|
Input: `§"a ∑"word","b"`,
|
||||||
Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote},
|
Errors: []error{&ParseError{Err: ErrQuote}},
|
||||||
}, {
|
}, {
|
||||||
Name: "BadFieldCount",
|
Name: "BadFieldCount",
|
||||||
Input: "a,b,c\nd,e",
|
Input: "§a,§b,§c\n¶∑§d,§e",
|
||||||
Error: &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount},
|
Errors: []error{nil, &ParseError{Err: ErrFieldCount}},
|
||||||
|
Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
|
||||||
UseFieldsPerRecord: true,
|
UseFieldsPerRecord: true,
|
||||||
FieldsPerRecord: 0,
|
FieldsPerRecord: 0,
|
||||||
}, {
|
}, {
|
||||||
|
Name: "BadFieldCountMultiple",
|
||||||
|
Input: "§a,§b,§c\n¶∑§d,§e\n¶∑§f",
|
||||||
|
Errors: []error{nil, &ParseError{Err: ErrFieldCount}, &ParseError{Err: ErrFieldCount}},
|
||||||
|
Output: [][]string{{"a", "b", "c"}, {"d", "e"}, {"f"}},
|
||||||
|
UseFieldsPerRecord: true,
|
||||||
|
FieldsPerRecord: 0,
|
||||||
|
}, {
|
||||||
Name: "BadFieldCount1",
|
Name: "BadFieldCount1",
|
||||||
Input: `a,b,c`,
|
Input: `§∑a,§b,§c`,
|
||||||
Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
|
Errors: []error{&ParseError{Err: ErrFieldCount}},
|
||||||
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
UseFieldsPerRecord: true,
|
UseFieldsPerRecord: true,
|
||||||
FieldsPerRecord: 2,
|
FieldsPerRecord: 2,
|
||||||
}, {
|
}, {
|
||||||
Name: "FieldCount",
|
Name: "FieldCount",
|
||||||
Input: "a,b,c\nd,e",
|
Input: "§a,§b,§c\n¶§d,§e",
|
||||||
Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
|
Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "TrailingCommaEOF",
|
Name: "TrailingCommaEOF",
|
||||||
Input: "a,b,c,",
|
Input: "§a,§b,§c,§",
|
||||||
Output: [][]string{{"a", "b", "c", ""}},
|
Output: [][]string{{"a", "b", "c", ""}},
|
||||||
}, {
|
}, {
|
||||||
Name: "TrailingCommaEOL",
|
Name: "TrailingCommaEOL",
|
||||||
Input: "a,b,c,\n",
|
Input: "§a,§b,§c,§\n",
|
||||||
Output: [][]string{{"a", "b", "c", ""}},
|
Output: [][]string{{"a", "b", "c", ""}},
|
||||||
}, {
|
}, {
|
||||||
Name: "TrailingCommaSpaceEOF",
|
Name: "TrailingCommaSpaceEOF",
|
||||||
Input: "a,b,c, ",
|
Input: "§a,§b,§c, §",
|
||||||
Output: [][]string{{"a", "b", "c", ""}},
|
Output: [][]string{{"a", "b", "c", ""}},
|
||||||
TrimLeadingSpace: true,
|
TrimLeadingSpace: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "TrailingCommaSpaceEOL",
|
Name: "TrailingCommaSpaceEOL",
|
||||||
Input: "a,b,c, \n",
|
Input: "§a,§b,§c, §\n",
|
||||||
Output: [][]string{{"a", "b", "c", ""}},
|
Output: [][]string{{"a", "b", "c", ""}},
|
||||||
TrimLeadingSpace: true,
|
TrimLeadingSpace: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "TrailingCommaLine3",
|
Name: "TrailingCommaLine3",
|
||||||
Input: "a,b,c\nd,e,f\ng,hi,",
|
Input: "§a,§b,§c\n¶§d,§e,§f\n¶§g,§hi,§",
|
||||||
Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
|
Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
|
||||||
TrimLeadingSpace: true,
|
TrimLeadingSpace: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "NotTrailingComma3",
|
Name: "NotTrailingComma3",
|
||||||
Input: "a,b,c, \n",
|
Input: "§a,§b,§c,§ \n",
|
||||||
Output: [][]string{{"a", "b", "c", " "}},
|
Output: [][]string{{"a", "b", "c", " "}},
|
||||||
}, {
|
}, {
|
||||||
Name: "CommaFieldTest",
|
Name: "CommaFieldTest",
|
||||||
Input: `x,y,z,w
|
Input: `§x,§y,§z,§w
|
||||||
x,y,z,
|
¶§x,§y,§z,§
|
||||||
x,y,,
|
¶§x,§y,§,§
|
||||||
x,,,
|
¶§x,§,§,§
|
||||||
,,,
|
¶§,§,§,§
|
||||||
"x","y","z","w"
|
¶§"x",§"y",§"z",§"w"
|
||||||
"x","y","z",""
|
¶§"x",§"y",§"z",§""
|
||||||
"x","y","",""
|
¶§"x",§"y",§"",§""
|
||||||
"x","","",""
|
¶§"x",§"",§"",§""
|
||||||
"","","",""
|
¶§"",§"",§"",§""
|
||||||
`,
|
`,
|
||||||
Output: [][]string{
|
Output: [][]string{
|
||||||
{"x", "y", "z", "w"},
|
{"x", "y", "z", "w"},
|
||||||
|
|
@ -209,184 +227,186 @@ x,,,
|
||||||
{"x", "", "", ""},
|
{"x", "", "", ""},
|
||||||
{"", "", "", ""},
|
{"", "", "", ""},
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
Name: "TrailingCommaIneffective1",
|
Name: "TrailingCommaIneffective1",
|
||||||
Input: "a,b,\nc,d,e",
|
Input: "§a,§b,§\n¶§c,§d,§e",
|
||||||
Output: [][]string{
|
Output: [][]string{
|
||||||
{"a", "b", ""},
|
{"a", "b", ""},
|
||||||
{"c", "d", "e"},
|
{"c", "d", "e"},
|
||||||
},
|
},
|
||||||
TrimLeadingSpace: true,
|
TrimLeadingSpace: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "ReadAllReuseRecord",
|
Name: "ReadAllReuseRecord",
|
||||||
Input: "a,b\nc,d",
|
Input: "§a,§b\n¶§c,§d",
|
||||||
Output: [][]string{
|
Output: [][]string{
|
||||||
{"a", "b"},
|
{"a", "b"},
|
||||||
{"c", "d"},
|
{"c", "d"},
|
||||||
},
|
},
|
||||||
ReuseRecord: true,
|
ReuseRecord: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "StartLine1", // Issue 19019
|
Name: "StartLine1", // Issue 19019
|
||||||
Input: "a,\"b\nc\"d,e",
|
Input: "§a,\"b\nc∑\"d,e",
|
||||||
Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
|
Errors: []error{&ParseError{Err: ErrQuote}},
|
||||||
}, {
|
}, {
|
||||||
Name: "StartLine2",
|
Name: "StartLine2",
|
||||||
Input: "a,b\n\"d\n\n,e",
|
Input: "§a,§b\n¶§\"d\n\n,e∑",
|
||||||
Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
|
Errors: []error{nil, &ParseError{Err: ErrQuote}},
|
||||||
}, {
|
Output: [][]string{{"a", "b"}},
|
||||||
|
}, {
|
||||||
Name: "CRLFInQuotedField", // Issue 21201
|
Name: "CRLFInQuotedField", // Issue 21201
|
||||||
Input: "A,\"Hello\r\nHi\",B\r\n",
|
Input: "§A,§\"Hello\r\nHi\",§B\r\n",
|
||||||
Output: [][]string{
|
Output: [][]string{
|
||||||
{"A", "Hello\nHi", "B"},
|
{"A", "Hello\nHi", "B"},
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
Name: "BinaryBlobField", // Issue 19410
|
Name: "BinaryBlobField", // Issue 19410
|
||||||
Input: "x09\x41\xb4\x1c,aktau",
|
Input: "§x09\x41\xb4\x1c,§aktau",
|
||||||
Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
|
Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "TrailingCR",
|
Name: "TrailingCR",
|
||||||
Input: "field1,field2\r",
|
Input: "§field1,§field2\r",
|
||||||
Output: [][]string{{"field1", "field2"}},
|
Output: [][]string{{"field1", "field2"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "QuotedTrailingCR",
|
Name: "QuotedTrailingCR",
|
||||||
Input: "\"field\"\r",
|
Input: "§\"field\"\r",
|
||||||
Output: [][]string{{"field"}},
|
Output: [][]string{{"field"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "QuotedTrailingCRCR",
|
Name: "QuotedTrailingCRCR",
|
||||||
Input: "\"field\"\r\r",
|
Input: "§\"field∑\"\r\r",
|
||||||
Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
|
Errors: []error{&ParseError{Err: ErrQuote}},
|
||||||
}, {
|
}, {
|
||||||
Name: "FieldCR",
|
Name: "FieldCR",
|
||||||
Input: "field\rfield\r",
|
Input: "§field\rfield\r",
|
||||||
Output: [][]string{{"field\rfield"}},
|
Output: [][]string{{"field\rfield"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "FieldCRCR",
|
Name: "FieldCRCR",
|
||||||
Input: "field\r\rfield\r\r",
|
Input: "§field\r\rfield\r\r",
|
||||||
Output: [][]string{{"field\r\rfield\r"}},
|
Output: [][]string{{"field\r\rfield\r"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "FieldCRCRLF",
|
Name: "FieldCRCRLF",
|
||||||
Input: "field\r\r\nfield\r\r\n",
|
Input: "§field\r\r\n¶§field\r\r\n",
|
||||||
Output: [][]string{{"field\r"}, {"field\r"}},
|
Output: [][]string{{"field\r"}, {"field\r"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "FieldCRCRLFCR",
|
Name: "FieldCRCRLFCR",
|
||||||
Input: "field\r\r\n\rfield\r\r\n\r",
|
Input: "§field\r\r\n¶§\rfield\r\r\n\r",
|
||||||
Output: [][]string{{"field\r"}, {"\rfield\r"}},
|
Output: [][]string{{"field\r"}, {"\rfield\r"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "FieldCRCRLFCRCR",
|
Name: "FieldCRCRLFCRCR",
|
||||||
Input: "field\r\r\n\r\rfield\r\r\n\r\r",
|
Input: "§field\r\r\n¶§\r\rfield\r\r\n¶§\r\r",
|
||||||
Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
|
Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "MultiFieldCRCRLFCRCR",
|
Name: "MultiFieldCRCRLFCRCR",
|
||||||
Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
|
Input: "§field1,§field2\r\r\n¶§\r\rfield1,§field2\r\r\n¶§\r\r,§",
|
||||||
Output: [][]string{
|
Output: [][]string{
|
||||||
{"field1", "field2\r"},
|
{"field1", "field2\r"},
|
||||||
{"\r\rfield1", "field2\r"},
|
{"\r\rfield1", "field2\r"},
|
||||||
{"\r\r", ""},
|
{"\r\r", ""},
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
Name: "NonASCIICommaAndComment",
|
Name: "NonASCIICommaAndComment",
|
||||||
Input: "a£b,c£ \td,e\n€ comment\n",
|
Input: "§a£§b,c£ \t§d,e\n€ comment\n",
|
||||||
Output: [][]string{{"a", "b,c", "d,e"}},
|
Output: [][]string{{"a", "b,c", "d,e"}},
|
||||||
TrimLeadingSpace: true,
|
TrimLeadingSpace: true,
|
||||||
Comma: '£',
|
Comma: '£',
|
||||||
Comment: '€',
|
Comment: '€',
|
||||||
}, {
|
}, {
|
||||||
Name: "NonASCIICommaAndCommentWithQuotes",
|
Name: "NonASCIICommaAndCommentWithQuotes",
|
||||||
Input: "a€\" b,\"€ c\nλ comment\n",
|
Input: "§a€§\" b,\"€§ c\nλ comment\n",
|
||||||
Output: [][]string{{"a", " b,", " c"}},
|
Output: [][]string{{"a", " b,", " c"}},
|
||||||
Comma: '€',
|
Comma: '€',
|
||||||
Comment: 'λ',
|
Comment: 'λ',
|
||||||
}, {
|
}, {
|
||||||
// λ and θ start with the same byte.
|
// λ and θ start with the same byte.
|
||||||
// This tests that the parser doesn't confuse such characters.
|
// This tests that the parser doesn't confuse such characters.
|
||||||
Name: "NonASCIICommaConfusion",
|
Name: "NonASCIICommaConfusion",
|
||||||
Input: "\"abθcd\"λefθgh",
|
Input: "§\"abθcd\"λ§efθgh",
|
||||||
Output: [][]string{{"abθcd", "efθgh"}},
|
Output: [][]string{{"abθcd", "efθgh"}},
|
||||||
Comma: 'λ',
|
Comma: 'λ',
|
||||||
Comment: '€',
|
Comment: '€',
|
||||||
}, {
|
}, {
|
||||||
Name: "NonASCIICommentConfusion",
|
Name: "NonASCIICommentConfusion",
|
||||||
Input: "λ\nλ\nθ\nλ\n",
|
Input: "§λ\n¶§λ\nθ\n¶§λ\n",
|
||||||
Output: [][]string{{"λ"}, {"λ"}, {"λ"}},
|
Output: [][]string{{"λ"}, {"λ"}, {"λ"}},
|
||||||
Comment: 'θ',
|
Comment: 'θ',
|
||||||
}, {
|
}, {
|
||||||
Name: "QuotedFieldMultipleLF",
|
Name: "QuotedFieldMultipleLF",
|
||||||
Input: "\"\n\n\n\n\"",
|
Input: "§\"\n\n\n\n\"",
|
||||||
Output: [][]string{{"\n\n\n\n"}},
|
Output: [][]string{{"\n\n\n\n"}},
|
||||||
}, {
|
}, {
|
||||||
Name: "MultipleCRLF",
|
Name: "MultipleCRLF",
|
||||||
Input: "\r\n\r\n\r\n\r\n",
|
Input: "\r\n\r\n\r\n\r\n",
|
||||||
}, {
|
}, {
|
||||||
// The implementation may read each line in several chunks if it doesn't fit entirely
|
// The implementation may read each line in several chunks if it doesn't fit entirely
|
||||||
// in the read buffer, so we should test the code to handle that condition.
|
// in the read buffer, so we should test the code to handle that condition.
|
||||||
Name: "HugeLines",
|
Name: "HugeLines",
|
||||||
Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
|
Input: strings.Repeat("#ignore\n", 10000) + "§" + strings.Repeat("@", 5000) + ",§" + strings.Repeat("*", 5000),
|
||||||
Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
|
Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
|
||||||
Comment: '#',
|
Comment: '#',
|
||||||
}, {
|
}, {
|
||||||
Name: "QuoteWithTrailingCRLF",
|
Name: "QuoteWithTrailingCRLF",
|
||||||
Input: "\"foo\"bar\"\r\n",
|
Input: "§\"foo∑\"bar\"\r\n",
|
||||||
Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
|
Errors: []error{&ParseError{Err: ErrQuote}},
|
||||||
}, {
|
}, {
|
||||||
Name: "LazyQuoteWithTrailingCRLF",
|
Name: "LazyQuoteWithTrailingCRLF",
|
||||||
Input: "\"foo\"bar\"\r\n",
|
Input: "§\"foo\"bar\"\r\n",
|
||||||
Output: [][]string{{`foo"bar`}},
|
Output: [][]string{{`foo"bar`}},
|
||||||
LazyQuotes: true,
|
LazyQuotes: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "DoubleQuoteWithTrailingCRLF",
|
Name: "DoubleQuoteWithTrailingCRLF",
|
||||||
Input: "\"foo\"\"bar\"\r\n",
|
Input: "§\"foo\"\"bar\"\r\n",
|
||||||
Output: [][]string{{`foo"bar`}},
|
Output: [][]string{{`foo"bar`}},
|
||||||
}, {
|
}, {
|
||||||
Name: "EvenQuotes",
|
Name: "EvenQuotes",
|
||||||
Input: `""""""""`,
|
Input: `§""""""""`,
|
||||||
Output: [][]string{{`"""`}},
|
Output: [][]string{{`"""`}},
|
||||||
}, {
|
}, {
|
||||||
Name: "OddQuotes",
|
Name: "OddQuotes",
|
||||||
Input: `"""""""`,
|
Input: `§"""""""∑`,
|
||||||
Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote},
|
Errors: []error{&ParseError{Err: ErrQuote}},
|
||||||
}, {
|
}, {
|
||||||
Name: "LazyOddQuotes",
|
Name: "LazyOddQuotes",
|
||||||
Input: `"""""""`,
|
Input: `§"""""""`,
|
||||||
Output: [][]string{{`"""`}},
|
Output: [][]string{{`"""`}},
|
||||||
LazyQuotes: true,
|
LazyQuotes: true,
|
||||||
}, {
|
}, {
|
||||||
Name: "BadComma1",
|
Name: "BadComma1",
|
||||||
Comma: '\n',
|
Comma: '\n',
|
||||||
Error: errInvalidDelim,
|
Errors: []error{errInvalidDelim},
|
||||||
}, {
|
}, {
|
||||||
Name: "BadComma2",
|
Name: "BadComma2",
|
||||||
Comma: '\r',
|
Comma: '\r',
|
||||||
Error: errInvalidDelim,
|
Errors: []error{errInvalidDelim},
|
||||||
}, {
|
}, {
|
||||||
Name: "BadComma3",
|
Name: "BadComma3",
|
||||||
Comma: '"',
|
Comma: '"',
|
||||||
Error: errInvalidDelim,
|
Errors: []error{errInvalidDelim},
|
||||||
}, {
|
}, {
|
||||||
Name: "BadComma4",
|
Name: "BadComma4",
|
||||||
Comma: utf8.RuneError,
|
Comma: utf8.RuneError,
|
||||||
Error: errInvalidDelim,
|
Errors: []error{errInvalidDelim},
|
||||||
}, {
|
}, {
|
||||||
Name: "BadComment1",
|
Name: "BadComment1",
|
||||||
Comment: '\n',
|
Comment: '\n',
|
||||||
Error: errInvalidDelim,
|
Errors: []error{errInvalidDelim},
|
||||||
}, {
|
}, {
|
||||||
Name: "BadComment2",
|
Name: "BadComment2",
|
||||||
Comment: '\r',
|
Comment: '\r',
|
||||||
Error: errInvalidDelim,
|
Errors: []error{errInvalidDelim},
|
||||||
}, {
|
}, {
|
||||||
Name: "BadComment3",
|
Name: "BadComment3",
|
||||||
Comment: utf8.RuneError,
|
Comment: utf8.RuneError,
|
||||||
Error: errInvalidDelim,
|
Errors: []error{errInvalidDelim},
|
||||||
}, {
|
}, {
|
||||||
Name: "BadCommaComment",
|
Name: "BadCommaComment",
|
||||||
Comma: 'X',
|
Comma: 'X',
|
||||||
Comment: 'X',
|
Comment: 'X',
|
||||||
Error: errInvalidDelim,
|
Errors: []error{errInvalidDelim},
|
||||||
}}
|
}}
|
||||||
|
|
||||||
for _, tt := range tests {
|
func TestRead(t *testing.T) {
|
||||||
t.Run(tt.Name, func(t *testing.T) {
|
newReader := func(tt readTest) (*Reader, [][][2]int, map[int][2]int) {
|
||||||
r := NewReader(strings.NewReader(tt.Input))
|
positions, errPositions, input := makePositions(tt.Input)
|
||||||
|
r := NewReader(strings.NewReader(input))
|
||||||
|
|
||||||
if tt.Comma != 0 {
|
if tt.Comma != 0 {
|
||||||
r.Comma = tt.Comma
|
r.Comma = tt.Comma
|
||||||
|
|
@ -400,17 +420,138 @@ x,,,
|
||||||
r.LazyQuotes = tt.LazyQuotes
|
r.LazyQuotes = tt.LazyQuotes
|
||||||
r.TrimLeadingSpace = tt.TrimLeadingSpace
|
r.TrimLeadingSpace = tt.TrimLeadingSpace
|
||||||
r.ReuseRecord = tt.ReuseRecord
|
r.ReuseRecord = tt.ReuseRecord
|
||||||
|
return r, positions, errPositions
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range readTests {
|
||||||
|
t.Run(tt.Name, func(t *testing.T) {
|
||||||
|
r, positions, errPositions := newReader(tt)
|
||||||
out, err := r.ReadAll()
|
out, err := r.ReadAll()
|
||||||
if !reflect.DeepEqual(err, tt.Error) {
|
if wantErr := firstError(tt.Errors, positions, errPositions); wantErr != nil {
|
||||||
t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error)
|
if !reflect.DeepEqual(err, wantErr) {
|
||||||
} else if !reflect.DeepEqual(out, tt.Output) {
|
t.Fatalf("ReadAll() error mismatch:\ngot %v (%#v)\nwant %v (%#v)", err, err, wantErr, wantErr)
|
||||||
t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output)
|
}
|
||||||
|
if out != nil {
|
||||||
|
t.Fatalf("ReadAll() output:\ngot %q\nwant nil", out)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected Readall() error: %v", err)
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(out, tt.Output) {
|
||||||
|
t.Fatalf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check field and error positions.
|
||||||
|
r, _, _ = newReader(tt)
|
||||||
|
for recNum := 0; ; recNum++ {
|
||||||
|
rec, err := r.Read()
|
||||||
|
var wantErr error
|
||||||
|
if recNum < len(tt.Errors) && tt.Errors[recNum] != nil {
|
||||||
|
wantErr = errorWithPosition(tt.Errors[recNum], recNum, positions, errPositions)
|
||||||
|
} else if recNum >= len(tt.Output) {
|
||||||
|
wantErr = io.EOF
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(err, wantErr) {
|
||||||
|
t.Fatalf("Read() error at record %d:\ngot %v (%#v)\nwant %v (%#v)", recNum, err, err, wantErr, wantErr)
|
||||||
|
}
|
||||||
|
// ErrFieldCount is explicitly non-fatal.
|
||||||
|
if err != nil && !errors.Is(err, ErrFieldCount) {
|
||||||
|
if recNum < len(tt.Output) {
|
||||||
|
t.Fatalf("need more records; got %d want %d", recNum, len(tt.Output))
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if got, want := rec, tt.Output[recNum]; !reflect.DeepEqual(got, want) {
|
||||||
|
t.Errorf("Read vs ReadAll mismatch;\ngot %q\nwant %q", got, want)
|
||||||
|
}
|
||||||
|
pos := positions[recNum]
|
||||||
|
if len(pos) != len(rec) {
|
||||||
|
t.Fatalf("mismatched position length at record %d", recNum)
|
||||||
|
}
|
||||||
|
for i := range rec {
|
||||||
|
line, col := r.FieldPos(i)
|
||||||
|
if got, want := [2]int{line, col}, pos[i]; got != want {
|
||||||
|
t.Errorf("position mismatch at record %d, field %d;\ngot %v\nwant %v", recNum, i, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// firstError returns the first non-nil error in errs,
|
||||||
|
// with the position adjusted according to the error's
|
||||||
|
// index inside positions.
|
||||||
|
func firstError(errs []error, positions [][][2]int, errPositions map[int][2]int) error {
|
||||||
|
for i, err := range errs {
|
||||||
|
if err != nil {
|
||||||
|
return errorWithPosition(err, i, positions, errPositions)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func errorWithPosition(err error, recNum int, positions [][][2]int, errPositions map[int][2]int) error {
|
||||||
|
parseErr, ok := err.(*ParseError)
|
||||||
|
if !ok {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if recNum >= len(positions) {
|
||||||
|
panic(fmt.Errorf("no positions found for error at record %d", recNum))
|
||||||
|
}
|
||||||
|
errPos, ok := errPositions[recNum]
|
||||||
|
if !ok {
|
||||||
|
panic(fmt.Errorf("no error position found for error at record %d", recNum))
|
||||||
|
}
|
||||||
|
parseErr1 := *parseErr
|
||||||
|
parseErr1.StartLine = positions[recNum][0][0]
|
||||||
|
parseErr1.Line = errPos[0]
|
||||||
|
parseErr1.Column = errPos[1]
|
||||||
|
return &parseErr1
|
||||||
|
}
|
||||||
|
|
||||||
|
// makePositions returns the expected field positions of all
|
||||||
|
// the fields in text, the positions of any errors, and the text with the position markers
|
||||||
|
// removed.
|
||||||
|
//
|
||||||
|
// The start of each field is marked with a § symbol;
|
||||||
|
// CSV lines are separated by ¶ symbols;
|
||||||
|
// Error positions are marked with ∑ symbols.
|
||||||
|
func makePositions(text string) ([][][2]int, map[int][2]int, string) {
|
||||||
|
buf := make([]byte, 0, len(text))
|
||||||
|
var positions [][][2]int
|
||||||
|
errPositions := make(map[int][2]int)
|
||||||
|
line, col := 1, 1
|
||||||
|
recNum := 0
|
||||||
|
|
||||||
|
for len(text) > 0 {
|
||||||
|
r, size := utf8.DecodeRuneInString(text)
|
||||||
|
switch r {
|
||||||
|
case '\n':
|
||||||
|
line++
|
||||||
|
col = 1
|
||||||
|
buf = append(buf, '\n')
|
||||||
|
case '§':
|
||||||
|
if len(positions) == 0 {
|
||||||
|
positions = append(positions, [][2]int{})
|
||||||
|
}
|
||||||
|
positions[len(positions)-1] = append(positions[len(positions)-1], [2]int{line, col})
|
||||||
|
case '¶':
|
||||||
|
positions = append(positions, [][2]int{})
|
||||||
|
recNum++
|
||||||
|
case '∑':
|
||||||
|
errPositions[recNum] = [2]int{line, col}
|
||||||
|
default:
|
||||||
|
buf = append(buf, text[:size]...)
|
||||||
|
col += size
|
||||||
|
}
|
||||||
|
text = text[size:]
|
||||||
|
}
|
||||||
|
return positions, errPositions, string(buf)
|
||||||
|
}
|
||||||
|
|
||||||
// nTimes is an io.Reader which yields the string s n times.
|
// nTimes is an io.Reader which yields the string s n times.
|
||||||
type nTimes struct {
|
type nTimes struct {
|
||||||
s string
|
s string
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue