go/src/encoding/csv/reader_test.go

510 lines
14 KiB
Go
Raw Normal View History

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package csv
import (
"io"
"reflect"
"strings"
"testing"
"unicode/utf8"
)
func TestRead(t *testing.T) {
tests := []struct {
Name string
Input string
Output [][]string
Error error
// These fields are copied into the Reader
Comma rune
Comment rune
UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
FieldsPerRecord int
LazyQuotes bool
TrimLeadingSpace bool
ReuseRecord bool
}{{
Name: "Simple",
Input: "a,b,c\n",
Output: [][]string{{"a", "b", "c"}},
}, {
Name: "CRLF",
Input: "a,b\r\nc,d\r\n",
Output: [][]string{{"a", "b"}, {"c", "d"}},
}, {
Name: "BareCR",
Input: "a,b\rc,d\r\n",
Output: [][]string{{"a", "b\rc", "d"}},
}, {
Name: "RFC4180test",
Input: `#field1,field2,field3
"aaa","bb
b","ccc"
"a,a","b""bb","ccc"
zzz,yyy,xxx
`,
Output: [][]string{
{"#field1", "field2", "field3"},
{"aaa", "bb\nb", "ccc"},
{"a,a", `b"bb`, "ccc"},
{"zzz", "yyy", "xxx"},
},
UseFieldsPerRecord: true,
FieldsPerRecord: 0,
}, {
Name: "NoEOLTest",
Input: "a,b,c",
Output: [][]string{{"a", "b", "c"}},
}, {
Name: "Semicolon",
Input: "a;b;c\n",
Output: [][]string{{"a", "b", "c"}},
Comma: ';',
}, {
Name: "MultiLine",
Input: `"two
line","one line","three
line
field"`,
Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
}, {
Name: "BlankLine",
Input: "a,b,c\n\nd,e,f\n\n",
Output: [][]string{
{"a", "b", "c"},
{"d", "e", "f"},
},
}, {
Name: "BlankLineFieldCount",
Input: "a,b,c\n\nd,e,f\n\n",
Output: [][]string{
{"a", "b", "c"},
{"d", "e", "f"},
},
UseFieldsPerRecord: true,
FieldsPerRecord: 0,
}, {
Name: "TrimSpace",
Input: " a, b, c\n",
Output: [][]string{{"a", "b", "c"}},
TrimLeadingSpace: true,
}, {
Name: "LeadingSpace",
Input: " a, b, c\n",
Output: [][]string{{" a", " b", " c"}},
}, {
Name: "Comment",
Input: "#1,2,3\na,b,c\n#comment",
Output: [][]string{{"a", "b", "c"}},
Comment: '#',
}, {
Name: "NoComment",
Input: "#1,2,3\na,b,c",
Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
}, {
Name: "LazyQuotes",
Input: `a "word","1"2",a","b`,
Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
LazyQuotes: true,
}, {
Name: "BareQuotes",
Input: `a "word","1"2",a"`,
Output: [][]string{{`a "word"`, `1"2`, `a"`}},
LazyQuotes: true,
}, {
Name: "BareDoubleQuotes",
Input: `a""b,c`,
Output: [][]string{{`a""b`, `c`}},
LazyQuotes: true,
}, {
Name: "BadDoubleQuotes",
Input: `a""b,c`,
Error: &ParseError{StartLine: 1, Line: 1, Column: 1, Err: ErrBareQuote},
}, {
Name: "TrimQuote",
Input: ` "a"," b",c`,
Output: [][]string{{"a", " b", "c"}},
TrimLeadingSpace: true,
}, {
Name: "BadBareQuote",
Input: `a "word","b"`,
Error: &ParseError{StartLine: 1, Line: 1, Column: 2, Err: ErrBareQuote},
}, {
Name: "BadTrailingQuote",
Input: `"a word",b"`,
Error: &ParseError{StartLine: 1, Line: 1, Column: 10, Err: ErrBareQuote},
}, {
Name: "ExtraneousQuote",
Input: `"a "word","b"`,
Error: &ParseError{StartLine: 1, Line: 1, Column: 3, Err: ErrQuote},
}, {
Name: "BadFieldCount",
Input: "a,b,c\nd,e",
Error: &ParseError{StartLine: 2, Line: 2, Err: ErrFieldCount},
UseFieldsPerRecord: true,
FieldsPerRecord: 0,
}, {
Name: "BadFieldCount1",
Input: `a,b,c`,
Error: &ParseError{StartLine: 1, Line: 1, Err: ErrFieldCount},
UseFieldsPerRecord: true,
FieldsPerRecord: 2,
}, {
Name: "FieldCount",
Input: "a,b,c\nd,e",
Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
}, {
Name: "TrailingCommaEOF",
Input: "a,b,c,",
Output: [][]string{{"a", "b", "c", ""}},
}, {
Name: "TrailingCommaEOL",
Input: "a,b,c,\n",
Output: [][]string{{"a", "b", "c", ""}},
}, {
Name: "TrailingCommaSpaceEOF",
Input: "a,b,c, ",
Output: [][]string{{"a", "b", "c", ""}},
TrimLeadingSpace: true,
}, {
Name: "TrailingCommaSpaceEOL",
Input: "a,b,c, \n",
Output: [][]string{{"a", "b", "c", ""}},
TrimLeadingSpace: true,
}, {
Name: "TrailingCommaLine3",
Input: "a,b,c\nd,e,f\ng,hi,",
Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
TrimLeadingSpace: true,
}, {
Name: "NotTrailingComma3",
Input: "a,b,c, \n",
Output: [][]string{{"a", "b", "c", " "}},
}, {
Name: "CommaFieldTest",
Input: `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`,
Output: [][]string{
{"x", "y", "z", "w"},
{"x", "y", "z", ""},
{"x", "y", "", ""},
{"x", "", "", ""},
{"", "", "", ""},
{"x", "y", "z", "w"},
{"x", "y", "z", ""},
{"x", "y", "", ""},
{"x", "", "", ""},
{"", "", "", ""},
},
}, {
Name: "TrailingCommaIneffective1",
Input: "a,b,\nc,d,e",
Output: [][]string{
{"a", "b", ""},
{"c", "d", "e"},
},
TrimLeadingSpace: true,
}, {
Name: "ReadAllReuseRecord",
Input: "a,b\nc,d",
encoding/csv: add option to reuse slices returned by Read In many cases the records returned by Reader.Read will only be used between calls to Read and become garbage once a new record is read. In this case, instead of allocating a new slice on each call to Read, we can reuse the last allocated slice for successive calls to avoid unnecessary allocations. This change adds a new field ReuseRecord to the Reader struct to enable this reuse. ReuseRecord is false by default to avoid breaking existing code which dependss on the current behaviour. I also added 4 new benchmarks, corresponding to the existing Read benchmarks, which set ReuseRecord to true. Benchstat on my local machine (old is ReuseRecord = false, new is ReuseRecord = true) name old time/op new time/op delta Read-8 2.75µs ± 2% 1.88µs ± 1% -31.52% (p=0.000 n=14+15) ReadWithFieldsPerRecord-8 2.75µs ± 0% 1.89µs ± 1% -31.43% (p=0.000 n=13+13) ReadWithoutFieldsPerRecord-8 2.77µs ± 1% 1.88µs ± 1% -32.06% (p=0.000 n=15+15) ReadLargeFields-8 55.4µs ± 1% 54.2µs ± 0% -2.07% (p=0.000 n=15+14) name old alloc/op new alloc/op delta Read-8 664B ± 0% 24B ± 0% -96.39% (p=0.000 n=15+15) ReadWithFieldsPerRecord-8 664B ± 0% 24B ± 0% -96.39% (p=0.000 n=15+15) ReadWithoutFieldsPerRecord-8 664B ± 0% 24B ± 0% -96.39% (p=0.000 n=15+15) ReadLargeFields-8 3.94kB ± 0% 2.98kB ± 0% -24.39% (p=0.000 n=15+15) name old allocs/op new allocs/op delta Read-8 18.0 ± 0% 8.0 ± 0% -55.56% (p=0.000 n=15+15) ReadWithFieldsPerRecord-8 18.0 ± 0% 8.0 ± 0% -55.56% (p=0.000 n=15+15) ReadWithoutFieldsPerRecord-8 18.0 ± 0% 8.0 ± 0% -55.56% (p=0.000 n=15+15) ReadLargeFields-8 24.0 ± 0% 12.0 ± 0% -50.00% (p=0.000 n=15+15) Fixes #19721 Change-Id: I79b14128bb9bb3465f53f40f93b1b528a9da6f58 Reviewed-on: https://go-review.googlesource.com/41730 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-04-25 21:23:34 +02:00
Output: [][]string{
{"a", "b"},
{"c", "d"},
},
ReuseRecord: true,
}, {
Name: "StartLine1", // Issue 19019
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Input: "a,\"b\nc\"d,e",
Error: &ParseError{StartLine: 1, Line: 2, Column: 1, Err: ErrQuote},
}, {
Name: "StartLine2",
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Input: "a,b\n\"d\n\n,e",
Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
}, {
Name: "CRLFInQuotedField", // Issue 21201
Input: "A,\"Hello\r\nHi\",B\r\n",
Output: [][]string{
{"A", "Hello\nHi", "B"},
},
}, {
Name: "BinaryBlobField", // Issue 19410
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Input: "x09\x41\xb4\x1c,aktau",
Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "TrailingCR",
Input: "field1,field2\r",
Output: [][]string{{"field1", "field2"}},
}, {
Name: "QuotedTrailingCR",
Input: "\"field\"\r",
Output: [][]string{{"field"}},
}, {
Name: "QuotedTrailingCRCR",
Input: "\"field\"\r\r",
Error: &ParseError{StartLine: 1, Line: 1, Column: 6, Err: ErrQuote},
}, {
Name: "FieldCR",
Input: "field\rfield\r",
Output: [][]string{{"field\rfield"}},
}, {
Name: "FieldCRCR",
Input: "field\r\rfield\r\r",
Output: [][]string{{"field\r\rfield\r"}},
}, {
Name: "FieldCRCRLF",
Input: "field\r\r\nfield\r\r\n",
Output: [][]string{{"field\r"}, {"field\r"}},
}, {
Name: "FieldCRCRLFCR",
Input: "field\r\r\n\rfield\r\r\n\r",
Output: [][]string{{"field\r"}, {"\rfield\r"}},
}, {
Name: "FieldCRCRLFCRCR",
Input: "field\r\r\n\r\rfield\r\r\n\r\r",
Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
}, {
Name: "MultiFieldCRCRLFCRCR",
Input: "field1,field2\r\r\n\r\rfield1,field2\r\r\n\r\r,",
Output: [][]string{
{"field1", "field2\r"},
{"\r\rfield1", "field2\r"},
{"\r\r", ""},
},
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "NonASCIICommaAndComment",
Input: "a£b,c£ \td,e\n€ comment\n",
Output: [][]string{{"a", "b,c", "d,e"}},
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
TrimLeadingSpace: true,
Comma: '£',
Comment: '€',
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "NonASCIICommaAndCommentWithQuotes",
Input: "a€\" b,\"€ c\nλ comment\n",
Output: [][]string{{"a", " b,", " c"}},
Comma: '€',
Comment: 'λ',
}, {
// λ and θ start with the same byte.
// This tests that the parser doesn't confuse such characters.
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "NonASCIICommaConfusion",
Input: "\"abθcd\"λefθgh",
Output: [][]string{{"abθcd", "efθgh"}},
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Comma: 'λ',
Comment: '€',
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "NonASCIICommentConfusion",
Input: "λ\nλ\nθ\nλ\n",
Output: [][]string{{"λ"}, {"λ"}, {"λ"}},
Comment: 'θ',
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "QuotedFieldMultipleLF",
Input: "\"\n\n\n\n\"",
Output: [][]string{{"\n\n\n\n"}},
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "MultipleCRLF",
Input: "\r\n\r\n\r\n\r\n",
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
// The implementation may read each line in several chunks if it doesn't fit entirely
// in the read buffer, so we should test the code to handle that condition.
Name: "HugeLines",
Input: strings.Repeat("#ignore\n", 10000) + strings.Repeat("@", 5000) + "," + strings.Repeat("*", 5000),
Output: [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
Comment: '#',
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "QuoteWithTrailingCRLF",
Input: "\"foo\"bar\"\r\n",
Error: &ParseError{StartLine: 1, Line: 1, Column: 4, Err: ErrQuote},
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "LazyQuoteWithTrailingCRLF",
Input: "\"foo\"bar\"\r\n",
Output: [][]string{{`foo"bar`}},
LazyQuotes: true,
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "DoubleQuoteWithTrailingCRLF",
Input: "\"foo\"\"bar\"\r\n",
Output: [][]string{{`foo"bar`}},
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "EvenQuotes",
Input: `""""""""`,
Output: [][]string{{`"""`}},
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "OddQuotes",
Input: `"""""""`,
Error: &ParseError{StartLine: 1, Line: 1, Column: 7, Err: ErrQuote},
}, {
encoding/csv: simplify and optimize Reader The Reader implementation is slow because it operates on a rune-by-rune basis via bufio.Reader.ReadRune. We speed this up by operating on entire lines that we read from bufio.Reader.ReadSlice. In order to ensure that we read the full line, we augment ReadSlice in our Reader.readLine method to automatically expand the slice if bufio.ErrBufferFull is every hit. This change happens to fix #19410 because it no longer relies on rune-by-rune parsing and only searches for the relevant delimiter rune. In order to keep column accounting simple and consistent, this change reverts parts of CL 52830. This CL is an alternative to CL 36270 and builds on some of the ideas from that change by Diogo Pinela. name old time/op new time/op delta Read-8 3.12µs ± 1% 2.54µs ± 2% -18.76% (p=0.000 n=10+9) ReadWithFieldsPerRecord-8 3.12µs ± 1% 2.53µs ± 1% -18.91% (p=0.000 n=9+9) ReadWithoutFieldsPerRecord-8 3.13µs ± 0% 2.57µs ± 3% -18.07% (p=0.000 n=10+10) ReadLargeFields-8 52.3µs ± 1% 5.3µs ± 2% -89.93% (p=0.000 n=10+9) ReadReuseRecord-8 2.05µs ± 1% 1.40µs ± 1% -31.48% (p=0.000 n=10+9) ReadReuseRecordWithFieldsPerRecord-8 2.05µs ± 1% 1.41µs ± 0% -31.03% (p=0.000 n=10+9) ReadReuseRecordWithoutFieldsPerRecord-8 2.06µs ± 1% 1.40µs ± 1% -31.70% (p=0.000 n=9+10) ReadReuseRecordLargeFields-8 50.9µs ± 0% 4.1µs ± 3% -92.01% (p=0.000 n=10+10) name old alloc/op new alloc/op Read-8 664B ± 0% 664B ± 0% ReadWithFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadWithoutFieldsPerRecord-8 664B ± 0% 664B ± 0% ReadLargeFields-8 3.94kB ± 0% 3.94kB ± 0% ReadReuseRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 24.0B ± 0% 24.0B ± 0% ReadReuseRecordLargeFields-8 2.98kB ± 0% 2.98kB ± 0% name old allocs/op new allocs/op Read-8 18.0 ± 0% 18.0 ± 0% ReadWithFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadWithoutFieldsPerRecord-8 18.0 ± 0% 18.0 ± 0% ReadLargeFields-8 24.0 ± 0% 24.0 ± 0% ReadReuseRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordWithoutFieldsPerRecord-8 8.00 ± 0% 8.00 ± 0% ReadReuseRecordLargeFields-8 12.0 ± 0% 12.0 ± 0% Updates #22352 Updates #19019 Fixes #16791 Fixes #19410 Change-Id: I31c27cfcc56880e6abac262f36c947179b550bbf Reviewed-on: https://go-review.googlesource.com/72150 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-10-20 05:26:43 -07:00
Name: "LazyOddQuotes",
Input: `"""""""`,
Output: [][]string{{`"""`}},
LazyQuotes: true,
}, {
Name: "BadComma1",
Comma: '\n',
Error: errInvalidDelim,
}, {
Name: "BadComma2",
Comma: '\r',
Error: errInvalidDelim,
}, {
Name: "BadComma3",
Comma: '"',
Error: errInvalidDelim,
}, {
Name: "BadComma4",
Comma: utf8.RuneError,
Error: errInvalidDelim,
}, {
Name: "BadComment1",
Comment: '\n',
Error: errInvalidDelim,
}, {
Name: "BadComment2",
Comment: '\r',
Error: errInvalidDelim,
}, {
Name: "BadComment3",
Comment: utf8.RuneError,
Error: errInvalidDelim,
}, {
Name: "BadCommaComment",
Comma: 'X',
Comment: 'X',
Error: errInvalidDelim,
}}
for _, tt := range tests {
t.Run(tt.Name, func(t *testing.T) {
r := NewReader(strings.NewReader(tt.Input))
if tt.Comma != 0 {
r.Comma = tt.Comma
}
r.Comment = tt.Comment
if tt.UseFieldsPerRecord {
r.FieldsPerRecord = tt.FieldsPerRecord
} else {
r.FieldsPerRecord = -1
}
r.LazyQuotes = tt.LazyQuotes
r.TrimLeadingSpace = tt.TrimLeadingSpace
r.ReuseRecord = tt.ReuseRecord
out, err := r.ReadAll()
if !reflect.DeepEqual(err, tt.Error) {
t.Errorf("ReadAll() error:\ngot %v\nwant %v", err, tt.Error)
} else if !reflect.DeepEqual(out, tt.Output) {
t.Errorf("ReadAll() output:\ngot %q\nwant %q", out, tt.Output)
}
})
}
}
// nTimes is an io.Reader which yields the string s n times.
type nTimes struct {
s string
n int
off int
}
func (r *nTimes) Read(p []byte) (n int, err error) {
for {
if r.n <= 0 || r.s == "" {
return n, io.EOF
}
n0 := copy(p, r.s[r.off:])
p = p[n0:]
n += n0
r.off += n0
if r.off == len(r.s) {
r.off = 0
r.n--
}
if len(p) == 0 {
return
}
}
}
// benchmarkRead measures reading the provided CSV rows data.
// initReader, if non-nil, modifies the Reader before it's used.
func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) {
b.ReportAllocs()
r := NewReader(&nTimes{s: rows, n: b.N})
if initReader != nil {
initReader(r)
}
for {
_, err := r.Read()
if err == io.EOF {
break
}
if err != nil {
b.Fatal(err)
}
}
}
const benchmarkCSVData = `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`
func BenchmarkRead(b *testing.B) {
benchmarkRead(b, nil, benchmarkCSVData)
}
func BenchmarkReadWithFieldsPerRecord(b *testing.B) {
benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData)
}
func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) {
benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData)
}
func BenchmarkReadLargeFields(b *testing.B) {
benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
`, 3))
}
encoding/csv: add option to reuse slices returned by Read In many cases the records returned by Reader.Read will only be used between calls to Read and become garbage once a new record is read. In this case, instead of allocating a new slice on each call to Read, we can reuse the last allocated slice for successive calls to avoid unnecessary allocations. This change adds a new field ReuseRecord to the Reader struct to enable this reuse. ReuseRecord is false by default to avoid breaking existing code which dependss on the current behaviour. I also added 4 new benchmarks, corresponding to the existing Read benchmarks, which set ReuseRecord to true. Benchstat on my local machine (old is ReuseRecord = false, new is ReuseRecord = true) name old time/op new time/op delta Read-8 2.75µs ± 2% 1.88µs ± 1% -31.52% (p=0.000 n=14+15) ReadWithFieldsPerRecord-8 2.75µs ± 0% 1.89µs ± 1% -31.43% (p=0.000 n=13+13) ReadWithoutFieldsPerRecord-8 2.77µs ± 1% 1.88µs ± 1% -32.06% (p=0.000 n=15+15) ReadLargeFields-8 55.4µs ± 1% 54.2µs ± 0% -2.07% (p=0.000 n=15+14) name old alloc/op new alloc/op delta Read-8 664B ± 0% 24B ± 0% -96.39% (p=0.000 n=15+15) ReadWithFieldsPerRecord-8 664B ± 0% 24B ± 0% -96.39% (p=0.000 n=15+15) ReadWithoutFieldsPerRecord-8 664B ± 0% 24B ± 0% -96.39% (p=0.000 n=15+15) ReadLargeFields-8 3.94kB ± 0% 2.98kB ± 0% -24.39% (p=0.000 n=15+15) name old allocs/op new allocs/op delta Read-8 18.0 ± 0% 8.0 ± 0% -55.56% (p=0.000 n=15+15) ReadWithFieldsPerRecord-8 18.0 ± 0% 8.0 ± 0% -55.56% (p=0.000 n=15+15) ReadWithoutFieldsPerRecord-8 18.0 ± 0% 8.0 ± 0% -55.56% (p=0.000 n=15+15) ReadLargeFields-8 24.0 ± 0% 12.0 ± 0% -50.00% (p=0.000 n=15+15) Fixes #19721 Change-Id: I79b14128bb9bb3465f53f40f93b1b528a9da6f58 Reviewed-on: https://go-review.googlesource.com/41730 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-04-25 21:23:34 +02:00
func BenchmarkReadReuseRecord(b *testing.B) {
benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData)
}
func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) {
benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData)
}
func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) {
benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData)
}
func BenchmarkReadReuseRecordLargeFields(b *testing.B) {
benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
`, 3))
}