mirror of
https://github.com/python/cpython.git
synced 2026-03-24 13:40:57 +00:00
Patch from Thomas Barr so that csv.Sniffer sets the doublequote property on the dialect it returns.
Closes issue 6606.
This commit is contained in:
parent
17565e5b7b
commit
b4fd4d37a1
2 changed files with 25 additions and 6 deletions
22
Lib/csv.py
22
Lib/csv.py
|
|
@ -170,7 +170,7 @@ def sniff(self, sample, delimiters=None):
|
|||
Returns a dialect (or None) corresponding to the sample
|
||||
"""
|
||||
|
||||
quotechar, delimiter, skipinitialspace = \
|
||||
quotechar, doublequote, delimiter, skipinitialspace = \
|
||||
self._guess_quote_and_delimiter(sample, delimiters)
|
||||
if not delimiter:
|
||||
delimiter, skipinitialspace = self._guess_delimiter(sample,
|
||||
|
|
@ -184,8 +184,8 @@ class dialect(Dialect):
|
|||
lineterminator = '\r\n'
|
||||
quoting = QUOTE_MINIMAL
|
||||
# escapechar = ''
|
||||
doublequote = False
|
||||
|
||||
dialect.doublequote = doublequote
|
||||
dialect.delimiter = delimiter
|
||||
# _csv.reader won't accept a quotechar of ''
|
||||
dialect.quotechar = quotechar or '"'
|
||||
|
|
@ -217,8 +217,8 @@ def _guess_quote_and_delimiter(self, data, delimiters):
|
|||
break
|
||||
|
||||
if not matches:
|
||||
return ('', None, 0) # (quotechar, delimiter, skipinitialspace)
|
||||
|
||||
# (quotechar, doublequote, delimiter, skipinitialspace)
|
||||
return ('', False, None, 0)
|
||||
quotes = {}
|
||||
delims = {}
|
||||
spaces = 0
|
||||
|
|
@ -255,7 +255,19 @@ def _guess_quote_and_delimiter(self, data, delimiters):
|
|||
delim = ''
|
||||
skipinitialspace = 0
|
||||
|
||||
return (quotechar, delim, skipinitialspace)
|
||||
# if we see an extra quote between delimiters, we've got a
|
||||
# double quoted format
|
||||
dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
|
||||
{'delim':delim, 'quote':quotechar}, re.MULTILINE)
|
||||
|
||||
|
||||
|
||||
if dq_regexp.search(data):
|
||||
doublequote = True
|
||||
else:
|
||||
doublequote = False
|
||||
|
||||
return (quotechar, doublequote, delim, skipinitialspace)
|
||||
|
||||
|
||||
def _guess_delimiter(self, data, delimiters):
|
||||
|
|
|
|||
|
|
@ -891,7 +891,7 @@ class TestSniffer(unittest.TestCase):
|
|||
'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'
|
||||
'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'
|
||||
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
|
||||
'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
|
||||
'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
|
||||
"""
|
||||
header = '''\
|
||||
"venue","city","state","date","performers"
|
||||
|
|
@ -950,6 +950,13 @@ def test_delimiters(self):
|
|||
self.assertEqual(dialect.delimiter, "|")
|
||||
self.assertEqual(dialect.quotechar, "'")
|
||||
|
||||
def test_doublequote(self):
|
||||
sniffer = csv.Sniffer()
|
||||
dialect = sniffer.sniff(self.header)
|
||||
self.assertFalse(dialect.doublequote)
|
||||
dialect = sniffer.sniff(self.sample2)
|
||||
self.assertTrue(dialect.doublequote)
|
||||
|
||||
if not hasattr(sys, "gettotalrefcount"):
|
||||
if test_support.verbose: print "*** skipping leakage tests ***"
|
||||
else:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue