mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	
		
			
	
	
		
			531 lines
		
	
	
	
		
			19 KiB
		
	
	
	
		
			ReStructuredText
		
	
	
	
	
	
		
		
			
		
	
	
			531 lines
		
	
	
	
		
			19 KiB
		
	
	
	
		
			ReStructuredText
		
	
	
	
	
	
|   | 
 | ||
|  | :mod:`csv` --- CSV File Reading and Writing
 | ||
|  | ===========================================
 | ||
|  | 
 | ||
|  | .. module:: csv
 | ||
|  |    :synopsis: Write and read tabular data to and from delimited files.
 | ||
|  | .. sectionauthor:: Skip Montanaro <skip@pobox.com>
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. versionadded:: 2.3
 | ||
|  | 
 | ||
|  | .. index::
 | ||
|  |    single: csv
 | ||
|  |    pair: data; tabular
 | ||
|  | 
 | ||
|  | The so-called CSV (Comma Separated Values) format is the most common import and
 | ||
|  | export format for spreadsheets and databases.  There is no "CSV standard", so
 | ||
|  | the format is operationally defined by the many applications which read and
 | ||
|  | write it.  The lack of a standard means that subtle differences often exist in
 | ||
|  | the data produced and consumed by different applications.  These differences can
 | ||
|  | make it annoying to process CSV files from multiple sources.  Still, while the
 | ||
|  | delimiters and quoting characters vary, the overall format is similar enough
 | ||
|  | that it is possible to write a single module which can efficiently manipulate
 | ||
|  | such data, hiding the details of reading and writing the data from the
 | ||
|  | programmer.
 | ||
|  | 
 | ||
|  | The :mod:`csv` module implements classes to read and write tabular data in CSV
 | ||
|  | format.  It allows programmers to say, "write this data in the format preferred
 | ||
|  | by Excel," or "read data from this file which was generated by Excel," without
 | ||
|  | knowing the precise details of the CSV format used by Excel.  Programmers can
 | ||
|  | also describe the CSV formats understood by other applications or define their
 | ||
|  | own special-purpose CSV formats.
 | ||
|  | 
 | ||
|  | The :mod:`csv` module's :class:`reader` and :class:`writer` objects read and
 | ||
|  | write sequences.  Programmers can also read and write data in dictionary form
 | ||
|  | using the :class:`DictReader` and :class:`DictWriter` classes.
 | ||
|  | 
 | ||
|  | .. note::
 | ||
|  | 
 | ||
|  |    This version of the :mod:`csv` module doesn't support Unicode input.  Also,
 | ||
|  |    there are currently some issues regarding ASCII NUL characters.  Accordingly,
 | ||
|  |    all input should be UTF-8 or printable ASCII to be safe; see the examples in
 | ||
|  |    section :ref:`csv-examples`. These restrictions will be removed in the future.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. seealso::
 | ||
|  | 
 | ||
|  |    .. % \seemodule{array}{Arrays of uniformly typed numeric values.}
 | ||
|  | 
 | ||
|  |    :pep:`305` - CSV File API
 | ||
|  |       The Python Enhancement Proposal which proposed this addition to Python.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. _csv-contents:
 | ||
|  | 
 | ||
|  | Module Contents
 | ||
|  | ---------------
 | ||
|  | 
 | ||
|  | The :mod:`csv` module defines the following functions:
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. function:: reader(csvfile[, dialect='excel'][, fmtparam])
 | ||
|  | 
 | ||
|  |    Return a reader object which will iterate over lines in the given *csvfile*.
 | ||
|  |    *csvfile* can be any object which supports the iterator protocol and returns a
 | ||
|  |    string each time its :meth:`next` method is called --- file objects and list
 | ||
|  |    objects are both suitable.   If *csvfile* is a file object, it must be opened
 | ||
|  |    with the 'b' flag on platforms where that makes a difference.  An optional
 | ||
|  |    *dialect* parameter can be given which is used to define a set of parameters
 | ||
|  |    specific to a particular CSV dialect.  It may be an instance of a subclass of
 | ||
|  |    the :class:`Dialect` class or one of the strings returned by the
 | ||
|  |    :func:`list_dialects` function.  The other optional *fmtparam* keyword arguments
 | ||
|  |    can be given to override individual formatting parameters in the current
 | ||
|  |    dialect.  For full details about the dialect and formatting parameters, see
 | ||
|  |    section :ref:`csv-fmt-params`.
 | ||
|  | 
 | ||
|  |    All data read are returned as strings.  No automatic data type conversion is
 | ||
|  |    performed.
 | ||
|  | 
 | ||
|  |    .. versionchanged:: 2.5
 | ||
|  |       The parser is now stricter with respect to multi-line quoted fields. Previously,
 | ||
|  |       if a line ended within a quoted field without a terminating newline character, a
 | ||
|  |       newline would be inserted into the returned field. This behavior caused problems
 | ||
|  |       when reading files which contained carriage return characters within fields.
 | ||
|  |       The behavior was changed to return the field without inserting newlines. As a
 | ||
|  |       consequence, if newlines embedded within fields are important, the input should
 | ||
|  |       be split into lines in a manner which preserves the newline characters.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. function:: writer(csvfile[, dialect='excel'][, fmtparam])
 | ||
|  | 
 | ||
|  |    Return a writer object responsible for converting the user's data into delimited
 | ||
|  |    strings on the given file-like object.  *csvfile* can be any object with a
 | ||
|  |    :meth:`write` method.  If *csvfile* is a file object, it must be opened with the
 | ||
|  |    'b' flag on platforms where that makes a difference.  An optional *dialect*
 | ||
|  |    parameter can be given which is used to define a set of parameters specific to a
 | ||
|  |    particular CSV dialect.  It may be an instance of a subclass of the
 | ||
|  |    :class:`Dialect` class or one of the strings returned by the
 | ||
|  |    :func:`list_dialects` function.  The other optional *fmtparam* keyword arguments
 | ||
|  |    can be given to override individual formatting parameters in the current
 | ||
|  |    dialect.  For full details about the dialect and formatting parameters, see
 | ||
|  |    section :ref:`csv-fmt-params`. To make it
 | ||
|  |    as easy as possible to interface with modules which implement the DB API, the
 | ||
|  |    value :const:`None` is written as the empty string.  While this isn't a
 | ||
|  |    reversible transformation, it makes it easier to dump SQL NULL data values to
 | ||
|  |    CSV files without preprocessing the data returned from a ``cursor.fetch*`` call.
 | ||
|  |    All other non-string data are stringified with :func:`str` before being written.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. function:: register_dialect(name[, dialect][, fmtparam])
 | ||
|  | 
 | ||
|  |    Associate *dialect* with *name*.  *name* must be a string or Unicode object. The
 | ||
|  |    dialect can be specified either by passing a sub-class of :class:`Dialect`, or
 | ||
|  |    by *fmtparam* keyword arguments, or both, with keyword arguments overriding
 | ||
|  |    parameters of the dialect. For full details about the dialect and formatting
 | ||
|  |    parameters, see section :ref:`csv-fmt-params`.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. function:: unregister_dialect(name)
 | ||
|  | 
 | ||
|  |    Delete the dialect associated with *name* from the dialect registry.  An
 | ||
|  |    :exc:`Error` is raised if *name* is not a registered dialect name.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. function:: get_dialect(name)
 | ||
|  | 
 | ||
|  |    Return the dialect associated with *name*.  An :exc:`Error` is raised if *name*
 | ||
|  |    is not a registered dialect name.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. function:: list_dialects()
 | ||
|  | 
 | ||
|  |    Return the names of all registered dialects.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. function:: field_size_limit([new_limit])
 | ||
|  | 
 | ||
|  |    Return the current maximum field size allowed by the parser. If *new_limit* is
 | ||
|  |    given, this becomes the new limit.
 | ||
|  | 
 | ||
|  |    .. versionadded:: 2.5
 | ||
|  | 
 | ||
|  | The :mod:`csv` module defines the following classes:
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. class:: DictReader(csvfile[, fieldnames=None[, restkey=None[, restval=None[, dialect='excel'[, *args, **kwds]]]]])
 | ||
|  | 
 | ||
|  |    Create an object which operates like a regular reader but maps the information
 | ||
|  |    read into a dict whose keys are given by the optional  *fieldnames* parameter.
 | ||
|  |    If the *fieldnames* parameter is omitted, the values in the first row of the
 | ||
|  |    *csvfile* will be used as the fieldnames.  If the row read has more fields
 | ||
|  |    than the fieldnames sequence, the remaining data is added as a sequence
 | ||
|  |    keyed by the value of *restkey*.  If the row read has fewer fields than the
 | ||
|  |    fieldnames sequence, the remaining keys take the value of the optional
 | ||
|  |    *restval* parameter.  Any other optional or keyword
 | ||
|  |    arguments are passed to the underlying :class:`reader` instance.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. class:: DictWriter(csvfile, fieldnames[, restval=''[, extrasaction='raise'[, dialect='excel'[, *args, **kwds]]]])
 | ||
|  | 
 | ||
|  |    Create an object which operates like a regular writer but maps dictionaries onto
 | ||
|  |    output rows.  The *fieldnames* parameter identifies the order in which values in
 | ||
|  |    the dictionary passed to the :meth:`writerow` method are written to the
 | ||
|  |    *csvfile*.  The optional *restval* parameter specifies the value to be written
 | ||
|  |    if the dictionary is missing a key in *fieldnames*.  If the dictionary passed to
 | ||
|  |    the :meth:`writerow` method contains a key not found in *fieldnames*, the
 | ||
|  |    optional *extrasaction* parameter indicates what action to take.  If it is set
 | ||
|  |    to ``'raise'`` a :exc:`ValueError` is raised.  If it is set to ``'ignore'``,
 | ||
|  |    extra values in the dictionary are ignored.  Any other optional or keyword
 | ||
|  |    arguments are passed to the underlying :class:`writer` instance.
 | ||
|  | 
 | ||
|  |    Note that unlike the :class:`DictReader` class, the *fieldnames* parameter of
 | ||
|  |    the :class:`DictWriter` is not optional.  Since Python's :class:`dict` objects
 | ||
|  |    are not ordered, there is not enough information available to deduce the order
 | ||
|  |    in which the row should be written to the *csvfile*.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. class:: Dialect
 | ||
|  | 
 | ||
|  |    The :class:`Dialect` class is a container class relied on primarily for its
 | ||
|  |    attributes, which are used to define the parameters for a specific
 | ||
|  |    :class:`reader` or :class:`writer` instance.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. class:: excel()
 | ||
|  | 
 | ||
|  |    The :class:`excel` class defines the usual properties of an Excel-generated CSV
 | ||
|  |    file.  It is registered with the dialect name ``'excel'``.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. class:: excel_tab()
 | ||
|  | 
 | ||
|  |    The :class:`excel_tab` class defines the usual properties of an Excel-generated
 | ||
|  |    TAB-delimited file.  It is registered with the dialect name ``'excel-tab'``.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. class:: Sniffer()
 | ||
|  | 
 | ||
|  |    The :class:`Sniffer` class is used to deduce the format of a CSV file.
 | ||
|  | 
 | ||
|  | The :class:`Sniffer` class provides two methods:
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. method:: Sniffer.sniff(sample[, delimiters=None])
 | ||
|  | 
 | ||
|  |    Analyze the given *sample* and return a :class:`Dialect` subclass reflecting the
 | ||
|  |    parameters found.  If the optional *delimiters* parameter is given, it is
 | ||
|  |    interpreted as a string containing possible valid delimiter characters.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. method:: Sniffer.has_header(sample)
 | ||
|  | 
 | ||
|  |    Analyze the sample text (presumed to be in CSV format) and return :const:`True`
 | ||
|  |    if the first row appears to be a series of column headers.
 | ||
|  | 
 | ||
|  | The :mod:`csv` module defines the following constants:
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. data:: QUOTE_ALL
 | ||
|  | 
 | ||
|  |    Instructs :class:`writer` objects to quote all fields.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. data:: QUOTE_MINIMAL
 | ||
|  | 
 | ||
|  |    Instructs :class:`writer` objects to only quote those fields which contain
 | ||
|  |    special characters such as *delimiter*, *quotechar* or any of the characters in
 | ||
|  |    *lineterminator*.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. data:: QUOTE_NONNUMERIC
 | ||
|  | 
 | ||
|  |    Instructs :class:`writer` objects to quote all non-numeric fields.
 | ||
|  | 
 | ||
|  |    Instructs the reader to convert all non-quoted fields to type *float*.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. data:: QUOTE_NONE
 | ||
|  | 
 | ||
|  |    Instructs :class:`writer` objects to never quote fields.  When the current
 | ||
|  |    *delimiter* occurs in output data it is preceded by the current *escapechar*
 | ||
|  |    character.  If *escapechar* is not set, the writer will raise :exc:`Error` if
 | ||
|  |    any characters that require escaping are encountered.
 | ||
|  | 
 | ||
|  |    Instructs :class:`reader` to perform no special processing of quote characters.
 | ||
|  | 
 | ||
|  | The :mod:`csv` module defines the following exception:
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. exception:: Error
 | ||
|  | 
 | ||
|  |    Raised by any of the functions when an error is detected.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. _csv-fmt-params:
 | ||
|  | 
 | ||
|  | Dialects and Formatting Parameters
 | ||
|  | ----------------------------------
 | ||
|  | 
 | ||
|  | To make it easier to specify the format of input and output records, specific
 | ||
|  | formatting parameters are grouped together into dialects.  A dialect is a
 | ||
|  | subclass of the :class:`Dialect` class having a set of specific attributes and a
 | ||
|  | single :meth:`validate` method.  When creating :class:`reader` or
 | ||
|  | :class:`writer` objects, the programmer can specify a string or a subclass of
 | ||
|  | the :class:`Dialect` class as the dialect parameter.  In addition to, or instead
 | ||
|  | of, the *dialect* parameter, the programmer can also specify individual
 | ||
|  | formatting parameters, which have the same names as the attributes defined below
 | ||
|  | for the :class:`Dialect` class.
 | ||
|  | 
 | ||
|  | Dialects support the following attributes:
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. attribute:: Dialect.delimiter
 | ||
|  | 
 | ||
|  |    A one-character string used to separate fields.  It defaults to ``','``.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. attribute:: Dialect.doublequote
 | ||
|  | 
 | ||
|  |    Controls how instances of *quotechar* appearing inside a field should
 | ||
|  |    themselves be quoted.  When :const:`True`, the character is doubled. When
 | ||
|  |    :const:`False`, the *escapechar* is used as a prefix to the *quotechar*.  It
 | ||
|  |    defaults to :const:`True`.
 | ||
|  | 
 | ||
|  |    On output, if *doublequote* is :const:`False` and no *escapechar* is set,
 | ||
|  |    :exc:`Error` is raised if a *quotechar* is found in a field.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. attribute:: Dialect.escapechar
 | ||
|  | 
 | ||
|  |    A one-character string used by the writer to escape the *delimiter* if *quoting*
 | ||
|  |    is set to :const:`QUOTE_NONE` and the *quotechar* if *doublequote* is
 | ||
|  |    :const:`False`. On reading, the *escapechar* removes any special meaning from
 | ||
|  |    the following character. It defaults to :const:`None`, which disables escaping.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. attribute:: Dialect.lineterminator
 | ||
|  | 
 | ||
|  |    The string used to terminate lines produced by the :class:`writer`. It defaults
 | ||
|  |    to ``'\r\n'``.
 | ||
|  | 
 | ||
|  |    .. note::
 | ||
|  | 
 | ||
|  |       The :class:`reader` is hard-coded to recognise either ``'\r'`` or ``'\n'`` as
 | ||
|  |       end-of-line, and ignores *lineterminator*. This behavior may change in the
 | ||
|  |       future.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. attribute:: Dialect.quotechar
 | ||
|  | 
 | ||
|  |    A one-character string used to quote fields containing special characters, such
 | ||
|  |    as the *delimiter* or *quotechar*, or which contain new-line characters.  It
 | ||
|  |    defaults to ``'"'``.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. attribute:: Dialect.quoting
 | ||
|  | 
 | ||
|  |    Controls when quotes should be generated by the writer and recognised by the
 | ||
|  |    reader.  It can take on any of the :const:`QUOTE_\*` constants (see section
 | ||
|  |    :ref:`csv-contents`) and defaults to :const:`QUOTE_MINIMAL`.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. attribute:: Dialect.skipinitialspace
 | ||
|  | 
 | ||
|  |    When :const:`True`, whitespace immediately following the *delimiter* is ignored.
 | ||
|  |    The default is :const:`False`.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | Reader Objects
 | ||
|  | --------------
 | ||
|  | 
 | ||
|  | Reader objects (:class:`DictReader` instances and objects returned by the
 | ||
|  | :func:`reader` function) have the following public methods:
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. method:: csvreader.next()
 | ||
|  | 
 | ||
|  |    Return the next row of the reader's iterable object as a list, parsed according
 | ||
|  |    to the current dialect.
 | ||
|  | 
 | ||
|  | Reader objects have the following public attributes:
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. attribute:: csvreader.dialect
 | ||
|  | 
 | ||
|  |    A read-only description of the dialect in use by the parser.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. attribute:: csvreader.line_num
 | ||
|  | 
 | ||
|  |    The number of lines read from the source iterator. This is not the same as the
 | ||
|  |    number of records returned, as records can span multiple lines.
 | ||
|  | 
 | ||
|  |    .. versionadded:: 2.5
 | ||
|  | 
 | ||
|  | 
 | ||
|  | Writer Objects
 | ||
|  | --------------
 | ||
|  | 
 | ||
|  | :class:`writer` objects (:class:`DictWriter` instances and objects returned by
 | ||
|  | the :func:`writer` function) have the following public methods.  A *row* must be
 | ||
|  | a sequence of strings or numbers for :class:`writer` objects and a dictionary
 | ||
|  | mapping fieldnames to strings or numbers (by passing them through :func:`str`
 | ||
|  | first) for :class:`DictWriter` objects.  Note that complex numbers are written
 | ||
|  | out surrounded by parens. This may cause some problems for other programs which
 | ||
|  | read CSV files (assuming they support complex numbers at all).
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. method:: csvwriter.writerow(row)
 | ||
|  | 
 | ||
|  |    Write the *row* parameter to the writer's file object, formatted according to
 | ||
|  |    the current dialect.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. method:: csvwriter.writerows(rows)
 | ||
|  | 
 | ||
|  |    Write all the rows in the *rows* parameter (a list of *row* objects as described above) to
 | ||
|  |    the writer's file object, formatted according to the current dialect.
 | ||
|  | 
 | ||
|  | Writer objects have the following public attribute:
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. attribute:: csvwriter.dialect
 | ||
|  | 
 | ||
|  |    A read-only description of the dialect in use by the writer.
 | ||
|  | 
 | ||
|  | 
 | ||
|  | .. _csv-examples:
 | ||
|  | 
 | ||
|  | Examples
 | ||
|  | --------
 | ||
|  | 
 | ||
|  | The simplest example of reading a CSV file::
 | ||
|  | 
 | ||
|  |    import csv
 | ||
|  |    reader = csv.reader(open("some.csv", "rb"))
 | ||
|  |    for row in reader:
 | ||
|  |        print row
 | ||
|  | 
 | ||
|  | Reading a file with an alternate format::
 | ||
|  | 
 | ||
|  |    import csv
 | ||
|  |    reader = csv.reader(open("passwd", "rb"), delimiter=':', quoting=csv.QUOTE_NONE)
 | ||
|  |    for row in reader:
 | ||
|  |        print row
 | ||
|  | 
 | ||
|  | The corresponding simplest possible writing example is::
 | ||
|  | 
 | ||
|  |    import csv
 | ||
|  |    writer = csv.writer(open("some.csv", "wb"))
 | ||
|  |    writer.writerows(someiterable)
 | ||
|  | 
 | ||
|  | Registering a new dialect::
 | ||
|  | 
 | ||
|  |    import csv
 | ||
|  | 
 | ||
|  |    csv.register_dialect('unixpwd', delimiter=':', quoting=csv.QUOTE_NONE)
 | ||
|  | 
 | ||
|  |    reader = csv.reader(open("passwd", "rb"), 'unixpwd')
 | ||
|  | 
 | ||
|  | A slightly more advanced use of the reader --- catching and reporting errors::
 | ||
|  | 
 | ||
|  |    import csv, sys
 | ||
|  |    filename = "some.csv"
 | ||
|  |    reader = csv.reader(open(filename, "rb"))
 | ||
|  |    try:
 | ||
|  |        for row in reader:
 | ||
|  |            print row
 | ||
|  |    except csv.Error as e:
 | ||
|  |        sys.exit('file %s, line %d: %s' % (filename, reader.line_num, e))
 | ||
|  | 
 | ||
|  | And while the module doesn't directly support parsing strings, it can easily be
 | ||
|  | done::
 | ||
|  | 
 | ||
|  |    import csv
 | ||
|  |    for row in csv.reader(['one,two,three']):
 | ||
|  |        print row
 | ||
|  | 
 | ||
|  | The :mod:`csv` module doesn't directly support reading and writing Unicode, but
 | ||
|  | it is 8-bit-clean save for some problems with ASCII NUL characters.  So you can
 | ||
|  | write functions or classes that handle the encoding and decoding for you as long
 | ||
|  | as you avoid encodings like UTF-16 that use NULs.  UTF-8 is recommended.
 | ||
|  | 
 | ||
|  | :func:`unicode_csv_reader` below is a generator that wraps :class:`csv.reader`
 | ||
|  | to handle Unicode CSV data (a list of Unicode strings).  :func:`utf_8_encoder`
 | ||
|  | is a generator that encodes the Unicode strings as UTF-8, one string (or row) at
 | ||
|  | a time.  The encoded strings are parsed by the CSV reader, and
 | ||
|  | :func:`unicode_csv_reader` decodes the UTF-8-encoded cells back into Unicode::
 | ||
|  | 
 | ||
|  |    import csv
 | ||
|  | 
 | ||
|  |    def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
 | ||
|  |        # csv.py doesn't do Unicode; encode temporarily as UTF-8:
 | ||
|  |        csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
 | ||
|  |                                dialect=dialect, **kwargs)
 | ||
|  |        for row in csv_reader:
 | ||
|  |            # decode UTF-8 back to Unicode, cell by cell:
 | ||
|  |            yield [unicode(cell, 'utf-8') for cell in row]
 | ||
|  | 
 | ||
|  |    def utf_8_encoder(unicode_csv_data):
 | ||
|  |        for line in unicode_csv_data:
 | ||
|  |            yield line.encode('utf-8')
 | ||
|  | 
 | ||
|  | For all other encodings the following :class:`UnicodeReader` and
 | ||
|  | :class:`UnicodeWriter` classes can be used. They take an additional *encoding*
 | ||
|  | parameter in their constructor and make sure that the data passes the real
 | ||
|  | reader or writer encoded as UTF-8::
 | ||
|  | 
 | ||
|  |    import csv, codecs, cStringIO
 | ||
|  | 
 | ||
|  |    class UTF8Recoder:
 | ||
|  |        """
 | ||
|  |        Iterator that reads an encoded stream and reencodes the input to UTF-8
 | ||
|  |        """
 | ||
|  |        def __init__(self, f, encoding):
 | ||
|  |            self.reader = codecs.getreader(encoding)(f)
 | ||
|  | 
 | ||
|  |        def __iter__(self):
 | ||
|  |            return self
 | ||
|  | 
 | ||
|  |        def __next__(self):
 | ||
|  |            return next(self.reader).encode("utf-8")
 | ||
|  | 
 | ||
|  |    class UnicodeReader:
 | ||
|  |        """
 | ||
|  |        A CSV reader which will iterate over lines in the CSV file "f",
 | ||
|  |        which is encoded in the given encoding.
 | ||
|  |        """
 | ||
|  | 
 | ||
|  |        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
 | ||
|  |            f = UTF8Recoder(f, encoding)
 | ||
|  |            self.reader = csv.reader(f, dialect=dialect, **kwds)
 | ||
|  | 
 | ||
|  |        def __next__(self):
 | ||
|  |            row = next(self.reader)
 | ||
|  |            return [unicode(s, "utf-8") for s in row]
 | ||
|  | 
 | ||
|  |        def __iter__(self):
 | ||
|  |            return self
 | ||
|  | 
 | ||
|  |    class UnicodeWriter:
 | ||
|  |        """
 | ||
|  |        A CSV writer which will write rows to CSV file "f",
 | ||
|  |        which is encoded in the given encoding.
 | ||
|  |        """
 | ||
|  | 
 | ||
|  |        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
 | ||
|  |            # Redirect output to a queue
 | ||
|  |            self.queue = cStringIO.StringIO()
 | ||
|  |            self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
 | ||
|  |            self.stream = f
 | ||
|  |            self.encoder = codecs.getincrementalencoder(encoding)()
 | ||
|  | 
 | ||
|  |        def writerow(self, row):
 | ||
|  |            self.writer.writerow([s.encode("utf-8") for s in row])
 | ||
|  |            # Fetch UTF-8 output from the queue ...
 | ||
|  |            data = self.queue.getvalue()
 | ||
|  |            data = data.decode("utf-8")
 | ||
|  |            # ... and reencode it into the target encoding
 | ||
|  |            data = self.encoder.encode(data)
 | ||
|  |            # write to the target stream
 | ||
|  |            self.stream.write(data)
 | ||
|  |            # empty queue
 | ||
|  |            self.queue.truncate(0)
 | ||
|  | 
 | ||
|  |        def writerows(self, rows):
 | ||
|  |            for row in rows:
 | ||
|  |                self.writerow(row)
 | ||
|  | 
 |