mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	Merged revisions 82510 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r82510 | senthil.kumaran | 2010-07-03 23:18:22 +0530 (Sat, 03 Jul 2010) | 4 lines Fix Issue5468 - urlencode to handle bytes and other alternate encodings. (Extensive tests provided). Patch by Dan Mahn. ........
This commit is contained in:
		
							parent
							
								
									8e42fb7ada
								
							
						
					
					
						commit
						fe1ad15b4b
					
				
					 4 changed files with 165 additions and 24 deletions
				
			
		|  | @ -307,23 +307,29 @@ The :mod:`urllib.parse` module defines the following functions: | |||
|    ``b'a&\xef'``. | ||||
| 
 | ||||
| 
 | ||||
| .. function:: urlencode(query, doseq=False) | ||||
| .. function:: urlencode(query, doseq=False, safe='', encoding=None, errors=None) | ||||
| 
 | ||||
|    Convert a mapping object or a sequence of two-element tuples  to a | ||||
|    "url-encoded" string, suitable to pass to :func:`urlopen` above as the | ||||
|    optional *data* argument.  This is useful to pass a dictionary of form | ||||
|    fields to a ``POST`` request.  The resulting string is a series of | ||||
|    ``key=value`` pairs separated by ``'&'`` characters, where both *key* and | ||||
|    *value* are quoted using :func:`quote_plus` above. When a sequence of | ||||
|    two-element tuples is used as the *query* argument, the first element of | ||||
|    each tuple is a key and the second is a value. The value element in itself | ||||
|    can be a sequence and in that case, if the optional parameter *doseq* is | ||||
|    evaluates to *True*, individual ``key=value`` pairs separated by ``'&'``are | ||||
|    generated for each element of the value sequence for the key.  The order of | ||||
|    parameters in the encoded string will match the order of parameter tuples in | ||||
|    the sequence. This module provides the functions :func:`parse_qs` and | ||||
|    :func:`parse_qsl` which are used to parse query strings into Python data | ||||
|    structures. | ||||
|    Convert a mapping object or a sequence of two-element tuples, which may | ||||
|    contain :class:`str` or :class:`bytes` objects, to a "url-encoded" string, | ||||
|    suitable to pass to :func:`urlopen` above as the optional *data* argument. | ||||
|    This is useful to pass a dictionary of form fields to a ``POST`` request. | ||||
|    The resulting string is a series of ``key=value`` pairs separated by ``'&'`` | ||||
|    characters, where both *key* and *value* are quoted using :func:`quote_plus` | ||||
|    above. When a sequence of two-element tuples is used as the *query* | ||||
|    argument, the first element of each tuple is a key and the second is a | ||||
|    value. The value element in itself can be a sequence and in that case, if | ||||
|    the optional parameter *doseq* evaluates to *True*, individual | ||||
|    ``key=value`` pairs separated by ``'&'`` are generated for each element of | ||||
|    the value sequence for the key.  The order of parameters in the encoded | ||||
|    string will match the order of parameter tuples in the sequence. This module | ||||
|    provides the functions :func:`parse_qs` and :func:`parse_qsl` which are used | ||||
|    to parse query strings into Python data structures. | ||||
| 
 | ||||
|    When a *query* element is a :class:`str`, the *safe*, *encoding* and | ||||
|    *errors* parameters are passed to :func:`quote_plus` for encoding. | ||||
| 
 | ||||
|    .. versionchanged:: 3.2 | ||||
|       The *query* parameter supports bytes and string objects. | ||||
| 
 | ||||
| 
 | ||||
| .. seealso:: | ||||
|  |  | |||
|  | @ -797,6 +797,116 @@ def test_nonstring_seq_values(self): | |||
|         self.assertEqual("a=a&a=b", | ||||
|                          urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True)) | ||||
| 
 | ||||
|     def test_urlencode_encoding(self): | ||||
|         # ASCII encoding. Expect %3F with errors="replace" | ||||
|         given = (('\u00a0', '\u00c1'),) | ||||
|         expect = '%3F=%3F' | ||||
|         result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace") | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|         # Default is UTF-8 encoding. | ||||
|         given = (('\u00a0', '\u00c1'),) | ||||
|         expect = '%C2%A0=%C3%81' | ||||
|         result = urllib.parse.urlencode(given) | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|         # Latin-1 encoding. | ||||
|         given = (('\u00a0', '\u00c1'),) | ||||
|         expect = '%A0=%C1' | ||||
|         result = urllib.parse.urlencode(given, encoding="latin-1") | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|     def test_urlencode_encoding_doseq(self): | ||||
|         # ASCII Encoding. Expect %3F with errors="replace" | ||||
|         given = (('\u00a0', '\u00c1'),) | ||||
|         expect = '%3F=%3F' | ||||
|         result = urllib.parse.urlencode(given, doseq=True, | ||||
|                                         encoding="ASCII", errors="replace") | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|         # ASCII Encoding. On a sequence of values. | ||||
|         given = (("\u00a0", (1, "\u00c1")),) | ||||
|         expect = '%3F=1&%3F=%3F' | ||||
|         result = urllib.parse.urlencode(given, True, | ||||
|                                         encoding="ASCII", errors="replace") | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|         # Utf-8 | ||||
|         given = (("\u00a0", "\u00c1"),) | ||||
|         expect = '%C2%A0=%C3%81' | ||||
|         result = urllib.parse.urlencode(given, True) | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|         given = (("\u00a0", (42, "\u00c1")),) | ||||
|         expect = '%C2%A0=42&%C2%A0=%C3%81' | ||||
|         result = urllib.parse.urlencode(given, True) | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|         # latin-1 | ||||
|         given = (("\u00a0", "\u00c1"),) | ||||
|         expect = '%A0=%C1' | ||||
|         result = urllib.parse.urlencode(given, True, encoding="latin-1") | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|         given = (("\u00a0", (42, "\u00c1")),) | ||||
|         expect = '%A0=42&%A0=%C1' | ||||
|         result = urllib.parse.urlencode(given, True, encoding="latin-1") | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|     def test_urlencode_bytes(self): | ||||
|         given = ((b'\xa0\x24', b'\xc1\x24'),) | ||||
|         expect = '%A0%24=%C1%24' | ||||
|         result = urllib.parse.urlencode(given) | ||||
|         self.assertEqual(expect, result) | ||||
|         result = urllib.parse.urlencode(given, True) | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|         # Sequence of values | ||||
|         given = ((b'\xa0\x24', (42, b'\xc1\x24')),) | ||||
|         expect = '%A0%24=42&%A0%24=%C1%24' | ||||
|         result = urllib.parse.urlencode(given, True) | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|     def test_urlencode_encoding_safe_parameter(self): | ||||
| 
 | ||||
|         # Send '$' (\x24) as safe character | ||||
|         # Default utf-8 encoding | ||||
| 
 | ||||
|         given = ((b'\xa0\x24', b'\xc1\x24'),) | ||||
|         result = urllib.parse.urlencode(given, safe=":$") | ||||
|         expect = '%A0$=%C1$' | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|         given = ((b'\xa0\x24', b'\xc1\x24'),) | ||||
|         result = urllib.parse.urlencode(given, doseq=True, safe=":$") | ||||
|         expect = '%A0$=%C1$' | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|         # Safe parameter in sequence | ||||
|         given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) | ||||
|         expect = '%A0$=%C1$&%A0$=13&%A0$=42' | ||||
|         result = urllib.parse.urlencode(given, True, safe=":$") | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|         # Test all above in latin-1 encoding | ||||
| 
 | ||||
|         given = ((b'\xa0\x24', b'\xc1\x24'),) | ||||
|         result = urllib.parse.urlencode(given, safe=":$", | ||||
|                                         encoding="latin-1") | ||||
|         expect = '%A0$=%C1$' | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
|         given = ((b'\xa0\x24', b'\xc1\x24'),) | ||||
|         expect = '%A0$=%C1$' | ||||
|         result = urllib.parse.urlencode(given, doseq=True, safe=":$", | ||||
|                                         encoding="latin-1") | ||||
| 
 | ||||
|         given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) | ||||
|         expect = '%A0$=%C1$&%A0$=13&%A0$=42' | ||||
|         result = urllib.parse.urlencode(given, True, safe=":$", | ||||
|                                         encoding="latin-1") | ||||
|         self.assertEqual(expect, result) | ||||
| 
 | ||||
| class Pathname_Tests(unittest.TestCase): | ||||
|     """Test pathname2url() and url2pathname()""" | ||||
| 
 | ||||
|  |  | |||
|  | @ -533,7 +533,7 @@ def quote_from_bytes(bs, safe='/'): | |||
|         _safe_quoters[cachekey] = quoter | ||||
|     return ''.join([quoter[char] for char in bs]) | ||||
| 
 | ||||
| def urlencode(query, doseq=False): | ||||
| def urlencode(query, doseq=False, safe='', encoding=None, errors=None): | ||||
|     """Encode a sequence of two-element tuples or dictionary into a URL query string. | ||||
| 
 | ||||
|     If any values in the query arg are sequences and doseq is true, each | ||||
|  | @ -542,6 +542,10 @@ def urlencode(query, doseq=False): | |||
|     If the query arg is a sequence of two-element tuples, the order of the | ||||
|     parameters in the output will match the order of parameters in the | ||||
|     input. | ||||
| 
 | ||||
|     The components of the query arg may each be either a string or a bytes | ||||
|     type. When a component is a string, the safe, encoding and errors | ||||
|     parameters are passed to quote_plus for encoding. | ||||
|     """ | ||||
| 
 | ||||
|     if hasattr(query, "items"): | ||||
|  | @ -566,14 +570,28 @@ def urlencode(query, doseq=False): | |||
|     l = [] | ||||
|     if not doseq: | ||||
|         for k, v in query: | ||||
|             k = quote_plus(str(k)) | ||||
|             v = quote_plus(str(v)) | ||||
|             if isinstance(k, bytes): | ||||
|                 k = quote_plus(k, safe) | ||||
|             else: | ||||
|                 k = quote_plus(str(k), safe, encoding, errors) | ||||
| 
 | ||||
|             if isinstance(v, bytes): | ||||
|                 v = quote_plus(v, safe) | ||||
|             else: | ||||
|                 v = quote_plus(str(v), safe, encoding, errors) | ||||
|             l.append(k + '=' + v) | ||||
|     else: | ||||
|         for k, v in query: | ||||
|             k = quote_plus(str(k)) | ||||
|             if isinstance(v, str): | ||||
|                 v = quote_plus(v) | ||||
|             if isinstance(k, bytes): | ||||
|                 k = quote_plus(k, safe) | ||||
|             else: | ||||
|                 k = quote_plus(str(k), safe, encoding, errors) | ||||
| 
 | ||||
|             if isinstance(v, bytes): | ||||
|                 v = quote_plus(v, safe) | ||||
|                 l.append(k + '=' + v) | ||||
|             elif isinstance(v, str): | ||||
|                 v = quote_plus(v, safe, encoding, errors) | ||||
|                 l.append(k + '=' + v) | ||||
|             else: | ||||
|                 try: | ||||
|  | @ -581,12 +599,16 @@ def urlencode(query, doseq=False): | |||
|                     x = len(v) | ||||
|                 except TypeError: | ||||
|                     # not a sequence | ||||
|                     v = quote_plus(str(v)) | ||||
|                     v = quote_plus(str(v), safe, encoding, errors) | ||||
|                     l.append(k + '=' + v) | ||||
|                 else: | ||||
|                     # loop over the sequence | ||||
|                     for elt in v: | ||||
|                         l.append(k + '=' + quote_plus(str(elt))) | ||||
|                         if isinstance(elt, bytes): | ||||
|                             elt = quote_plus(elt, safe) | ||||
|                         else: | ||||
|                             elt = quote_plus(str(elt), safe, encoding, errors) | ||||
|                         l.append(k + '=' + elt) | ||||
|     return '&'.join(l) | ||||
| 
 | ||||
| # Utilities to parse URLs (most of these return None for missing parts): | ||||
|  |  | |||
|  | @ -75,6 +75,9 @@ C-API | |||
| Library | ||||
| ------- | ||||
| 
 | ||||
| - Issue #5468: urlencode to handle bytes type and other encodings in its query | ||||
|   parameter. Patch by Dan Mahn. | ||||
| 
 | ||||
| - Issue #7673: Fix security vulnerability (CVE-2010-2089) in the audioop | ||||
|   module, ensure that the input string length is a multiple of the frame size | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Senthil Kumaran
						Senthil Kumaran