mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	Fix Issue5468 - urlencode to handle bytes and other alternate encodings.
(Extensive tests provided). Patch by Dan Mahn.
This commit is contained in:
		
							parent
							
								
									85029334f6
								
							
						
					
					
						commit
						df022da3d8
					
				
					 4 changed files with 165 additions and 24 deletions
				
			
		|  | @ -310,23 +310,29 @@ The :mod:`urllib.parse` module defines the following functions: | ||||||
|    ``b'a&\xef'``. |    ``b'a&\xef'``. | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| .. function:: urlencode(query, doseq=False) | .. function:: urlencode(query, doseq=False, safe='', encoding=None, errors=None) | ||||||
| 
 | 
 | ||||||
|    Convert a mapping object or a sequence of two-element tuples to a |    Convert a mapping object or a sequence of two-element tuples, which may | ||||||
|    "url-encoded" string, suitable to pass to :func:`urlopen` above as the |    contain :class:`str` or :class:`bytes` objects, to a "url-encoded" string, | ||||||
|    optional *data* argument.  This is useful to pass a dictionary of form |    suitable to pass to :func:`urlopen` above as the optional *data* argument. | ||||||
|    fields to a ``POST`` request.  The resulting string is a series of |    This is useful to pass a dictionary of form fields to a ``POST`` request. | ||||||
|    ``key=value`` pairs separated by ``'&'`` characters, where both *key* and |    The resulting string is a series of ``key=value`` pairs separated by ``'&'`` | ||||||
|    *value* are quoted using :func:`quote_plus` above. When a sequence of |    characters, where both *key* and *value* are quoted using :func:`quote_plus` | ||||||
|    two-element tuples is used as the *query* argument, the first element of |    above. When a sequence of two-element tuples is used as the *query* | ||||||
|    each tuple is a key and the second is a value. The value element in itself |    argument, the first element of each tuple is a key and the second is a | ||||||
|    can be a sequence and in that case, if the optional parameter *doseq* is |    value. The value element in itself can be a sequence and in that case, if | ||||||
|    evaluates to *True*, individual ``key=value`` pairs separated by ``'&'`` are |    the optional parameter *doseq* evaluates to *True*, individual | ||||||
|    generated for each element of the value sequence for the key.  The order of |    ``key=value`` pairs separated by ``'&'`` are generated for each element of | ||||||
|    parameters in the encoded string will match the order of parameter tuples in |    the value sequence for the key.  The order of parameters in the encoded | ||||||
|    the sequence. This module provides the functions :func:`parse_qs` and |    string will match the order of parameter tuples in the sequence. This module | ||||||
|    :func:`parse_qsl` which are used to parse query strings into Python data |    provides the functions :func:`parse_qs` and :func:`parse_qsl` which are used | ||||||
|    structures. |    to parse query strings into Python data structures. | ||||||
|  | 
 | ||||||
|  |    When *query* contains :class:`str` keys or values, the *safe*, *encoding* and *errors* | ||||||
|  |    parameters are passed to :func:`quote_plus` for encoding. | ||||||
|  | 
 | ||||||
|  |    .. versionchanged:: 3.2 | ||||||
|  |       The *query* parameter supports bytes and string objects. | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| .. seealso:: | .. seealso:: | ||||||
|  |  | ||||||
|  | @ -795,6 +795,116 @@ def test_nonstring_seq_values(self): | ||||||
|         self.assertEqual("a=a&a=b", |         self.assertEqual("a=a&a=b", | ||||||
|                          urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True)) |                          urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True)) | ||||||
| 
 | 
 | ||||||
|  |     def test_urlencode_encoding(self): | ||||||
|  |         # ASCII encoding. Expect %3F with errors="replace" | ||||||
|  |         given = (('\u00a0', '\u00c1'),) | ||||||
|  |         expect = '%3F=%3F' | ||||||
|  |         result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace") | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |         # Default is UTF-8 encoding. | ||||||
|  |         given = (('\u00a0', '\u00c1'),) | ||||||
|  |         expect = '%C2%A0=%C3%81' | ||||||
|  |         result = urllib.parse.urlencode(given) | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |         # Latin-1 encoding. | ||||||
|  |         given = (('\u00a0', '\u00c1'),) | ||||||
|  |         expect = '%A0=%C1' | ||||||
|  |         result = urllib.parse.urlencode(given, encoding="latin-1") | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |     def test_urlencode_encoding_doseq(self): | ||||||
|  |         # ASCII Encoding. Expect %3F with errors="replace" | ||||||
|  |         given = (('\u00a0', '\u00c1'),) | ||||||
|  |         expect = '%3F=%3F' | ||||||
|  |         result = urllib.parse.urlencode(given, doseq=True, | ||||||
|  |                                         encoding="ASCII", errors="replace") | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |         # ASCII Encoding. On a sequence of values. | ||||||
|  |         given = (("\u00a0", (1, "\u00c1")),) | ||||||
|  |         expect = '%3F=1&%3F=%3F' | ||||||
|  |         result = urllib.parse.urlencode(given, True, | ||||||
|  |                                         encoding="ASCII", errors="replace") | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |         # Utf-8 | ||||||
|  |         given = (("\u00a0", "\u00c1"),) | ||||||
|  |         expect = '%C2%A0=%C3%81' | ||||||
|  |         result = urllib.parse.urlencode(given, True) | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |         given = (("\u00a0", (42, "\u00c1")),) | ||||||
|  |         expect = '%C2%A0=42&%C2%A0=%C3%81' | ||||||
|  |         result = urllib.parse.urlencode(given, True) | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |         # latin-1 | ||||||
|  |         given = (("\u00a0", "\u00c1"),) | ||||||
|  |         expect = '%A0=%C1' | ||||||
|  |         result = urllib.parse.urlencode(given, True, encoding="latin-1") | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |         given = (("\u00a0", (42, "\u00c1")),) | ||||||
|  |         expect = '%A0=42&%A0=%C1' | ||||||
|  |         result = urllib.parse.urlencode(given, True, encoding="latin-1") | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |     def test_urlencode_bytes(self): | ||||||
|  |         given = ((b'\xa0\x24', b'\xc1\x24'),) | ||||||
|  |         expect = '%A0%24=%C1%24' | ||||||
|  |         result = urllib.parse.urlencode(given) | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  |         result = urllib.parse.urlencode(given, True) | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |         # Sequence of values | ||||||
|  |         given = ((b'\xa0\x24', (42, b'\xc1\x24')),) | ||||||
|  |         expect = '%A0%24=42&%A0%24=%C1%24' | ||||||
|  |         result = urllib.parse.urlencode(given, True) | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |     def test_urlencode_encoding_safe_parameter(self): | ||||||
|  | 
 | ||||||
|  |         # Send '$' (\x24) as safe character | ||||||
|  |         # Default utf-8 encoding | ||||||
|  | 
 | ||||||
|  |         given = ((b'\xa0\x24', b'\xc1\x24'),) | ||||||
|  |         result = urllib.parse.urlencode(given, safe=":$") | ||||||
|  |         expect = '%A0$=%C1$' | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |         given = ((b'\xa0\x24', b'\xc1\x24'),) | ||||||
|  |         result = urllib.parse.urlencode(given, doseq=True, safe=":$") | ||||||
|  |         expect = '%A0$=%C1$' | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |         # Safe parameter in sequence | ||||||
|  |         given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) | ||||||
|  |         expect = '%A0$=%C1$&%A0$=13&%A0$=42' | ||||||
|  |         result = urllib.parse.urlencode(given, True, safe=":$") | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |         # Test all above in latin-1 encoding | ||||||
|  | 
 | ||||||
|  |         given = ((b'\xa0\x24', b'\xc1\x24'),) | ||||||
|  |         result = urllib.parse.urlencode(given, safe=":$", | ||||||
|  |                                         encoding="latin-1") | ||||||
|  |         expect = '%A0$=%C1$' | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
|  |         given = ((b'\xa0\x24', b'\xc1\x24'),) | ||||||
|  |         expect = '%A0$=%C1$' | ||||||
|  |         result = urllib.parse.urlencode(given, doseq=True, safe=":$", | ||||||
|  |                                         encoding="latin-1") | ||||||
|  | 
 | ||||||
|  |         given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) | ||||||
|  |         expect = '%A0$=%C1$&%A0$=13&%A0$=42' | ||||||
|  |         result = urllib.parse.urlencode(given, True, safe=":$", | ||||||
|  |                                         encoding="latin-1") | ||||||
|  |         self.assertEqual(expect, result) | ||||||
|  | 
 | ||||||
| class Pathname_Tests(unittest.TestCase): | class Pathname_Tests(unittest.TestCase): | ||||||
|     """Test pathname2url() and url2pathname()""" |     """Test pathname2url() and url2pathname()""" | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -559,7 +559,7 @@ def quote_from_bytes(bs, safe='/'): | ||||||
|         _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ |         _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ | ||||||
|     return ''.join([quoter(char) for char in bs]) |     return ''.join([quoter(char) for char in bs]) | ||||||
| 
 | 
 | ||||||
| def urlencode(query, doseq=False): | def urlencode(query, doseq=False, safe='', encoding=None, errors=None): | ||||||
|     """Encode a sequence of two-element tuples or dictionary into a URL query string. |     """Encode a sequence of two-element tuples or dictionary into a URL query string. | ||||||
| 
 | 
 | ||||||
|     If any values in the query arg are sequences and doseq is true, each |     If any values in the query arg are sequences and doseq is true, each | ||||||
|  | @ -568,6 +568,10 @@ def urlencode(query, doseq=False): | ||||||
|     If the query arg is a sequence of two-element tuples, the order of the |     If the query arg is a sequence of two-element tuples, the order of the | ||||||
|     parameters in the output will match the order of parameters in the |     parameters in the output will match the order of parameters in the | ||||||
|     input. |     input. | ||||||
|  | 
 | ||||||
|  |     The query arg may be either a string or a bytes type. When query arg is a | ||||||
|  |     string, the safe, encoding and errors parameters are passed to quote_plus for | ||||||
|  |     encoding. | ||||||
|     """ |     """ | ||||||
| 
 | 
 | ||||||
|     if hasattr(query, "items"): |     if hasattr(query, "items"): | ||||||
|  | @ -592,14 +596,28 @@ def urlencode(query, doseq=False): | ||||||
|     l = [] |     l = [] | ||||||
|     if not doseq: |     if not doseq: | ||||||
|         for k, v in query: |         for k, v in query: | ||||||
|             k = quote_plus(str(k)) |             if isinstance(k, bytes): | ||||||
|             v = quote_plus(str(v)) |                 k = quote_plus(k, safe) | ||||||
|  |             else: | ||||||
|  |                 k = quote_plus(str(k), safe, encoding, errors) | ||||||
|  | 
 | ||||||
|  |             if isinstance(v, bytes): | ||||||
|  |                 v = quote_plus(v, safe) | ||||||
|  |             else: | ||||||
|  |                 v = quote_plus(str(v), safe, encoding, errors) | ||||||
|             l.append(k + '=' + v) |             l.append(k + '=' + v) | ||||||
|     else: |     else: | ||||||
|         for k, v in query: |         for k, v in query: | ||||||
|             k = quote_plus(str(k)) |             if isinstance(k, bytes): | ||||||
|             if isinstance(v, str): |                 k = quote_plus(k, safe) | ||||||
|                 v = quote_plus(v) |             else: | ||||||
|  |                 k = quote_plus(str(k), safe, encoding, errors) | ||||||
|  | 
 | ||||||
|  |             if isinstance(v, bytes): | ||||||
|  |                 v = quote_plus(v, safe) | ||||||
|  |                 l.append(k + '=' + v) | ||||||
|  |             elif isinstance(v, str): | ||||||
|  |                 v = quote_plus(v, safe, encoding, errors) | ||||||
|                 l.append(k + '=' + v) |                 l.append(k + '=' + v) | ||||||
|             else: |             else: | ||||||
|                 try: |                 try: | ||||||
|  | @ -607,12 +625,16 @@ def urlencode(query, doseq=False): | ||||||
|                     x = len(v) |                     x = len(v) | ||||||
|                 except TypeError: |                 except TypeError: | ||||||
|                     # not a sequence |                     # not a sequence | ||||||
|                     v = quote_plus(str(v)) |                     v = quote_plus(str(v), safe, encoding, errors) | ||||||
|                     l.append(k + '=' + v) |                     l.append(k + '=' + v) | ||||||
|                 else: |                 else: | ||||||
|                     # loop over the sequence |                     # loop over the sequence | ||||||
|                     for elt in v: |                     for elt in v: | ||||||
|                         l.append(k + '=' + quote_plus(str(elt))) |                         if isinstance(elt, bytes): | ||||||
|  |                             elt = quote_plus(elt, safe) | ||||||
|  |                         else: | ||||||
|  |                             elt = quote_plus(str(elt), safe, encoding, errors) | ||||||
|  |                         l.append(k + '=' + elt) | ||||||
|     return '&'.join(l) |     return '&'.join(l) | ||||||
| 
 | 
 | ||||||
| # Utilities to parse URLs (most of these return None for missing parts): | # Utilities to parse URLs (most of these return None for missing parts): | ||||||
|  |  | ||||||
|  | @ -468,6 +468,9 @@ C-API | ||||||
| Library | Library | ||||||
| ------- | ------- | ||||||
| 
 | 
 | ||||||
|  | - Issue #5468: urlencode to handle bytes type and other encodings in its query | ||||||
|  |   parameter. Patch by Dan Mahn. | ||||||
|  | 
 | ||||||
| - Issue #7673: Fix security vulnerability (CVE-2010-2089) in the audioop | - Issue #7673: Fix security vulnerability (CVE-2010-2089) in the audioop | ||||||
|   module, ensure that the input string length is a multiple of the frame size |   module, ensure that the input string length is a multiple of the frame size | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Senthil Kumaran
						Senthil Kumaran