mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	bpo-29979: Rewrite cgi.parse_multipart to make it consistent with FieldStorage (#991)
This commit is contained in:
		
							parent
							
								
									f34c685020
								
							
						
					
					
						commit
						cc3fa204d3
					
				
					 5 changed files with 38 additions and 102 deletions
				
			
		|  | @ -294,19 +294,20 @@ algorithms implemented in this module in other circumstances. | ||||||
|    This function is deprecated in this module. Use :func:`urllib.parse.parse_qsl` |    This function is deprecated in this module. Use :func:`urllib.parse.parse_qsl` | ||||||
|    instead. It is maintained here only for backward compatibility. |    instead. It is maintained here only for backward compatibility. | ||||||
| 
 | 
 | ||||||
| .. function:: parse_multipart(fp, pdict) | .. function:: parse_multipart(fp, pdict, encoding="utf-8") | ||||||
| 
 | 
 | ||||||
|    Parse input of type :mimetype:`multipart/form-data` (for  file uploads). |    Parse input of type :mimetype:`multipart/form-data` (for  file uploads). | ||||||
|    Arguments are *fp* for the input file and *pdict* for a dictionary containing |    Arguments are *fp* for the input file, *pdict* for a dictionary containing | ||||||
|    other parameters in the :mailheader:`Content-Type` header. |    other parameters in the :mailheader:`Content-Type` header, and *encoding*, | ||||||
|  |    the request encoding. | ||||||
| 
 | 
 | ||||||
|    Returns a dictionary just like :func:`urllib.parse.parse_qs` keys are the field names, each |    Returns a dictionary just like :func:`urllib.parse.parse_qs`: keys are the | ||||||
|    value is a list of values for that field.  This is easy to use but not much good |    field names, each value is a list of values for that field. For non-file | ||||||
|    if you are expecting megabytes to be uploaded --- in that case, use the |    fields, the value is a list of strings. | ||||||
|    :class:`FieldStorage` class instead which is much more flexible. |  | ||||||
| 
 | 
 | ||||||
|    Note that this does not parse nested multipart parts --- use |    This is easy to use but not much good if you are expecting megabytes to be | ||||||
|    :class:`FieldStorage` for that. |    uploaded --- in that case, use the :class:`FieldStorage` class instead | ||||||
|  |    which is much more flexible. | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| .. function:: parse_header(string) | .. function:: parse_header(string) | ||||||
|  |  | ||||||
|  | @ -95,6 +95,14 @@ New Modules | ||||||
| Improved Modules | Improved Modules | ||||||
| ================ | ================ | ||||||
| 
 | 
 | ||||||
|  | cgi | ||||||
|  | --- | ||||||
|  | 
 | ||||||
|  | :func:`~cgi.parse_multipart` returns the same results as | ||||||
|  | :class:`~cgi.FieldStorage`: for non-file fields, the value associated with a key | ||||||
|  | is a list of strings, not bytes. | ||||||
|  | (Contributed by Pierre Quentel in :issue:`29979`.) | ||||||
|  | 
 | ||||||
| binascii | binascii | ||||||
| -------- | -------- | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										105
									
								
								Lib/cgi.py
									
										
									
									
									
								
							
							
						
						
									
										105
									
								
								Lib/cgi.py
									
										
									
									
									
								
							|  | @ -198,105 +198,28 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): | ||||||
|          DeprecationWarning, 2) |          DeprecationWarning, 2) | ||||||
|     return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing) |     return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing) | ||||||
| 
 | 
 | ||||||
| def parse_multipart(fp, pdict): | def parse_multipart(fp, pdict, encoding="utf-8"): | ||||||
|     """Parse multipart input. |     """Parse multipart input. | ||||||
| 
 | 
 | ||||||
|     Arguments: |     Arguments: | ||||||
|     fp   : input file |     fp   : input file | ||||||
|     pdict: dictionary containing other parameters of content-type header |     pdict: dictionary containing other parameters of content-type header | ||||||
|  |     encoding: request encoding | ||||||
| 
 | 
 | ||||||
|     Returns a dictionary just like parse_qs(): keys are the field names, each |     Returns a dictionary just like parse_qs(): keys are the field names, each | ||||||
|     value is a list of values for that field.  This is easy to use but not |     value is a list of values for that field. For non-file fields, the value | ||||||
|     much good if you are expecting megabytes to be uploaded -- in that case, |     is a list of strings. | ||||||
|     use the FieldStorage class instead which is much more flexible.  Note |  | ||||||
|     that content-type is the raw, unparsed contents of the content-type |  | ||||||
|     header. |  | ||||||
| 
 |  | ||||||
|     XXX This does not parse nested multipart parts -- use FieldStorage for |  | ||||||
|     that. |  | ||||||
| 
 |  | ||||||
|     XXX This should really be subsumed by FieldStorage altogether -- no |  | ||||||
|     point in having two implementations of the same parsing algorithm. |  | ||||||
|     Also, FieldStorage protects itself better against certain DoS attacks |  | ||||||
|     by limiting the size of the data read in one chunk.  The API here |  | ||||||
|     does not support that kind of protection.  This also affects parse() |  | ||||||
|     since it can call parse_multipart(). |  | ||||||
| 
 |  | ||||||
|     """ |     """ | ||||||
|     import http.client |     # RFC 2046, Section 5.1 : The "multipart" boundary delimiters are always | ||||||
| 
 |     # represented as 7bit US-ASCII. | ||||||
|     boundary = b"" |     boundary = pdict['boundary'].decode('ascii') | ||||||
|     if 'boundary' in pdict: |     ctype = "multipart/form-data; boundary={}".format(boundary) | ||||||
|         boundary = pdict['boundary'] |     headers = Message() | ||||||
|     if not valid_boundary(boundary): |     headers.set_type(ctype) | ||||||
|         raise ValueError('Invalid boundary in multipart form: %r' |     headers['Content-Length'] = pdict['CONTENT-LENGTH'] | ||||||
|                             % (boundary,)) |     fs = FieldStorage(fp, headers=headers, encoding=encoding, | ||||||
| 
 |         environ={'REQUEST_METHOD': 'POST'}) | ||||||
|     nextpart = b"--" + boundary |     return {k: fs.getlist(k) for k in fs} | ||||||
|     lastpart = b"--" + boundary + b"--" |  | ||||||
|     partdict = {} |  | ||||||
|     terminator = b"" |  | ||||||
| 
 |  | ||||||
|     while terminator != lastpart: |  | ||||||
|         bytes = -1 |  | ||||||
|         data = None |  | ||||||
|         if terminator: |  | ||||||
|             # At start of next part.  Read headers first. |  | ||||||
|             headers = http.client.parse_headers(fp) |  | ||||||
|             clength = headers.get('content-length') |  | ||||||
|             if clength: |  | ||||||
|                 try: |  | ||||||
|                     bytes = int(clength) |  | ||||||
|                 except ValueError: |  | ||||||
|                     pass |  | ||||||
|             if bytes > 0: |  | ||||||
|                 if maxlen and bytes > maxlen: |  | ||||||
|                     raise ValueError('Maximum content length exceeded') |  | ||||||
|                 data = fp.read(bytes) |  | ||||||
|             else: |  | ||||||
|                 data = b"" |  | ||||||
|         # Read lines until end of part. |  | ||||||
|         lines = [] |  | ||||||
|         while 1: |  | ||||||
|             line = fp.readline() |  | ||||||
|             if not line: |  | ||||||
|                 terminator = lastpart # End outer loop |  | ||||||
|                 break |  | ||||||
|             if line.startswith(b"--"): |  | ||||||
|                 terminator = line.rstrip() |  | ||||||
|                 if terminator in (nextpart, lastpart): |  | ||||||
|                     break |  | ||||||
|             lines.append(line) |  | ||||||
|         # Done with part. |  | ||||||
|         if data is None: |  | ||||||
|             continue |  | ||||||
|         if bytes < 0: |  | ||||||
|             if lines: |  | ||||||
|                 # Strip final line terminator |  | ||||||
|                 line = lines[-1] |  | ||||||
|                 if line[-2:] == b"\r\n": |  | ||||||
|                     line = line[:-2] |  | ||||||
|                 elif line[-1:] == b"\n": |  | ||||||
|                     line = line[:-1] |  | ||||||
|                 lines[-1] = line |  | ||||||
|                 data = b"".join(lines) |  | ||||||
|         line = headers['content-disposition'] |  | ||||||
|         if not line: |  | ||||||
|             continue |  | ||||||
|         key, params = parse_header(line) |  | ||||||
|         if key != 'form-data': |  | ||||||
|             continue |  | ||||||
|         if 'name' in params: |  | ||||||
|             name = params['name'] |  | ||||||
|         else: |  | ||||||
|             continue |  | ||||||
|         if name in partdict: |  | ||||||
|             partdict[name].append(data) |  | ||||||
|         else: |  | ||||||
|             partdict[name] = [data] |  | ||||||
| 
 |  | ||||||
|     return partdict |  | ||||||
| 
 |  | ||||||
| 
 | 
 | ||||||
| def _parseparam(s): | def _parseparam(s): | ||||||
|     while s[:1] == ';': |     while s[:1] == ';': | ||||||
|  |  | ||||||
|  | @ -126,8 +126,8 @@ def test_parse_multipart(self): | ||||||
|         env = {'boundary': BOUNDARY.encode('latin1'), |         env = {'boundary': BOUNDARY.encode('latin1'), | ||||||
|                'CONTENT-LENGTH': '558'} |                'CONTENT-LENGTH': '558'} | ||||||
|         result = cgi.parse_multipart(fp, env) |         result = cgi.parse_multipart(fp, env) | ||||||
|         expected = {'submit': [b' Add '], 'id': [b'1234'], |         expected = {'submit': [' Add '], 'id': ['1234'], | ||||||
|                     'file': [b'Testing 123.\n'], 'title': [b'']} |                     'file': [b'Testing 123.\n'], 'title': ['']} | ||||||
|         self.assertEqual(result, expected) |         self.assertEqual(result, expected) | ||||||
| 
 | 
 | ||||||
|     def test_fieldstorage_properties(self): |     def test_fieldstorage_properties(self): | ||||||
|  |  | ||||||
|  | @ -317,6 +317,10 @@ Extension Modules | ||||||
| Library | Library | ||||||
| ------- | ------- | ||||||
| 
 | 
 | ||||||
|  | - bpo-29979: rewrite cgi.parse_multipart, reusing the FieldStorage class and | ||||||
|  |   making its results consistent with those of FieldStorage for | ||||||
|  |   multipart/form-data requests. Patch by Pierre Quentel. | ||||||
|  | 
 | ||||||
| - bpo-30243: Removed the __init__ methods of _json's scanner and encoder. | - bpo-30243: Removed the __init__ methods of _json's scanner and encoder. | ||||||
|   Misusing them could cause memory leaks or crashes.  Now scanner and encoder |   Misusing them could cause memory leaks or crashes.  Now scanner and encoder | ||||||
|   objects are completely initialized in the __new__ methods. |   objects are completely initialized in the __new__ methods. | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Pierre Quentel
						Pierre Quentel