mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	bpo-29979: Rewrite cgi.parse_multipart to make it consistent with FieldStorage (#991)
This commit is contained in:
		
							parent
							
								
									f34c685020
								
							
						
					
					
						commit
						cc3fa204d3
					
				
					 5 changed files with 38 additions and 102 deletions
				
			
		|  | @ -294,19 +294,20 @@ algorithms implemented in this module in other circumstances. | |||
|    This function is deprecated in this module. Use :func:`urllib.parse.parse_qsl` | ||||
|    instead. It is maintained here only for backward compatibility. | ||||
| 
 | ||||
| .. function:: parse_multipart(fp, pdict) | ||||
| .. function:: parse_multipart(fp, pdict, encoding="utf-8") | ||||
| 
 | ||||
|    Parse input of type :mimetype:`multipart/form-data` (for  file uploads). | ||||
|    Arguments are *fp* for the input file and *pdict* for a dictionary containing | ||||
|    other parameters in the :mailheader:`Content-Type` header. | ||||
|    Arguments are *fp* for the input file, *pdict* for a dictionary containing | ||||
|    other parameters in the :mailheader:`Content-Type` header, and *encoding*, | ||||
|    the request encoding. | ||||
| 
 | ||||
|    Returns a dictionary just like :func:`urllib.parse.parse_qs` keys are the field names, each | ||||
|    value is a list of values for that field.  This is easy to use but not much good | ||||
|    if you are expecting megabytes to be uploaded --- in that case, use the | ||||
|    :class:`FieldStorage` class instead which is much more flexible. | ||||
|    Returns a dictionary just like :func:`urllib.parse.parse_qs`: keys are the | ||||
|    field names, each value is a list of values for that field. For non-file | ||||
|    fields, the value is a list of strings. | ||||
| 
 | ||||
|    Note that this does not parse nested multipart parts --- use | ||||
|    :class:`FieldStorage` for that. | ||||
|    This is easy to use but not much good if you are expecting megabytes to be | ||||
|    uploaded --- in that case, use the :class:`FieldStorage` class instead | ||||
|    which is much more flexible. | ||||
| 
 | ||||
| 
 | ||||
| .. function:: parse_header(string) | ||||
|  |  | |||
|  | @ -95,6 +95,14 @@ New Modules | |||
| Improved Modules | ||||
| ================ | ||||
| 
 | ||||
| cgi | ||||
| --- | ||||
| 
 | ||||
| :func:`~cgi.parse_multipart` returns the same results as | ||||
| :class:`~FieldStorage` : for non-file fields, the value associated to a key | ||||
| is a list of strings, not bytes. | ||||
| (Contributed by Pierre Quentel in :issue:`29979`.) | ||||
| 
 | ||||
| binascii | ||||
| -------- | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										105
									
								
								Lib/cgi.py
									
										
									
									
									
								
							
							
						
						
									
										105
									
								
								Lib/cgi.py
									
										
									
									
									
								
							|  | @ -198,105 +198,28 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): | |||
|          DeprecationWarning, 2) | ||||
|     return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing) | ||||
| 
 | ||||
| def parse_multipart(fp, pdict): | ||||
| def parse_multipart(fp, pdict, encoding="utf-8"): | ||||
|     """Parse multipart input. | ||||
| 
 | ||||
|     Arguments: | ||||
|     fp   : input file | ||||
|     pdict: dictionary containing other parameters of content-type header | ||||
|     encoding: request encoding | ||||
| 
 | ||||
|     Returns a dictionary just like parse_qs(): keys are the field names, each | ||||
|     value is a list of values for that field.  This is easy to use but not | ||||
|     much good if you are expecting megabytes to be uploaded -- in that case, | ||||
|     use the FieldStorage class instead which is much more flexible.  Note | ||||
|     that content-type is the raw, unparsed contents of the content-type | ||||
|     header. | ||||
| 
 | ||||
|     XXX This does not parse nested multipart parts -- use FieldStorage for | ||||
|     that. | ||||
| 
 | ||||
|     XXX This should really be subsumed by FieldStorage altogether -- no | ||||
|     point in having two implementations of the same parsing algorithm. | ||||
|     Also, FieldStorage protects itself better against certain DoS attacks | ||||
|     by limiting the size of the data read in one chunk.  The API here | ||||
|     does not support that kind of protection.  This also affects parse() | ||||
|     since it can call parse_multipart(). | ||||
| 
 | ||||
|     value is a list of values for that field. For non-file fields, the value | ||||
|     is a list of strings. | ||||
|     """ | ||||
|     import http.client | ||||
| 
 | ||||
|     boundary = b"" | ||||
|     if 'boundary' in pdict: | ||||
|         boundary = pdict['boundary'] | ||||
|     if not valid_boundary(boundary): | ||||
|         raise ValueError('Invalid boundary in multipart form: %r' | ||||
|                             % (boundary,)) | ||||
| 
 | ||||
|     nextpart = b"--" + boundary | ||||
|     lastpart = b"--" + boundary + b"--" | ||||
|     partdict = {} | ||||
|     terminator = b"" | ||||
| 
 | ||||
|     while terminator != lastpart: | ||||
|         bytes = -1 | ||||
|         data = None | ||||
|         if terminator: | ||||
|             # At start of next part.  Read headers first. | ||||
|             headers = http.client.parse_headers(fp) | ||||
|             clength = headers.get('content-length') | ||||
|             if clength: | ||||
|                 try: | ||||
|                     bytes = int(clength) | ||||
|                 except ValueError: | ||||
|                     pass | ||||
|             if bytes > 0: | ||||
|                 if maxlen and bytes > maxlen: | ||||
|                     raise ValueError('Maximum content length exceeded') | ||||
|                 data = fp.read(bytes) | ||||
|             else: | ||||
|                 data = b"" | ||||
|         # Read lines until end of part. | ||||
|         lines = [] | ||||
|         while 1: | ||||
|             line = fp.readline() | ||||
|             if not line: | ||||
|                 terminator = lastpart # End outer loop | ||||
|                 break | ||||
|             if line.startswith(b"--"): | ||||
|                 terminator = line.rstrip() | ||||
|                 if terminator in (nextpart, lastpart): | ||||
|                     break | ||||
|             lines.append(line) | ||||
|         # Done with part. | ||||
|         if data is None: | ||||
|             continue | ||||
|         if bytes < 0: | ||||
|             if lines: | ||||
|                 # Strip final line terminator | ||||
|                 line = lines[-1] | ||||
|                 if line[-2:] == b"\r\n": | ||||
|                     line = line[:-2] | ||||
|                 elif line[-1:] == b"\n": | ||||
|                     line = line[:-1] | ||||
|                 lines[-1] = line | ||||
|                 data = b"".join(lines) | ||||
|         line = headers['content-disposition'] | ||||
|         if not line: | ||||
|             continue | ||||
|         key, params = parse_header(line) | ||||
|         if key != 'form-data': | ||||
|             continue | ||||
|         if 'name' in params: | ||||
|             name = params['name'] | ||||
|         else: | ||||
|             continue | ||||
|         if name in partdict: | ||||
|             partdict[name].append(data) | ||||
|         else: | ||||
|             partdict[name] = [data] | ||||
| 
 | ||||
|     return partdict | ||||
| 
 | ||||
|     # RFC 2026, Section 5.1 : The "multipart" boundary delimiters are always | ||||
|     # represented as 7bit US-ASCII. | ||||
|     boundary = pdict['boundary'].decode('ascii') | ||||
|     ctype = "multipart/form-data; boundary={}".format(boundary) | ||||
|     headers = Message() | ||||
|     headers.set_type(ctype) | ||||
|     headers['Content-Length'] = pdict['CONTENT-LENGTH'] | ||||
|     fs = FieldStorage(fp, headers=headers, encoding=encoding, | ||||
|         environ={'REQUEST_METHOD': 'POST'}) | ||||
|     return {k: fs.getlist(k) for k in fs} | ||||
| 
 | ||||
| def _parseparam(s): | ||||
|     while s[:1] == ';': | ||||
|  |  | |||
|  | @ -126,8 +126,8 @@ def test_parse_multipart(self): | |||
|         env = {'boundary': BOUNDARY.encode('latin1'), | ||||
|                'CONTENT-LENGTH': '558'} | ||||
|         result = cgi.parse_multipart(fp, env) | ||||
|         expected = {'submit': [b' Add '], 'id': [b'1234'], | ||||
|                     'file': [b'Testing 123.\n'], 'title': [b'']} | ||||
|         expected = {'submit': [' Add '], 'id': ['1234'], | ||||
|                     'file': [b'Testing 123.\n'], 'title': ['']} | ||||
|         self.assertEqual(result, expected) | ||||
| 
 | ||||
|     def test_fieldstorage_properties(self): | ||||
|  |  | |||
|  | @ -317,6 +317,10 @@ Extension Modules | |||
| Library | ||||
| ------- | ||||
| 
 | ||||
| - bpo-29979: rewrite cgi.parse_multipart, reusing the FieldStorage class and | ||||
|   making its results consistent with those of FieldStorage for | ||||
|   multipart/form-data requests. Patch by Pierre Quentel. | ||||
| 
 | ||||
| - bpo-30243: Removed the __init__ methods of _json's scanner and encoder. | ||||
|   Misusing them could cause memory leaks or crashes.  Now scanner and encoder | ||||
|   objects are completely initialized in the __new__ methods. | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Pierre Quentel
						Pierre Quentel