mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	Forward port some fixes that were in email 2.5 but for some reason didn't make
it into email 4.0. Specifically, in Message.get_content_charset(), handle RFC 2231 headers that contain an encoding not known to Python, or a character in the data that isn't in the charset encoding. Also forward port the appropriate unit tests.
This commit is contained in:
		
							parent
							
								
									9815f8b252
								
							
						
					
					
						commit
						d92ae78bdb
					
				
					 3 changed files with 100 additions and 1 deletions
				
			
		|  | @ -747,7 +747,18 @@ def get_content_charset(self, failobj=None): | |||
|         if isinstance(charset, tuple): | ||||
|             # RFC 2231 encoded, so decode it, and it better end up as ascii. | ||||
|             pcharset = charset[0] or 'us-ascii' | ||||
|             charset = unicode(charset[2], pcharset).encode('us-ascii') | ||||
|             try: | ||||
|                 # LookupError will be raised if the charset isn't known to | ||||
|                 # Python.  UnicodeError will be raised if the encoded text | ||||
|                 # contains a character not in the charset. | ||||
|                 charset = unicode(charset[2], pcharset).encode('us-ascii') | ||||
|             except (LookupError, UnicodeError): | ||||
|                 charset = charset[2] | ||||
|         # charset characters must be in the us-ascii range | ||||
|         try: | ||||
|             charset = unicode(charset, 'us-ascii').encode('us-ascii') | ||||
|         except UnicodeError: | ||||
|             return failobj | ||||
|         # RFC 2046, $4.1.2 says charsets are not case sensitive | ||||
|         return charset.lower() | ||||
| 
 | ||||
|  |  | |||
|  | @ -3086,6 +3086,50 @@ def test_rfc2231_no_language_or_charset_in_charset(self): | |||
|         self.assertEqual(msg.get_content_charset(), | ||||
|                          'this is even more ***fun*** is it not.pdf') | ||||
| 
 | ||||
|     def test_rfc2231_bad_encoding_in_filename(self): | ||||
|         m = '''\ | ||||
| Content-Disposition: inline; | ||||
| \tfilename*0*="bogus'xx'This%20is%20even%20more%20"; | ||||
| \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; | ||||
| \tfilename*2="is it not.pdf" | ||||
| 
 | ||||
| ''' | ||||
|         msg = email.message_from_string(m) | ||||
|         self.assertEqual(msg.get_filename(), | ||||
|                          'This is even more ***fun*** is it not.pdf') | ||||
| 
 | ||||
|     def test_rfc2231_bad_encoding_in_charset(self): | ||||
|         m = """\ | ||||
| Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D | ||||
| 
 | ||||
| """ | ||||
|         msg = email.message_from_string(m) | ||||
|         # This should return None because non-ascii characters in the charset | ||||
|         # are not allowed. | ||||
|         self.assertEqual(msg.get_content_charset(), None) | ||||
| 
 | ||||
|     def test_rfc2231_bad_character_in_charset(self): | ||||
|         m = """\ | ||||
| Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D | ||||
| 
 | ||||
| """ | ||||
|         msg = email.message_from_string(m) | ||||
|         # This should return None because non-ascii characters in the charset | ||||
|         # are not allowed. | ||||
|         self.assertEqual(msg.get_content_charset(), None) | ||||
| 
 | ||||
|     def test_rfc2231_bad_character_in_filename(self): | ||||
|         m = '''\ | ||||
| Content-Disposition: inline; | ||||
| \tfilename*0*="ascii'xx'This%20is%20even%20more%20"; | ||||
| \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; | ||||
| \tfilename*2*="is it not.pdf%E2" | ||||
| 
 | ||||
| ''' | ||||
|         msg = email.message_from_string(m) | ||||
|         self.assertEqual(msg.get_filename(), | ||||
|                          u'This is even more ***fun*** is it not.pdf\ufffd') | ||||
| 
 | ||||
|     def test_rfc2231_unknown_encoding(self): | ||||
|         m = """\ | ||||
| Content-Transfer-Encoding: 8bit | ||||
|  |  | |||
|  | @ -3092,6 +3092,50 @@ def test_rfc2231_no_language_or_charset_in_charset(self): | |||
|         self.assertEqual(msg.get_content_charset(), | ||||
|                          'this is even more ***fun*** is it not.pdf') | ||||
| 
 | ||||
|     def test_rfc2231_bad_encoding_in_filename(self): | ||||
|         m = '''\ | ||||
| Content-Disposition: inline; | ||||
| \tfilename*0*="bogus'xx'This%20is%20even%20more%20"; | ||||
| \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; | ||||
| \tfilename*2="is it not.pdf" | ||||
| 
 | ||||
| ''' | ||||
|         msg = email.message_from_string(m) | ||||
|         self.assertEqual(msg.get_filename(), | ||||
|                          'This is even more ***fun*** is it not.pdf') | ||||
| 
 | ||||
|     def test_rfc2231_bad_encoding_in_charset(self): | ||||
|         m = """\ | ||||
| Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D | ||||
| 
 | ||||
| """ | ||||
|         msg = email.message_from_string(m) | ||||
|         # This should return None because non-ascii characters in the charset | ||||
|         # are not allowed. | ||||
|         self.assertEqual(msg.get_content_charset(), None) | ||||
| 
 | ||||
|     def test_rfc2231_bad_character_in_charset(self): | ||||
|         m = """\ | ||||
| Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D | ||||
| 
 | ||||
| """ | ||||
|         msg = email.message_from_string(m) | ||||
|         # This should return None because non-ascii characters in the charset | ||||
|         # are not allowed. | ||||
|         self.assertEqual(msg.get_content_charset(), None) | ||||
| 
 | ||||
|     def test_rfc2231_bad_character_in_filename(self): | ||||
|         m = '''\ | ||||
| Content-Disposition: inline; | ||||
| \tfilename*0*="ascii'xx'This%20is%20even%20more%20"; | ||||
| \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; | ||||
| \tfilename*2*="is it not.pdf%E2" | ||||
| 
 | ||||
| ''' | ||||
|         msg = email.message_from_string(m) | ||||
|         self.assertEqual(msg.get_filename(), | ||||
|                          u'This is even more ***fun*** is it not.pdf\ufffd') | ||||
| 
 | ||||
|     def test_rfc2231_unknown_encoding(self): | ||||
|         m = """\ | ||||
| Content-Transfer-Encoding: 8bit | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Barry Warsaw
						Barry Warsaw