mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	Forward port some fixes that were in email 2.5 but for some reason didn't make
it into email 4.0. Specifically, in Message.get_content_charset(), handle RFC 2231 headers that name an encoding not known to Python, or whose data contains a character that cannot be decoded using the declared charset. Also forward port the appropriate unit tests.
This commit is contained in:
		
							parent
							
								
									9815f8b252
								
							
						
					
					
						commit
						d92ae78bdb
					
				
					 3 changed files with 100 additions and 1 deletions
				
			
		|  | @ -747,7 +747,18 @@ def get_content_charset(self, failobj=None): | ||||||
|         if isinstance(charset, tuple): |         if isinstance(charset, tuple): | ||||||
|             # RFC 2231 encoded, so decode it, and it better end up as ascii. |             # RFC 2231 encoded, so decode it, and it better end up as ascii. | ||||||
|             pcharset = charset[0] or 'us-ascii' |             pcharset = charset[0] or 'us-ascii' | ||||||
|  |             try: | ||||||
|  |                 # LookupError will be raised if the charset isn't known to | ||||||
|  |                 # Python.  UnicodeError will be raised if the encoded text | ||||||
|  |                 # contains a character not in the charset. | ||||||
|                 charset = unicode(charset[2], pcharset).encode('us-ascii') |                 charset = unicode(charset[2], pcharset).encode('us-ascii') | ||||||
|  |             except (LookupError, UnicodeError): | ||||||
|  |                 charset = charset[2] | ||||||
|  |         # charset character must be in us-ascii range | ||||||
|  |         try: | ||||||
|  |             charset = unicode(charset, 'us-ascii').encode('us-ascii') | ||||||
|  |         except UnicodeError: | ||||||
|  |             return failobj | ||||||
|         # RFC 2046, $4.1.2 says charsets are not case sensitive |         # RFC 2046, $4.1.2 says charsets are not case sensitive | ||||||
|         return charset.lower() |         return charset.lower() | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -3086,6 +3086,50 @@ def test_rfc2231_no_language_or_charset_in_charset(self): | ||||||
|         self.assertEqual(msg.get_content_charset(), |         self.assertEqual(msg.get_content_charset(), | ||||||
|                          'this is even more ***fun*** is it not.pdf') |                          'this is even more ***fun*** is it not.pdf') | ||||||
| 
 | 
 | ||||||
|  |     def test_rfc2231_bad_encoding_in_filename(self): | ||||||
|  |         m = '''\ | ||||||
|  | Content-Disposition: inline; | ||||||
|  | \tfilename*0*="bogus'xx'This%20is%20even%20more%20"; | ||||||
|  | \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; | ||||||
|  | \tfilename*2="is it not.pdf" | ||||||
|  | 
 | ||||||
|  | ''' | ||||||
|  |         msg = email.message_from_string(m) | ||||||
|  |         self.assertEqual(msg.get_filename(), | ||||||
|  |                          'This is even more ***fun*** is it not.pdf') | ||||||
|  | 
 | ||||||
|  |     def test_rfc2231_bad_encoding_in_charset(self): | ||||||
|  |         m = """\ | ||||||
|  | Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  |         msg = email.message_from_string(m) | ||||||
|  |         # This should return None because non-ascii characters in the charset | ||||||
|  |         # are not allowed. | ||||||
|  |         self.assertEqual(msg.get_content_charset(), None) | ||||||
|  | 
 | ||||||
|  |     def test_rfc2231_bad_character_in_charset(self): | ||||||
|  |         m = """\ | ||||||
|  | Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  |         msg = email.message_from_string(m) | ||||||
|  |         # This should return None because non-ascii characters in the charset | ||||||
|  |         # are not allowed. | ||||||
|  |         self.assertEqual(msg.get_content_charset(), None) | ||||||
|  | 
 | ||||||
|  |     def test_rfc2231_bad_character_in_filename(self): | ||||||
|  |         m = '''\ | ||||||
|  | Content-Disposition: inline; | ||||||
|  | \tfilename*0*="ascii'xx'This%20is%20even%20more%20"; | ||||||
|  | \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; | ||||||
|  | \tfilename*2*="is it not.pdf%E2" | ||||||
|  | 
 | ||||||
|  | ''' | ||||||
|  |         msg = email.message_from_string(m) | ||||||
|  |         self.assertEqual(msg.get_filename(), | ||||||
|  |                          u'This is even more ***fun*** is it not.pdf\ufffd') | ||||||
|  | 
 | ||||||
|     def test_rfc2231_unknown_encoding(self): |     def test_rfc2231_unknown_encoding(self): | ||||||
|         m = """\ |         m = """\ | ||||||
| Content-Transfer-Encoding: 8bit | Content-Transfer-Encoding: 8bit | ||||||
|  |  | ||||||
|  | @ -3092,6 +3092,50 @@ def test_rfc2231_no_language_or_charset_in_charset(self): | ||||||
|         self.assertEqual(msg.get_content_charset(), |         self.assertEqual(msg.get_content_charset(), | ||||||
|                          'this is even more ***fun*** is it not.pdf') |                          'this is even more ***fun*** is it not.pdf') | ||||||
| 
 | 
 | ||||||
|  |     def test_rfc2231_bad_encoding_in_filename(self): | ||||||
|  |         m = '''\ | ||||||
|  | Content-Disposition: inline; | ||||||
|  | \tfilename*0*="bogus'xx'This%20is%20even%20more%20"; | ||||||
|  | \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; | ||||||
|  | \tfilename*2="is it not.pdf" | ||||||
|  | 
 | ||||||
|  | ''' | ||||||
|  |         msg = email.message_from_string(m) | ||||||
|  |         self.assertEqual(msg.get_filename(), | ||||||
|  |                          'This is even more ***fun*** is it not.pdf') | ||||||
|  | 
 | ||||||
|  |     def test_rfc2231_bad_encoding_in_charset(self): | ||||||
|  |         m = """\ | ||||||
|  | Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  |         msg = email.message_from_string(m) | ||||||
|  |         # This should return None because non-ascii characters in the charset | ||||||
|  |         # are not allowed. | ||||||
|  |         self.assertEqual(msg.get_content_charset(), None) | ||||||
|  | 
 | ||||||
|  |     def test_rfc2231_bad_character_in_charset(self): | ||||||
|  |         m = """\ | ||||||
|  | Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  |         msg = email.message_from_string(m) | ||||||
|  |         # This should return None because non-ascii characters in the charset | ||||||
|  |         # are not allowed. | ||||||
|  |         self.assertEqual(msg.get_content_charset(), None) | ||||||
|  | 
 | ||||||
|  |     def test_rfc2231_bad_character_in_filename(self): | ||||||
|  |         m = '''\ | ||||||
|  | Content-Disposition: inline; | ||||||
|  | \tfilename*0*="ascii'xx'This%20is%20even%20more%20"; | ||||||
|  | \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; | ||||||
|  | \tfilename*2*="is it not.pdf%E2" | ||||||
|  | 
 | ||||||
|  | ''' | ||||||
|  |         msg = email.message_from_string(m) | ||||||
|  |         self.assertEqual(msg.get_filename(), | ||||||
|  |                          u'This is even more ***fun*** is it not.pdf\ufffd') | ||||||
|  | 
 | ||||||
|     def test_rfc2231_unknown_encoding(self): |     def test_rfc2231_unknown_encoding(self): | ||||||
|         m = """\ |         m = """\ | ||||||
| Content-Transfer-Encoding: 8bit | Content-Transfer-Encoding: 8bit | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Barry Warsaw
						Barry Warsaw