mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	Issue #24848: Fixed bugs in UTF-7 decoding of malformed data:
1. Non-ASCII bytes were accepted after a shift sequence. 2. A low surrogate could be emitted when an error occurred in the preceding high surrogate.
This commit is contained in:
		
							parent
							
								
									223349cfb8
								
							
						
					
					
						commit
						28b21e50c8
					
				
					 4 changed files with 75 additions and 11 deletions
				
			
		|  | @ -898,6 +898,32 @@ def test_readline(self): | |||
| class UTF7Test(ReadTest, unittest.TestCase): | ||||
|     encoding = "utf-7" | ||||
| 
 | ||||
|     # Check how each class of ASCII character is handled by the codec | ||||
|     # named in self.encoding: which ones pass through unchanged and | ||||
|     # which ones are written as a '+...-' shift sequence. | ||||
|     def test_ascii(self): | ||||
|         # Set D (directly encoded characters) | ||||
|         set_d = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' | ||||
|                  'abcdefghijklmnopqrstuvwxyz' | ||||
|                  '0123456789' | ||||
|                  '\'(),-./:?') | ||||
|         # Both directions must be the identity on these characters. | ||||
|         self.assertEqual(set_d.encode(self.encoding), set_d.encode('ascii')) | ||||
|         self.assertEqual(set_d.encode('ascii').decode(self.encoding), set_d) | ||||
|         # Set O (optional direct characters) | ||||
|         set_o = ' !"#$%&*;<=>@[]^_`{|}' | ||||
|         self.assertEqual(set_o.encode(self.encoding), set_o.encode('ascii')) | ||||
|         self.assertEqual(set_o.encode('ascii').decode(self.encoding), set_o) | ||||
|         # + (the shift character itself; a literal '+' is written as '+-') | ||||
|         self.assertEqual('a+b'.encode(self.encoding), b'a+-b') | ||||
|         self.assertEqual(b'a+-b'.decode(self.encoding), 'a+b') | ||||
|         # White spaces | ||||
|         ws = ' \t\n\r' | ||||
|         self.assertEqual(ws.encode(self.encoding), ws.encode('ascii')) | ||||
|         self.assertEqual(ws.encode('ascii').decode(self.encoding), ws) | ||||
|         # Other ASCII characters: every code point below 0x80 that is not | ||||
|         # in any of the groups above, in sorted order.  Only the encoding | ||||
|         # direction is checked here; the expected output is one single | ||||
|         # shift sequence. | ||||
|         other_ascii = ''.join(sorted(set(bytes(range(0x80)).decode()) - | ||||
|                                      set(set_d + set_o + '+' + ws))) | ||||
|         self.assertEqual(other_ascii.encode(self.encoding), | ||||
|                          b'+AAAAAQACAAMABAAFAAYABwAIAAsADAAOAA8AEAARABIAEwAU' | ||||
|                          b'ABUAFgAXABgAGQAaABsAHAAdAB4AHwBcAH4Afw-') | ||||
| 
 | ||||
|     def test_partial(self): | ||||
|         self.check_partial( | ||||
|             'a+-b\x00c\x80d\u0100e\U00010000f', | ||||
|  | @ -939,7 +965,9 @@ def test_partial(self): | |||
| 
 | ||||
|     def test_errors(self): | ||||
|         tests = [ | ||||
|             (b'\xffb', '\ufffdb'), | ||||
|             (b'a\xffb', 'a\ufffdb'), | ||||
|             (b'a\xff\xffb', 'a\ufffd\ufffdb'), | ||||
|             (b'a+IK', 'a\ufffd'), | ||||
|             (b'a+IK-b', 'a\ufffdb'), | ||||
|             (b'a+IK,b', 'a\ufffdb'), | ||||
|  | @ -955,6 +983,8 @@ def test_errors(self): | |||
|             (b'a+//,+IKw-b', 'a\ufffd\u20acb'), | ||||
|             (b'a+///,+IKw-b', 'a\uffff\ufffd\u20acb'), | ||||
|             (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'), | ||||
|             (b'a+IKw-b\xff', 'a\u20acb\ufffd'), | ||||
|             (b'a+IKw\xffb', 'a\u20ac\ufffdb'), | ||||
|         ] | ||||
|         for raw, expected in tests: | ||||
|             with self.subTest(raw=raw): | ||||
|  | @ -966,8 +996,36 @@ def test_nonbmp(self): | |||
|         self.assertEqual('\U000104A0'.encode(self.encoding), b'+2AHcoA-') | ||||
|         self.assertEqual('\ud801\udca0'.encode(self.encoding), b'+2AHcoA-') | ||||
|         self.assertEqual(b'+2AHcoA-'.decode(self.encoding), '\U000104A0') | ||||
|         self.assertEqual(b'+2AHcoA'.decode(self.encoding), '\U000104A0') | ||||
|         self.assertEqual('\u20ac\U000104A0'.encode(self.encoding), b'+IKzYAdyg-') | ||||
|         self.assertEqual(b'+IKzYAdyg-'.decode(self.encoding), '\u20ac\U000104A0') | ||||
|         self.assertEqual(b'+IKzYAdyg'.decode(self.encoding), '\u20ac\U000104A0') | ||||
|         self.assertEqual('\u20ac\u20ac\U000104A0'.encode(self.encoding), | ||||
|                          b'+IKwgrNgB3KA-') | ||||
|         self.assertEqual(b'+IKwgrNgB3KA-'.decode(self.encoding), | ||||
|                          '\u20ac\u20ac\U000104A0') | ||||
|         self.assertEqual(b'+IKwgrNgB3KA'.decode(self.encoding), | ||||
|                          '\u20ac\u20ac\U000104A0') | ||||
| 
 | ||||
|     test_lone_surrogates = None | ||||
|     # Decode byte strings whose shift sequence contains a high surrogate | ||||
|     # (U+D801) that is not followed by a valid low surrogate, using the | ||||
|     # 'replace' error handler, and compare against the expected text. | ||||
|     def test_lone_surrogates(self): | ||||
|         # Each entry is (raw bytes, expected decoded string).  The rows | ||||
|         # come in three groups: the surrogate alone, preceded by one | ||||
|         # U+20AC, and preceded by two U+20AC. | ||||
|         tests = [ | ||||
|             (b'a+2AE-b', 'a\ud801b'), | ||||
|             (b'a+2AE\xffb', 'a\ufffdb'), | ||||
|             (b'a+2AE', 'a\ufffd'), | ||||
|             (b'a+2AEA-b', 'a\ufffdb'), | ||||
|             (b'a+2AH-b', 'a\ufffdb'), | ||||
|             (b'a+IKzYAQ-b', 'a\u20ac\ud801b'), | ||||
|             (b'a+IKzYAQ\xffb', 'a\u20ac\ufffdb'), | ||||
|             (b'a+IKzYAQA-b', 'a\u20ac\ufffdb'), | ||||
|             (b'a+IKzYAd-b', 'a\u20ac\ufffdb'), | ||||
|             (b'a+IKwgrNgB-b', 'a\u20ac\u20ac\ud801b'), | ||||
|             (b'a+IKwgrNgB\xffb', 'a\u20ac\u20ac\ufffdb'), | ||||
|             (b'a+IKwgrNgB', 'a\u20ac\u20ac\ufffd'), | ||||
|             (b'a+IKwgrNgBA-b', 'a\u20ac\u20ac\ufffdb'), | ||||
|         ] | ||||
|         for raw, expected in tests: | ||||
|             # subTest makes a failure report name the offending input. | ||||
|             with self.subTest(raw=raw): | ||||
|                 # NOTE(review): the codec name is hard-coded as 'utf-7' | ||||
|                 # here, while test_ascii uses self.encoding. | ||||
|                 self.assertEqual(raw.decode('utf-7', 'replace'), expected) | ||||
| 
 | ||||
| 
 | ||||
| class UTF16ExTest(unittest.TestCase): | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Serhiy Storchaka
						Serhiy Storchaka