mirror of
				https://github.com/python/cpython.git
				synced 2025-10-25 10:44:55 +00:00 
			
		
		
		
	Issue #19279: UTF-7 decoder no longer produces illegal strings.
This commit is contained in:
		
						commit
						55e092f545
					
				
					 3 changed files with 34 additions and 0 deletions
				
			
		|  | @ -820,6 +820,36 @@ def test_partial(self): | ||||||
|             ] |             ] | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|  |     def test_errors(self): | ||||||
|  |         tests = [ | ||||||
|  |             (b'a\xffb', 'a\ufffdb'), | ||||||
|  |             (b'a+IK', 'a\ufffd'), | ||||||
|  |             (b'a+IK-b', 'a\ufffdb'), | ||||||
|  |             (b'a+IK,b', 'a\ufffdb'), | ||||||
|  |             (b'a+IKx', 'a\u20ac\ufffd'), | ||||||
|  |             (b'a+IKx-b', 'a\u20ac\ufffdb'), | ||||||
|  |             (b'a+IKwgr', 'a\u20ac\ufffd'), | ||||||
|  |             (b'a+IKwgr-b', 'a\u20ac\ufffdb'), | ||||||
|  |             (b'a+IKwgr,', 'a\u20ac\ufffd'), | ||||||
|  |             (b'a+IKwgr,-b', 'a\u20ac\ufffd-b'), | ||||||
|  |             (b'a+IKwgrB', 'a\u20ac\u20ac\ufffd'), | ||||||
|  |             (b'a+IKwgrB-b', 'a\u20ac\u20ac\ufffdb'), | ||||||
|  |             (b'a+/,+IKw-b', 'a\ufffd\u20acb'), | ||||||
|  |             (b'a+//,+IKw-b', 'a\ufffd\u20acb'), | ||||||
|  |             (b'a+///,+IKw-b', 'a\uffff\ufffd\u20acb'), | ||||||
|  |             (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'), | ||||||
|  |         ] | ||||||
|  |         for raw, expected in tests: | ||||||
|  |             with self.subTest(raw=raw): | ||||||
|  |                 self.assertRaises(UnicodeDecodeError, codecs.utf_7_decode, | ||||||
|  |                                 raw, 'strict', True) | ||||||
|  |                 self.assertEqual(raw.decode('utf-7', 'replace'), expected) | ||||||
|  | 
 | ||||||
|  |     def test_nonbmp(self): | ||||||
|  |         self.assertEqual('\U000104A0'.encode(self.encoding), b'+2AHcoA-') | ||||||
|  |         self.assertEqual('\ud801\udca0'.encode(self.encoding), b'+2AHcoA-') | ||||||
|  |         self.assertEqual(b'+2AHcoA-'.decode(self.encoding), '\U000104A0') | ||||||
|  | 
 | ||||||
| class UTF16ExTest(unittest.TestCase): | class UTF16ExTest(unittest.TestCase): | ||||||
| 
 | 
 | ||||||
|     def test_errors(self): |     def test_errors(self): | ||||||
|  |  | ||||||
|  | @ -10,6 +10,8 @@ Projected release date: 2013-10-20 | ||||||
| Core and Builtins | Core and Builtins | ||||||
| ----------------- | ----------------- | ||||||
| 
 | 
 | ||||||
|  | - Issue #19279: UTF-7 decoder no longer produces illegal strings. | ||||||
|  | 
 | ||||||
| - Issue #16612: Add "Argument Clinic", a compile-time preprocessor for | - Issue #16612: Add "Argument Clinic", a compile-time preprocessor for | ||||||
|   C files to generate argument parsing code.  (See PEP 436.) |   C files to generate argument parsing code.  (See PEP 436.) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -4341,6 +4341,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s, | ||||||
|                     Py_UCS4 outCh = (Py_UCS4)(base64buffer >> (base64bits-16)); |                     Py_UCS4 outCh = (Py_UCS4)(base64buffer >> (base64bits-16)); | ||||||
|                     base64bits -= 16; |                     base64bits -= 16; | ||||||
|                     base64buffer &= (1 << base64bits) - 1; /* clear high bits */ |                     base64buffer &= (1 << base64bits) - 1; /* clear high bits */ | ||||||
|  |                     assert(outCh <= 0xffff); | ||||||
|                     if (surrogate) { |                     if (surrogate) { | ||||||
|                         /* expecting a second surrogate */ |                         /* expecting a second surrogate */ | ||||||
|                         if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) { |                         if (Py_UNICODE_IS_LOW_SURROGATE(outCh)) { | ||||||
|  | @ -4408,6 +4409,7 @@ PyUnicode_DecodeUTF7Stateful(const char *s, | ||||||
|                 inShift = 1; |                 inShift = 1; | ||||||
|                 shiftOutStart = writer.pos; |                 shiftOutStart = writer.pos; | ||||||
|                 base64bits = 0; |                 base64bits = 0; | ||||||
|  |                 base64buffer = 0; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         else if (DECODE_DIRECT(ch)) { /* character decodes as itself */ |         else if (DECODE_DIRECT(ch)) { /* character decodes as itself */ | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Serhiy Storchaka
						Serhiy Storchaka