mirror of
				https://github.com/python/cpython.git
				synced 2025-11-01 06:01:29 +00:00 
			
		
		
		
	Issue #14579: Fix CVE-2012-2135: vulnerability in the utf-16 decoder after error handling.
Patch by Serhiy Storchaka.
This commit is contained in:
		
							parent
							
								
									ca9652ea5d
								
							
						
					
					
						commit
						b4bbee25b1
					
				
					 3 changed files with 50 additions and 35 deletions
				
			
		|  | @ -540,8 +540,19 @@ def test_partial(self): | |||
|         ) | ||||
| 
 | ||||
|     def test_errors(self): | ||||
|         tests = [ | ||||
|             (b'\xff', '\ufffd'), | ||||
|             (b'A\x00Z', 'A\ufffd'), | ||||
|             (b'A\x00B\x00C\x00D\x00Z', 'ABCD\ufffd'), | ||||
|             (b'\x00\xd8', '\ufffd'), | ||||
|             (b'\x00\xd8A', '\ufffd'), | ||||
|             (b'\x00\xd8A\x00', '\ufffdA'), | ||||
|             (b'\x00\xdcA\x00', '\ufffdA'), | ||||
|         ] | ||||
|         for raw, expected in tests: | ||||
|             self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode, | ||||
|                           b"\xff", "strict", True) | ||||
|                               raw, 'strict', True) | ||||
|             self.assertEqual(raw.decode('utf-16le', 'replace'), expected) | ||||
| 
 | ||||
|     def test_nonbmp(self): | ||||
|         self.assertEqual("\U00010203".encode(self.encoding), | ||||
|  | @ -568,8 +579,19 @@ def test_partial(self): | |||
|         ) | ||||
| 
 | ||||
|     def test_errors(self): | ||||
|         tests = [ | ||||
|             (b'\xff', '\ufffd'), | ||||
|             (b'\x00A\xff', 'A\ufffd'), | ||||
|             (b'\x00A\x00B\x00C\x00DZ', 'ABCD\ufffd'), | ||||
|             (b'\xd8\x00', '\ufffd'), | ||||
|             (b'\xd8\x00\xdc', '\ufffd'), | ||||
|             (b'\xd8\x00\x00A', '\ufffdA'), | ||||
|             (b'\xdc\x00\x00A', '\ufffdA'), | ||||
|         ] | ||||
|         for raw, expected in tests: | ||||
|             self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode, | ||||
|                           b"\xff", "strict", True) | ||||
|                               raw, 'strict', True) | ||||
|             self.assertEqual(raw.decode('utf-16be', 'replace'), expected) | ||||
| 
 | ||||
|     def test_nonbmp(self): | ||||
|         self.assertEqual("\U00010203".encode(self.encoding), | ||||
|  |  | |||
|  | @ -10,6 +10,9 @@ What's New in Python 3.2.4 | |||
| Core and Builtins | ||||
| ----------------- | ||||
| 
 | ||||
| - Issue #14579: Fix CVE-2012-2135: vulnerability in the utf-16 decoder after | ||||
|   error handling.  Patch by Serhiy Storchaka. | ||||
| 
 | ||||
| - Issue #15404: Refleak in PyMethodObject repr. | ||||
| 
 | ||||
| - Issue #15394: An issue in PyModule_Create that caused references to | ||||
|  |  | |||
|  | @ -3425,7 +3425,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s, | |||
|     /* Unpack UTF-16 encoded data */ | ||||
|     p = unicode->str; | ||||
|     q = (unsigned char *)s; | ||||
|     e = q + size - 1; | ||||
|     e = q + size; | ||||
| 
 | ||||
|     if (byteorder) | ||||
|         bo = *byteorder; | ||||
|  | @ -3476,8 +3476,20 @@ PyUnicode_DecodeUTF16Stateful(const char *s, | |||
| #endif | ||||
| 
 | ||||
|     aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK); | ||||
|     while (q < e) { | ||||
|     while (1) { | ||||
|         Py_UNICODE ch; | ||||
|         if (e - q < 2) { | ||||
|             /* remaining byte at the end? (size should be even) */ | ||||
|             if (q == e || consumed) | ||||
|                 break; | ||||
|             errmsg = "truncated data"; | ||||
|             startinpos = ((const char *)q) - starts; | ||||
|             endinpos = ((const char *)e) - starts; | ||||
|             outpos = p - PyUnicode_AS_UNICODE(unicode); | ||||
|             goto utf16Error; | ||||
|             /* The remaining input chars are ignored if the callback
 | ||||
|                chooses to skip the input */ | ||||
|         } | ||||
|         /* First check for possible aligned read of a C 'long'. Unaligned
 | ||||
|            reads are more expensive, better to defer to another iteration. */ | ||||
|         if (!((size_t) q & LONG_PTR_MASK)) { | ||||
|  | @ -3546,8 +3558,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s, | |||
|             } | ||||
|             p = _p; | ||||
|             q = _q; | ||||
|             if (q >= e) | ||||
|                 break; | ||||
|             if (e - q < 2) | ||||
|                 continue; | ||||
|         } | ||||
|         ch = (q[ihi] << 8) | q[ilo]; | ||||
| 
 | ||||
|  | @ -3559,10 +3571,10 @@ PyUnicode_DecodeUTF16Stateful(const char *s, | |||
|         } | ||||
| 
 | ||||
|         /* UTF-16 code pair: */ | ||||
|         if (q > e) { | ||||
|         if (e - q < 2) { | ||||
|             errmsg = "unexpected end of data"; | ||||
|             startinpos = (((const char *)q) - 2) - starts; | ||||
|             endinpos = ((const char *)e) + 1 - starts; | ||||
|             endinpos = ((const char *)e) - starts; | ||||
|             goto utf16Error; | ||||
|         } | ||||
|         if (0xD800 <= ch && ch <= 0xDBFF) { | ||||
|  | @ -3606,31 +3618,9 @@ PyUnicode_DecodeUTF16Stateful(const char *s, | |||
|                 &outpos, | ||||
|                 &p)) | ||||
|             goto onError; | ||||
|     } | ||||
|     /* remaining byte at the end? (size should be even) */ | ||||
|     if (e == q) { | ||||
|         if (!consumed) { | ||||
|             errmsg = "truncated data"; | ||||
|             startinpos = ((const char *)q) - starts; | ||||
|             endinpos = ((const char *)e) + 1 - starts; | ||||
|             outpos = p - PyUnicode_AS_UNICODE(unicode); | ||||
|             if (unicode_decode_call_errorhandler( | ||||
|                     errors, | ||||
|                     &errorHandler, | ||||
|                     "utf16", errmsg, | ||||
|                     &starts, | ||||
|                     (const char **)&e, | ||||
|                     &startinpos, | ||||
|                     &endinpos, | ||||
|                     &exc, | ||||
|                     (const char **)&q, | ||||
|                     &unicode, | ||||
|                     &outpos, | ||||
|                     &p)) | ||||
|                 goto onError; | ||||
|             /* The remaining input chars are ignored if the callback
 | ||||
|                chooses to skip the input */ | ||||
|         } | ||||
|         /* Update data because unicode_decode_call_errorhandler might have
 | ||||
|            changed the input object. */ | ||||
|         aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK); | ||||
|     } | ||||
| 
 | ||||
|     if (byteorder) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Antoine Pitrou
						Antoine Pitrou