mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	[3.11] gh-99612: Fix PyUnicode_DecodeUTF8Stateful() for ASCII-only data (GH-99613) (GH-107224)
Previously *consumed was not set in this case.
(cherry picked from commit f08e52ccb0)
			
			
This commit is contained in:
		
							parent
							
								
									058741cc39
								
							
						
					
					
						commit
						b8b3e6afc0
					
				
					 4 changed files with 95 additions and 1 deletion
				
			
		
							
								
								
									
										54
									
								
								Lib/test/test_capi/test_codecs.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								Lib/test/test_capi/test_codecs.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,54 @@ | ||||||
|  | import unittest | ||||||
|  | from test.support import import_helper | ||||||
|  | 
 | ||||||
|  | _testcapi = import_helper.import_module('_testcapi') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class CAPITest(unittest.TestCase): | ||||||
|  |     """Exercise the UTF-8 decoding C API through the ``_testcapi`` wrappers.""" | ||||||
|  | 
 | ||||||
|  |     def test_decodeutf8(self): | ||||||
|  |         """Test PyUnicode_DecodeUTF8()""" | ||||||
|  |         decodeutf8 = _testcapi.unicode_decodeutf8 | ||||||
|  | 
 | ||||||
|  |         # Valid text covering ASCII, Latin-1, BMP and non-BMP code points must | ||||||
|  |         # decode back to itself, both without an error handler argument and | ||||||
|  |         # with an explicit 'strict'. | ||||||
|  |         for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: | ||||||
|  |             b = s.encode('utf-8') | ||||||
|  |             self.assertEqual(decodeutf8(b), s) | ||||||
|  |             self.assertEqual(decodeutf8(b, 'strict'), s) | ||||||
|  | 
 | ||||||
|  |         # A lone continuation byte, invalid lead bytes and a truncated 4-byte | ||||||
|  |         # sequence all raise when no error handler is given. | ||||||
|  |         self.assertRaises(UnicodeDecodeError, decodeutf8, b'\x80') | ||||||
|  |         self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xc0') | ||||||
|  |         self.assertRaises(UnicodeDecodeError, decodeutf8, b'\xff') | ||||||
|  |         self.assertRaises(UnicodeDecodeError, decodeutf8, b'a\xf0\x9f') | ||||||
|  |         # With 'replace' the incomplete sequence becomes a single U+FFFD. | ||||||
|  |         self.assertEqual(decodeutf8(b'a\xf0\x9f', 'replace'), 'a\ufffd') | ||||||
|  |         self.assertEqual(decodeutf8(b'a\xf0\x9fb', 'replace'), 'a\ufffdb') | ||||||
|  | 
 | ||||||
|  |         # Invalid data combined with an unknown error handler name. | ||||||
|  |         self.assertRaises(LookupError, decodeutf8, b'a\x80', 'foo') | ||||||
|  |         # TODO: Test PyUnicode_DecodeUTF8() with NULL as data and | ||||||
|  |         # negative size. | ||||||
|  | 
 | ||||||
|  |     def test_decodeutf8stateful(self): | ||||||
|  |         """Test PyUnicode_DecodeUTF8Stateful()""" | ||||||
|  |         decodeutf8stateful = _testcapi.unicode_decodeutf8stateful | ||||||
|  | 
 | ||||||
|  |         # The wrapper returns a (decoded, consumed) pair.  For fully valid | ||||||
|  |         # input, including the ASCII-only 'abc', the whole buffer must be | ||||||
|  |         # reported as consumed. | ||||||
|  |         for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600']: | ||||||
|  |             b = s.encode('utf-8') | ||||||
|  |             self.assertEqual(decodeutf8stateful(b), (s, len(b))) | ||||||
|  |             self.assertEqual(decodeutf8stateful(b, 'strict'), (s, len(b))) | ||||||
|  | 
 | ||||||
|  |         self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\x80') | ||||||
|  |         self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xc0') | ||||||
|  |         self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'\xff') | ||||||
|  |         # A trailing incomplete sequence is not an error here: only the bytes | ||||||
|  |         # before it are decoded and counted, whichever handler is passed. | ||||||
|  |         self.assertEqual(decodeutf8stateful(b'a\xf0\x9f'), ('a', 1)) | ||||||
|  |         self.assertEqual(decodeutf8stateful(b'a\xf0\x9f', 'replace'), ('a', 1)) | ||||||
|  |         # The same prefix followed by another byte is treated as invalid data. | ||||||
|  |         self.assertRaises(UnicodeDecodeError, decodeutf8stateful, b'a\xf0\x9fb') | ||||||
|  |         self.assertEqual(decodeutf8stateful(b'a\xf0\x9fb', 'replace'), ('a\ufffdb', 4)) | ||||||
|  | 
 | ||||||
|  |         # Invalid data combined with an unknown error handler name. | ||||||
|  |         self.assertRaises(LookupError, decodeutf8stateful, b'a\x80', 'foo') | ||||||
|  |         # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as data and | ||||||
|  |         # negative size. | ||||||
|  |         # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as the address of | ||||||
|  |         # "consumed". | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Run the tests in this file when it is executed as a script. | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     unittest.main() | ||||||
|  | @ -0,0 +1,2 @@ | ||||||
|  | Fix :c:func:`PyUnicode_DecodeUTF8Stateful` for ASCII-only data: | ||||||
|  | ``*consumed`` was not set. | ||||||
|  | @ -2307,6 +2307,40 @@ unicode_asutf8andsize(PyObject *self, PyObject *args) | ||||||
|     return Py_BuildValue("(Nn)", result, utf8_len); |     return Py_BuildValue("(Nn)", result, utf8_len); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /* Test PyUnicode_DecodeUTF8() | ||||||
|  |  * | ||||||
|  |  * Python-level call: unicode_decodeutf8(data[, errors]). | ||||||
|  |  * "data" is parsed with the "y#" format (pointer plus length) and the | ||||||
|  |  * optional "errors" with "z"; "errors" stays NULL when it is omitted. | ||||||
|  |  * Returns whatever PyUnicode_DecodeUTF8() returns, or NULL with an | ||||||
|  |  * exception set if argument parsing fails. | ||||||
|  |  */ | ||||||
|  | static PyObject * | ||||||
|  | unicode_decodeutf8(PyObject *self, PyObject *args) | ||||||
|  | { | ||||||
|  |     const char *data; | ||||||
|  |     Py_ssize_t size; | ||||||
|  |     const char *errors = NULL; | ||||||
|  | 
 | ||||||
|  |     if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) | ||||||
|  |         return NULL; | ||||||
|  | 
 | ||||||
|  |     return PyUnicode_DecodeUTF8(data, size, errors); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /* Test PyUnicode_DecodeUTF8Stateful() | ||||||
|  |  * | ||||||
|  |  * Python-level call: unicode_decodeutf8stateful(data[, errors]). | ||||||
|  |  * On success returns a 2-tuple (decoded object, consumed); returns NULL | ||||||
|  |  * if argument parsing or decoding fails. | ||||||
|  |  */ | ||||||
|  | static PyObject * | ||||||
|  | unicode_decodeutf8stateful(PyObject *self, PyObject *args) | ||||||
|  | { | ||||||
|  |     const char *data; | ||||||
|  |     Py_ssize_t size; | ||||||
|  |     const char *errors = NULL; | ||||||
|  |     /* Recognizable bogus value: it ends up in the returned tuple if the | ||||||
|  |        callee never stores to *consumed. */ | ||||||
|  |     Py_ssize_t consumed = 123456789; | ||||||
|  |     PyObject *result; | ||||||
|  | 
 | ||||||
|  |     if (!PyArg_ParseTuple(args, "y#|z", &data, &size, &errors)) | ||||||
|  |         return NULL; | ||||||
|  | 
 | ||||||
|  |     result = PyUnicode_DecodeUTF8Stateful(data, size, errors, &consumed); | ||||||
|  |     if (!result) { | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |     /* "N" hands the reference to result over to the new tuple; "n" converts | ||||||
|  |        the Py_ssize_t count. */ | ||||||
|  |     return Py_BuildValue("(Nn)", result, consumed); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static PyObject * | static PyObject * | ||||||
| unicode_findchar(PyObject *self, PyObject *args) | unicode_findchar(PyObject *self, PyObject *args) | ||||||
| { | { | ||||||
|  | @ -6562,7 +6596,8 @@ static PyMethodDef TestMethods[] = { | ||||||
|     {"unicode_asucs4",          unicode_asucs4,                  METH_VARARGS}, |     {"unicode_asucs4",          unicode_asucs4,                  METH_VARARGS}, | ||||||
|     {"unicode_asutf8",          unicode_asutf8,                  METH_VARARGS}, |     {"unicode_asutf8",          unicode_asutf8,                  METH_VARARGS}, | ||||||
|     {"unicode_asutf8andsize",   unicode_asutf8andsize,           METH_VARARGS}, |     {"unicode_asutf8andsize",   unicode_asutf8andsize,           METH_VARARGS}, | ||||||
|     {"unicode_findchar",        unicode_findchar,                METH_VARARGS}, |     {"unicode_decodeutf8",       unicode_decodeutf8,             METH_VARARGS}, | ||||||
|  |     {"unicode_decodeutf8stateful",unicode_decodeutf8stateful,    METH_VARARGS},    {"unicode_findchar",        unicode_findchar,                METH_VARARGS}, | ||||||
|     {"unicode_copycharacters",  unicode_copycharacters,          METH_VARARGS}, |     {"unicode_copycharacters",  unicode_copycharacters,          METH_VARARGS}, | ||||||
| #if USE_UNICODE_WCHAR_CACHE | #if USE_UNICODE_WCHAR_CACHE | ||||||
|     {"unicode_legacy_string",   unicode_legacy_string,           METH_VARARGS}, |     {"unicode_legacy_string",   unicode_legacy_string,           METH_VARARGS}, | ||||||
|  |  | ||||||
|  | @ -5120,6 +5120,9 @@ unicode_decode_utf8(const char *s, Py_ssize_t size, | ||||||
|     } |     } | ||||||
|     s += ascii_decode(s, end, PyUnicode_1BYTE_DATA(u)); |     s += ascii_decode(s, end, PyUnicode_1BYTE_DATA(u)); | ||||||
|     if (s == end) { |     if (s == end) { | ||||||
|  |         if (consumed) { | ||||||
|  |             *consumed = size; | ||||||
|  |         } | ||||||
|         return u; |         return u; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Serhiy Storchaka
						Serhiy Storchaka