mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	gh-129173: refactor PyCodec_ReplaceErrors into separate functions (#129893)
				
					
				
			The logic of `PyCodec_ReplaceErrors` is now split into separate functions, each of which handles a specific exception type.
This commit is contained in:
		
							parent
							
								
									4d3a7ea354
								
							
						
					
					
						commit
						fa6a8140dd
					
				
					 1 changed files with 83 additions and 40 deletions
				
			
		|  | @ -730,6 +730,27 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch) | |||
| } | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
|  * Create a Unicode string containing 'count' copies of the official | ||||
|  * Unicode REPLACEMENT CHARACTER (0xFFFD). | ||||
|  * | ||||
|  * The string is allocated with PyUnicode_New() using U+FFFD as the | ||||
|  * maximum code point, and each of its 'count' slots is then set to | ||||
|  * U+FFFD through the 2-byte data pointer. | ||||
|  * | ||||
|  * Returns the new string, or NULL if PyUnicode_New() fails. | ||||
|  * | ||||
|  * NOTE(review): the kind assertion is skipped when count == 0, presumably | ||||
|  * because an empty string is not stored with the 2-byte kind -- confirm. | ||||
|  */ | ||||
| static PyObject * | ||||
| codec_handler_unicode_replacement_character(Py_ssize_t count) | ||||
| { | ||||
|     PyObject *res = PyUnicode_New(count, Py_UNICODE_REPLACEMENT_CHARACTER); | ||||
|     if (res == NULL) { | ||||
|         return NULL; | ||||
|     } | ||||
|     assert(count == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND); | ||||
|     Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res); | ||||
|     for (Py_ssize_t i = 0; i < count; ++i) { | ||||
|         outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER; | ||||
|     } | ||||
|     assert(_PyUnicode_CheckConsistency(res, 1)); | ||||
|     return res; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| // --- handler: 'strict' ------------------------------------------------------
 | ||||
| 
 | ||||
| PyObject *PyCodec_StrictErrors(PyObject *exc) | ||||
|  | @ -774,13 +795,15 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc) | |||
| } | ||||
| 
 | ||||
| 
 | ||||
| PyObject *PyCodec_ReplaceErrors(PyObject *exc) | ||||
| // --- handler: 'replace' -----------------------------------------------------
 | ||||
| 
 | ||||
| static PyObject * | ||||
| _PyCodec_ReplaceUnicodeEncodeError(PyObject *exc) | ||||
| { | ||||
|     Py_ssize_t start, end, slen; | ||||
| 
 | ||||
|     if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { | ||||
|     if (_PyUnicodeError_GetParams(exc, NULL, NULL, | ||||
|                                       &start, &end, &slen, false) < 0) { | ||||
|                                   &start, &end, &slen, false) < 0) | ||||
|     { | ||||
|         return NULL; | ||||
|     } | ||||
|     PyObject *res = PyUnicode_New(slen, '?'); | ||||
|  | @ -793,32 +816,51 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc) | |||
|     assert(_PyUnicode_CheckConsistency(res, 1)); | ||||
|     return Py_BuildValue("(Nn)", res, end); | ||||
| } | ||||
|     else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { | ||||
|         if (_PyUnicodeError_GetParams(exc, NULL, NULL, | ||||
|                                       NULL, &end, NULL, true) < 0) { | ||||
| 
 | ||||
| 
 | ||||
| static PyObject * | ||||
| _PyCodec_ReplaceUnicodeDecodeError(PyObject *exc) | ||||
| { | ||||
|     Py_ssize_t end; | ||||
|     if (PyUnicodeDecodeError_GetEnd(exc, &end) < 0) { | ||||
|         return NULL; | ||||
|     } | ||||
|         return Py_BuildValue("(Cn)", | ||||
|                              (int)Py_UNICODE_REPLACEMENT_CHARACTER, | ||||
|                              end); | ||||
|     } | ||||
|     else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) { | ||||
|         if (_PyUnicodeError_GetParams(exc, NULL, NULL, | ||||
|                                       &start, &end, &slen, false) < 0) { | ||||
|             return NULL; | ||||
|         } | ||||
|         PyObject *res = PyUnicode_New(slen, Py_UNICODE_REPLACEMENT_CHARACTER); | ||||
|     PyObject *res = codec_handler_unicode_replacement_character(1); | ||||
|     if (res == NULL) { | ||||
|         return NULL; | ||||
|     } | ||||
|         assert(slen == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND); | ||||
|         Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res); | ||||
|         for (Py_ssize_t i = 0; i < slen; ++i) { | ||||
|             outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER; | ||||
|         } | ||||
|         assert(_PyUnicode_CheckConsistency(res, 1)); | ||||
|     return Py_BuildValue("(Nn)", res, end); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * 'replace' handling for a UnicodeTranslateError. | ||||
|  * | ||||
|  * Reads start, end and slen from 'exc' via _PyUnicodeError_GetParams(), | ||||
|  * builds a string of 'slen' U+FFFD characters with | ||||
|  * codec_handler_unicode_replacement_character(), and returns the result | ||||
|  * of Py_BuildValue("(Nn)", res, end), i.e. the pair | ||||
|  * (replacement string, end). | ||||
|  * | ||||
|  * Returns NULL if the parameters cannot be read or if the replacement | ||||
|  * string cannot be created. | ||||
|  * | ||||
|  * NOTE(review): 'slen' is presumably the length of the start..end range, | ||||
|  * and 'start' is fetched but not otherwise used in this function -- | ||||
|  * confirm against _PyUnicodeError_GetParams(). | ||||
|  */ | ||||
| static PyObject * | ||||
| _PyCodec_ReplaceUnicodeTranslateError(PyObject *exc) | ||||
| { | ||||
|     Py_ssize_t start, end, slen; | ||||
|     if (_PyUnicodeError_GetParams(exc, NULL, NULL, | ||||
|                                   &start, &end, &slen, false) < 0) | ||||
|     { | ||||
|         return NULL; | ||||
|     } | ||||
|     PyObject *res = codec_handler_unicode_replacement_character(slen); | ||||
|     if (res == NULL) { | ||||
|         return NULL; | ||||
|     } | ||||
|     return Py_BuildValue("(Nn)", res, end); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| PyObject *PyCodec_ReplaceErrors(PyObject *exc) | ||||
| { | ||||
|     if (_PyIsUnicodeEncodeError(exc)) { | ||||
|         return _PyCodec_ReplaceUnicodeEncodeError(exc); | ||||
|     } | ||||
|     else if (_PyIsUnicodeDecodeError(exc)) { | ||||
|         return _PyCodec_ReplaceUnicodeDecodeError(exc); | ||||
|     } | ||||
|     else if (_PyIsUnicodeTranslateError(exc)) { | ||||
|         return _PyCodec_ReplaceUnicodeTranslateError(exc); | ||||
|     } | ||||
|     else { | ||||
|         wrong_exception_type(exc); | ||||
|         return NULL; | ||||
|  | @ -1468,7 +1510,8 @@ ignore_errors(PyObject *Py_UNUSED(self), PyObject *exc) | |||
| } | ||||
| 
 | ||||
| 
 | ||||
| static PyObject *replace_errors(PyObject *self, PyObject *exc) | ||||
| static inline PyObject * | ||||
| replace_errors(PyObject *Py_UNUSED(self), PyObject *exc) | ||||
| { /* Thin wrapper: passes 'exc' to PyCodec_ReplaceErrors() and returns its result unchanged; 'self' is marked Py_UNUSED and is never read. */ | ||||
|     return PyCodec_ReplaceErrors(exc); | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Bénédikt Tran
						Bénédikt Tran