mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	gh-129173: refactor PyCodec_ReplaceErrors into separate functions (#129893)
				
					
				
			The logic of `PyCodec_ReplaceErrors` is now split into separate functions, each of which handles a specific exception type.
This commit is contained in:
		
							parent
							
								
									4d3a7ea354
								
							
						
					
					
						commit
						fa6a8140dd
					
				
					 1 changed files with 83 additions and 40 deletions
				
			
		
							
								
								
									
										123
									
								
								Python/codecs.c
									
										
									
									
									
								
							
							
						
						
									
										123
									
								
								Python/codecs.c
									
										
									
									
									
								
							|  | @ -730,6 +730,27 @@ codec_handler_write_unicode_hex(Py_UCS1 **p, Py_UCS4 ch) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * Create a Unicode string containing 'count' copies of the official | ||||||
|  |  * Unicode REPLACEMENT CHARACTER (0xFFFD). | ||||||
|  |  */ | ||||||
|  | static PyObject * | ||||||
|  | codec_handler_unicode_replacement_character(Py_ssize_t count) | ||||||
|  | { | ||||||
|  |     PyObject *res = PyUnicode_New(count, Py_UNICODE_REPLACEMENT_CHARACTER); | ||||||
|  |     if (res == NULL) { | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |     assert(count == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND); | ||||||
|  |     Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res); | ||||||
|  |     for (Py_ssize_t i = 0; i < count; ++i) { | ||||||
|  |         outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER; | ||||||
|  |     } | ||||||
|  |     assert(_PyUnicode_CheckConsistency(res, 1)); | ||||||
|  |     return res; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| // --- handler: 'strict' ------------------------------------------------------
 | // --- handler: 'strict' ------------------------------------------------------
 | ||||||
| 
 | 
 | ||||||
| PyObject *PyCodec_StrictErrors(PyObject *exc) | PyObject *PyCodec_StrictErrors(PyObject *exc) | ||||||
|  | @ -774,50 +795,71 @@ PyObject *PyCodec_IgnoreErrors(PyObject *exc) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| PyObject *PyCodec_ReplaceErrors(PyObject *exc) | // --- handler: 'replace' -----------------------------------------------------
 | ||||||
|  | 
 | ||||||
|  | static PyObject * | ||||||
|  | _PyCodec_ReplaceUnicodeEncodeError(PyObject *exc) | ||||||
| { | { | ||||||
|     Py_ssize_t start, end, slen; |     Py_ssize_t start, end, slen; | ||||||
|  |     if (_PyUnicodeError_GetParams(exc, NULL, NULL, | ||||||
|  |                                   &start, &end, &slen, false) < 0) | ||||||
|  |     { | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |     PyObject *res = PyUnicode_New(slen, '?'); | ||||||
|  |     if (res == NULL) { | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |     assert(PyUnicode_KIND(res) == PyUnicode_1BYTE_KIND); | ||||||
|  |     Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res); | ||||||
|  |     memset(outp, '?', sizeof(Py_UCS1) * slen); | ||||||
|  |     assert(_PyUnicode_CheckConsistency(res, 1)); | ||||||
|  |     return Py_BuildValue("(Nn)", res, end); | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
|     if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { | 
 | ||||||
|         if (_PyUnicodeError_GetParams(exc, NULL, NULL, | static PyObject * | ||||||
|                                       &start, &end, &slen, false) < 0) { | _PyCodec_ReplaceUnicodeDecodeError(PyObject *exc) | ||||||
|             return NULL; | { | ||||||
|         } |     Py_ssize_t end; | ||||||
|         PyObject *res = PyUnicode_New(slen, '?'); |     if (PyUnicodeDecodeError_GetEnd(exc, &end) < 0) { | ||||||
|         if (res == NULL) { |         return NULL; | ||||||
|             return NULL; |  | ||||||
|         } |  | ||||||
|         assert(PyUnicode_KIND(res) == PyUnicode_1BYTE_KIND); |  | ||||||
|         Py_UCS1 *outp = PyUnicode_1BYTE_DATA(res); |  | ||||||
|         memset(outp, '?', sizeof(Py_UCS1) * slen); |  | ||||||
|         assert(_PyUnicode_CheckConsistency(res, 1)); |  | ||||||
|         return Py_BuildValue("(Nn)", res, end); |  | ||||||
|     } |     } | ||||||
|     else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { |     PyObject *res = codec_handler_unicode_replacement_character(1); | ||||||
|         if (_PyUnicodeError_GetParams(exc, NULL, NULL, |     if (res == NULL) { | ||||||
|                                       NULL, &end, NULL, true) < 0) { |         return NULL; | ||||||
|             return NULL; |  | ||||||
|         } |  | ||||||
|         return Py_BuildValue("(Cn)", |  | ||||||
|                              (int)Py_UNICODE_REPLACEMENT_CHARACTER, |  | ||||||
|                              end); |  | ||||||
|     } |     } | ||||||
|     else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) { |     return Py_BuildValue("(Nn)", res, end); | ||||||
|         if (_PyUnicodeError_GetParams(exc, NULL, NULL, | } | ||||||
|                                       &start, &end, &slen, false) < 0) { | 
 | ||||||
|             return NULL; | 
 | ||||||
|         } | static PyObject * | ||||||
|         PyObject *res = PyUnicode_New(slen, Py_UNICODE_REPLACEMENT_CHARACTER); | _PyCodec_ReplaceUnicodeTranslateError(PyObject *exc) | ||||||
|         if (res == NULL) { | { | ||||||
|             return NULL; |     Py_ssize_t start, end, slen; | ||||||
|         } |     if (_PyUnicodeError_GetParams(exc, NULL, NULL, | ||||||
|         assert(slen == 0 || PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND); |                                   &start, &end, &slen, false) < 0) | ||||||
|         Py_UCS2 *outp = PyUnicode_2BYTE_DATA(res); |     { | ||||||
|         for (Py_ssize_t i = 0; i < slen; ++i) { |         return NULL; | ||||||
|             outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER; |     } | ||||||
|         } |     PyObject *res = codec_handler_unicode_replacement_character(slen); | ||||||
|         assert(_PyUnicode_CheckConsistency(res, 1)); |     if (res == NULL) { | ||||||
|         return Py_BuildValue("(Nn)", res, end); |         return NULL; | ||||||
|  |     } | ||||||
|  |     return Py_BuildValue("(Nn)", res, end); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | PyObject *PyCodec_ReplaceErrors(PyObject *exc) | ||||||
|  | { | ||||||
|  |     if (_PyIsUnicodeEncodeError(exc)) { | ||||||
|  |         return _PyCodec_ReplaceUnicodeEncodeError(exc); | ||||||
|  |     } | ||||||
|  |     else if (_PyIsUnicodeDecodeError(exc)) { | ||||||
|  |         return _PyCodec_ReplaceUnicodeDecodeError(exc); | ||||||
|  |     } | ||||||
|  |     else if (_PyIsUnicodeTranslateError(exc)) { | ||||||
|  |         return _PyCodec_ReplaceUnicodeTranslateError(exc); | ||||||
|     } |     } | ||||||
|     else { |     else { | ||||||
|         wrong_exception_type(exc); |         wrong_exception_type(exc); | ||||||
|  | @ -1468,7 +1510,8 @@ ignore_errors(PyObject *Py_UNUSED(self), PyObject *exc) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| static PyObject *replace_errors(PyObject *self, PyObject *exc) | static inline PyObject * | ||||||
|  | replace_errors(PyObject *Py_UNUSED(self), PyObject *exc) | ||||||
| { | { | ||||||
|     return PyCodec_ReplaceErrors(exc); |     return PyCodec_ReplaceErrors(exc); | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Bénédikt Tran
						Bénédikt Tran