mirror of
				https://github.com/python/cpython.git
				synced 2025-11-01 06:01:29 +00:00 
			
		
		
		
	Rename utf8b error handler to surrogateescape.
This commit is contained in:
		
							parent
							
								
									e0a2b72e61
								
							
						
					
					
						commit
						43c57785d3
					
				
					 9 changed files with 30 additions and 30 deletions
				
			
		|  | @ -322,7 +322,7 @@ and implemented by all standard Python codecs: | |||
| | ``'backslashreplace'``  | Replace with backslashed escape sequences     | | ||||
| |                         | (only for encoding).                          | | ||||
| +-------------------------+-----------------------------------------------+ | ||||
| | ``'utf8b'``             | Replace byte with surrogate U+DCxx.           | | ||||
| | ``'surrogateescape'``   | Replace byte with surrogate U+DCxx.           | | ||||
| +-------------------------+-----------------------------------------------+ | ||||
| 
 | ||||
| In addition, the following error handlers are specific to a single codec: | ||||
|  | @ -335,7 +335,7 @@ In addition, the following error handlers are specific to a single codec: | |||
| +-------------------+---------+-------------------------------------------+ | ||||
| 
 | ||||
| .. versionadded:: 3.1 | ||||
|    The ``'utf8b'`` and ``'surrogatepass'`` error handlers. | ||||
|    The ``'surrogateescape'`` and ``'surrogatepass'`` error handlers. | ||||
| 
 | ||||
| The set of allowed values can be extended via :meth:`register_error`. | ||||
| 
 | ||||
|  |  | |||
|  | @ -64,8 +64,8 @@ perform this conversion (see :func:`sys.getfilesystemencoding`). | |||
| 
 | ||||
| .. versionchanged:: 3.1 | ||||
|    On some systems, conversion using the file system encoding may | ||||
|    fail. In this case, Python uses the ``utf8b`` encoding error | ||||
|    handler, which means that undecodable bytes are replaced by a | ||||
|    fail. In this case, Python uses the ``surrogateescape`` encoding | ||||
|    error handler, which means that undecodable bytes are replaced by a | ||||
|    Unicode character U+DCxx on decoding, and these are again | ||||
|    translated to the original byte on encoding. | ||||
| 
 | ||||
|  |  | |||
|  | @ -1521,32 +1521,32 @@ def test_unicode_escape(self): | |||
|         self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6)) | ||||
|         self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6)) | ||||
| 
 | ||||
| class Utf8bTest(unittest.TestCase): | ||||
| class SurrogateEscapeTest(unittest.TestCase): | ||||
| 
 | ||||
|     def test_utf8(self): | ||||
|         # Bad byte | ||||
|         self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"), | ||||
|         self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"), | ||||
|                          "foo\udc80bar") | ||||
|         self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"), | ||||
|         self.assertEqual("foo\udc80bar".encode("utf-8", "surrogateescape"), | ||||
|                          b"foo\x80bar") | ||||
|         # bad-utf-8 encoded surrogate | ||||
|         self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"), | ||||
|         self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "surrogateescape"), | ||||
|                          "\udced\udcb0\udc80") | ||||
|         self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"), | ||||
|         self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "surrogateescape"), | ||||
|                          b"\xed\xb0\x80") | ||||
| 
 | ||||
|     def test_ascii(self): | ||||
|         # bad byte | ||||
|         self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"), | ||||
|         self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"), | ||||
|                          "foo\udc80bar") | ||||
|         self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"), | ||||
|         self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"), | ||||
|                          b"foo\x80bar") | ||||
| 
 | ||||
|     def test_charmap(self): | ||||
|         # bad byte: \xa5 is unmapped in iso-8859-3 | ||||
|         self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"), | ||||
|         self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"), | ||||
|                          "foo\udca5bar") | ||||
|         self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"), | ||||
|         self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"), | ||||
|                          b"foo\xa5bar") | ||||
| 
 | ||||
| 
 | ||||
|  | @ -1576,7 +1576,7 @@ def test_main(): | |||
|         CharmapTest, | ||||
|         WithStmtTest, | ||||
|         TypesTest, | ||||
|         Utf8bTest, | ||||
|         SurrogateEscapeTest, | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -708,13 +708,13 @@ def setUp(self): | |||
|             self.fsencoding = sys.getfilesystemencoding() | ||||
|             sys.setfilesystemencoding("utf-8") | ||||
|             self.dir = support.TESTFN | ||||
|             self.bdir = self.dir.encode("utf-8", "utf8b") | ||||
|             self.bdir = self.dir.encode("utf-8", "surrogateescape") | ||||
|             os.mkdir(self.dir) | ||||
|             self.unicodefn = [] | ||||
|             for fn in self.filenames: | ||||
|                 f = open(os.path.join(self.bdir, fn), "w") | ||||
|                 f.close() | ||||
|                 self.unicodefn.append(fn.decode("utf-8", "utf8b")) | ||||
|                 self.unicodefn.append(fn.decode("utf-8", "surrogateescape")) | ||||
| 
 | ||||
|         def tearDown(self): | ||||
|             shutil.rmtree(self.dir) | ||||
|  |  | |||
|  | @ -245,7 +245,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) | |||
| 				return -1; | ||||
| 
 | ||||
| 			stringobj = PyUnicode_AsEncodedString( | ||||
| 				u, Py_FileSystemDefaultEncoding, "utf8b"); | ||||
| 				u, Py_FileSystemDefaultEncoding, "surrogateescape"); | ||||
| 			Py_DECREF(u); | ||||
| 			if (stringobj == NULL) | ||||
| 				return -1; | ||||
|  |  | |||
|  | @ -494,13 +494,13 @@ convertenviron(void) | |||
| 		if (p == NULL) | ||||
| 			continue; | ||||
| 		k = PyUnicode_Decode(*e, (int)(p-*e), | ||||
| 				     Py_FileSystemDefaultEncoding, "utf8b"); | ||||
| 				     Py_FileSystemDefaultEncoding, "surrogateescape"); | ||||
| 		if (k == NULL) { | ||||
| 			PyErr_Clear(); | ||||
| 			continue; | ||||
| 		} | ||||
| 		v = PyUnicode_Decode(p+1, strlen(p+1), | ||||
| 				     Py_FileSystemDefaultEncoding, "utf8b"); | ||||
| 				     Py_FileSystemDefaultEncoding, "surrogateescape"); | ||||
| 		if (v == NULL) { | ||||
| 			PyErr_Clear(); | ||||
| 			Py_DECREF(k); | ||||
|  | @ -2167,7 +2167,7 @@ posix_getcwd(int use_bytes) | |||
| 		return posix_error(); | ||||
| 	if (use_bytes) | ||||
| 		return PyBytes_FromStringAndSize(buf, strlen(buf)); | ||||
| 	return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"utf8b"); | ||||
| 	return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"surrogateescape"); | ||||
| } | ||||
| 
 | ||||
| PyDoc_STRVAR(posix_getcwd__doc__, | ||||
|  | @ -2513,7 +2513,7 @@ posix_listdir(PyObject *self, PyObject *args) | |||
| 
 | ||||
| 			w = PyUnicode_FromEncodedObject(v, | ||||
| 					Py_FileSystemDefaultEncoding, | ||||
| 					"utf8b"); | ||||
| 					"surrogateescape"); | ||||
| 			Py_DECREF(v); | ||||
| 			if (w != NULL) | ||||
| 				v = w; | ||||
|  | @ -4695,7 +4695,7 @@ posix_readlink(PyObject *self, PyObject *args) | |||
| 
 | ||||
| 		w = PyUnicode_FromEncodedObject(v, | ||||
| 				Py_FileSystemDefaultEncoding, | ||||
| 				"utf8b"); | ||||
| 				"surrogateescape"); | ||||
| 		if (w != NULL) { | ||||
| 			Py_DECREF(v); | ||||
| 			v = w; | ||||
|  |  | |||
|  | @ -42,7 +42,7 @@ char2wchar(char* arg) | |||
| 			return res; | ||||
| 		PyMem_Free(res); | ||||
| 	} | ||||
| 	/* Conversion failed. Fall back to escaping with utf8b. */ | ||||
| 	/* Conversion failed. Fall back to escaping with surrogateescape. */ | ||||
| #ifdef HAVE_MBRTOWC | ||||
| 	/* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */ | ||||
| 	 | ||||
|  |  | |||
|  | @ -1549,7 +1549,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr) | |||
|             return 0; | ||||
|         output = PyUnicode_AsEncodedObject(arg,  | ||||
|                                            Py_FileSystemDefaultEncoding, | ||||
|                                            "utf8b"); | ||||
|                                            "surrogateescape"); | ||||
|         Py_DECREF(arg); | ||||
|         if (!output) | ||||
|             return 0; | ||||
|  |  | |||
|  | @ -830,7 +830,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc) | |||
| } | ||||
| 
 | ||||
| static PyObject * | ||||
| PyCodec_UTF8bErrors(PyObject *exc) | ||||
| PyCodec_SurrogateEscapeErrors(PyObject *exc) | ||||
| { | ||||
|     PyObject *restuple; | ||||
|     PyObject *object; | ||||
|  | @ -940,9 +940,9 @@ static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc) | |||
|     return PyCodec_SurrogatePassErrors(exc); | ||||
| } | ||||
| 
 | ||||
| static PyObject *utf8b_errors(PyObject *self, PyObject *exc) | ||||
| static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) | ||||
| { | ||||
|     return PyCodec_UTF8bErrors(exc); | ||||
|     return PyCodec_SurrogateEscapeErrors(exc); | ||||
| } | ||||
| 
 | ||||
| static int _PyCodecRegistry_Init(void) | ||||
|  | @ -1001,10 +1001,10 @@ static int _PyCodecRegistry_Init(void) | |||
| 	    } | ||||
| 	}, | ||||
| 	{ | ||||
| 	    "utf8b", | ||||
| 	    "surrogateescape", | ||||
| 	    { | ||||
| 		"utf8b", | ||||
| 		utf8b_errors, | ||||
| 		"surrogateescape", | ||||
| 		surrogateescape_errors, | ||||
| 		METH_O | ||||
| 	    } | ||||
| 	} | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Martin v. Löwis
						Martin v. Löwis