mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	Rename utf8b error handler to surrogateescape.
This commit is contained in:
		
							parent
							
								
									e0a2b72e61
								
							
						
					
					
						commit
						43c57785d3
					
				
					 9 changed files with 30 additions and 30 deletions
				
			
		| 
						 | 
					@ -322,7 +322,7 @@ and implemented by all standard Python codecs:
 | 
				
			||||||
| ``'backslashreplace'``  | Replace with backslashed escape sequences     |
 | 
					| ``'backslashreplace'``  | Replace with backslashed escape sequences     |
 | 
				
			||||||
|                         | (only for encoding).                          |
 | 
					|                         | (only for encoding).                          |
 | 
				
			||||||
+-------------------------+-----------------------------------------------+
 | 
					+-------------------------+-----------------------------------------------+
 | 
				
			||||||
| ``'utf8b'``             | Replace byte with surrogate U+DCxx.           |
 | 
					| ``'surrogateescape'``   | Replace byte with surrogate U+DCxx.           |
 | 
				
			||||||
+-------------------------+-----------------------------------------------+
 | 
					+-------------------------+-----------------------------------------------+
 | 
				
			||||||
 | 
					
 | 
				
			||||||
In addition, the following error handlers are specific to a single codec:
 | 
					In addition, the following error handlers are specific to a single codec:
 | 
				
			||||||
| 
						 | 
					@ -335,7 +335,7 @@ In addition, the following error handlers are specific to a single codec:
 | 
				
			||||||
+-------------------+---------+-------------------------------------------+
 | 
					+-------------------+---------+-------------------------------------------+
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. versionadded:: 3.1
 | 
					.. versionadded:: 3.1
 | 
				
			||||||
   The ``'utf8b'`` and ``'surrogatepass'`` error handlers.
 | 
					   The ``'surrogateescape'`` and ``'surrogatepass'`` error handlers.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The set of allowed values can be extended via :meth:`register_error`.
 | 
					The set of allowed values can be extended via :meth:`register_error`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -64,8 +64,8 @@ perform this conversion (see :func:`sys.getfilesystemencoding`).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. versionchanged:: 3.1
 | 
					.. versionchanged:: 3.1
 | 
				
			||||||
   On some systems, conversion using the file system encoding may
 | 
					   On some systems, conversion using the file system encoding may
 | 
				
			||||||
   fail. In this case, Python uses the ``utf8b`` encoding error
 | 
					   fail. In this case, Python uses the ``surrogateescape`` encoding
 | 
				
			||||||
   handler, which means that undecodable bytes are replaced by a
 | 
					   error handler, which means that undecodable bytes are replaced by a
 | 
				
			||||||
   Unicode character U+DCxx on decoding, and these are again
 | 
					   Unicode character U+DCxx on decoding, and these are again
 | 
				
			||||||
   translated to the original byte on encoding.
 | 
					   translated to the original byte on encoding.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1521,32 +1521,32 @@ def test_unicode_escape(self):
 | 
				
			||||||
        self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
 | 
					        self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
 | 
				
			||||||
        self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
 | 
					        self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Utf8bTest(unittest.TestCase):
 | 
					class SurrogateEscapeTest(unittest.TestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_utf8(self):
 | 
					    def test_utf8(self):
 | 
				
			||||||
        # Bad byte
 | 
					        # Bad byte
 | 
				
			||||||
        self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"),
 | 
					        self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"),
 | 
				
			||||||
                         "foo\udc80bar")
 | 
					                         "foo\udc80bar")
 | 
				
			||||||
        self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"),
 | 
					        self.assertEqual("foo\udc80bar".encode("utf-8", "surrogateescape"),
 | 
				
			||||||
                         b"foo\x80bar")
 | 
					                         b"foo\x80bar")
 | 
				
			||||||
        # bad-utf-8 encoded surrogate
 | 
					        # bad-utf-8 encoded surrogate
 | 
				
			||||||
        self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"),
 | 
					        self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "surrogateescape"),
 | 
				
			||||||
                         "\udced\udcb0\udc80")
 | 
					                         "\udced\udcb0\udc80")
 | 
				
			||||||
        self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"),
 | 
					        self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "surrogateescape"),
 | 
				
			||||||
                         b"\xed\xb0\x80")
 | 
					                         b"\xed\xb0\x80")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_ascii(self):
 | 
					    def test_ascii(self):
 | 
				
			||||||
        # bad byte
 | 
					        # bad byte
 | 
				
			||||||
        self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"),
 | 
					        self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"),
 | 
				
			||||||
                         "foo\udc80bar")
 | 
					                         "foo\udc80bar")
 | 
				
			||||||
        self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"),
 | 
					        self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"),
 | 
				
			||||||
                         b"foo\x80bar")
 | 
					                         b"foo\x80bar")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_charmap(self):
 | 
					    def test_charmap(self):
 | 
				
			||||||
        # bad byte: \xa5 is unmapped in iso-8859-3
 | 
					        # bad byte: \xa5 is unmapped in iso-8859-3
 | 
				
			||||||
        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"),
 | 
					        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"),
 | 
				
			||||||
                         "foo\udca5bar")
 | 
					                         "foo\udca5bar")
 | 
				
			||||||
        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"),
 | 
					        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"),
 | 
				
			||||||
                         b"foo\xa5bar")
 | 
					                         b"foo\xa5bar")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1576,7 +1576,7 @@ def test_main():
 | 
				
			||||||
        CharmapTest,
 | 
					        CharmapTest,
 | 
				
			||||||
        WithStmtTest,
 | 
					        WithStmtTest,
 | 
				
			||||||
        TypesTest,
 | 
					        TypesTest,
 | 
				
			||||||
        Utf8bTest,
 | 
					        SurrogateEscapeTest,
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -708,13 +708,13 @@ def setUp(self):
 | 
				
			||||||
            self.fsencoding = sys.getfilesystemencoding()
 | 
					            self.fsencoding = sys.getfilesystemencoding()
 | 
				
			||||||
            sys.setfilesystemencoding("utf-8")
 | 
					            sys.setfilesystemencoding("utf-8")
 | 
				
			||||||
            self.dir = support.TESTFN
 | 
					            self.dir = support.TESTFN
 | 
				
			||||||
            self.bdir = self.dir.encode("utf-8", "utf8b")
 | 
					            self.bdir = self.dir.encode("utf-8", "surrogateescape")
 | 
				
			||||||
            os.mkdir(self.dir)
 | 
					            os.mkdir(self.dir)
 | 
				
			||||||
            self.unicodefn = []
 | 
					            self.unicodefn = []
 | 
				
			||||||
            for fn in self.filenames:
 | 
					            for fn in self.filenames:
 | 
				
			||||||
                f = open(os.path.join(self.bdir, fn), "w")
 | 
					                f = open(os.path.join(self.bdir, fn), "w")
 | 
				
			||||||
                f.close()
 | 
					                f.close()
 | 
				
			||||||
                self.unicodefn.append(fn.decode("utf-8", "utf8b"))
 | 
					                self.unicodefn.append(fn.decode("utf-8", "surrogateescape"))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        def tearDown(self):
 | 
					        def tearDown(self):
 | 
				
			||||||
            shutil.rmtree(self.dir)
 | 
					            shutil.rmtree(self.dir)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -245,7 +245,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
 | 
				
			||||||
				return -1;
 | 
									return -1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			stringobj = PyUnicode_AsEncodedString(
 | 
								stringobj = PyUnicode_AsEncodedString(
 | 
				
			||||||
				u, Py_FileSystemDefaultEncoding, "utf8b");
 | 
									u, Py_FileSystemDefaultEncoding, "surrogateescape");
 | 
				
			||||||
			Py_DECREF(u);
 | 
								Py_DECREF(u);
 | 
				
			||||||
			if (stringobj == NULL)
 | 
								if (stringobj == NULL)
 | 
				
			||||||
				return -1;
 | 
									return -1;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -494,13 +494,13 @@ convertenviron(void)
 | 
				
			||||||
		if (p == NULL)
 | 
							if (p == NULL)
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
		k = PyUnicode_Decode(*e, (int)(p-*e),
 | 
							k = PyUnicode_Decode(*e, (int)(p-*e),
 | 
				
			||||||
				     Py_FileSystemDefaultEncoding, "utf8b");
 | 
									     Py_FileSystemDefaultEncoding, "surrogateescape");
 | 
				
			||||||
		if (k == NULL) {
 | 
							if (k == NULL) {
 | 
				
			||||||
			PyErr_Clear();
 | 
								PyErr_Clear();
 | 
				
			||||||
			continue;
 | 
								continue;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		v = PyUnicode_Decode(p+1, strlen(p+1),
 | 
							v = PyUnicode_Decode(p+1, strlen(p+1),
 | 
				
			||||||
				     Py_FileSystemDefaultEncoding, "utf8b");
 | 
									     Py_FileSystemDefaultEncoding, "surrogateescape");
 | 
				
			||||||
		if (v == NULL) {
 | 
							if (v == NULL) {
 | 
				
			||||||
			PyErr_Clear();
 | 
								PyErr_Clear();
 | 
				
			||||||
			Py_DECREF(k);
 | 
								Py_DECREF(k);
 | 
				
			||||||
| 
						 | 
					@ -2167,7 +2167,7 @@ posix_getcwd(int use_bytes)
 | 
				
			||||||
		return posix_error();
 | 
							return posix_error();
 | 
				
			||||||
	if (use_bytes)
 | 
						if (use_bytes)
 | 
				
			||||||
		return PyBytes_FromStringAndSize(buf, strlen(buf));
 | 
							return PyBytes_FromStringAndSize(buf, strlen(buf));
 | 
				
			||||||
	return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"utf8b");
 | 
						return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"surrogateescape");
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PyDoc_STRVAR(posix_getcwd__doc__,
 | 
					PyDoc_STRVAR(posix_getcwd__doc__,
 | 
				
			||||||
| 
						 | 
					@ -2513,7 +2513,7 @@ posix_listdir(PyObject *self, PyObject *args)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			w = PyUnicode_FromEncodedObject(v,
 | 
								w = PyUnicode_FromEncodedObject(v,
 | 
				
			||||||
					Py_FileSystemDefaultEncoding,
 | 
										Py_FileSystemDefaultEncoding,
 | 
				
			||||||
					"utf8b");
 | 
										"surrogateescape");
 | 
				
			||||||
			Py_DECREF(v);
 | 
								Py_DECREF(v);
 | 
				
			||||||
			if (w != NULL)
 | 
								if (w != NULL)
 | 
				
			||||||
				v = w;
 | 
									v = w;
 | 
				
			||||||
| 
						 | 
					@ -4695,7 +4695,7 @@ posix_readlink(PyObject *self, PyObject *args)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		w = PyUnicode_FromEncodedObject(v,
 | 
							w = PyUnicode_FromEncodedObject(v,
 | 
				
			||||||
				Py_FileSystemDefaultEncoding,
 | 
									Py_FileSystemDefaultEncoding,
 | 
				
			||||||
				"utf8b");
 | 
									"surrogateescape");
 | 
				
			||||||
		if (w != NULL) {
 | 
							if (w != NULL) {
 | 
				
			||||||
			Py_DECREF(v);
 | 
								Py_DECREF(v);
 | 
				
			||||||
			v = w;
 | 
								v = w;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -42,7 +42,7 @@ char2wchar(char* arg)
 | 
				
			||||||
			return res;
 | 
								return res;
 | 
				
			||||||
		PyMem_Free(res);
 | 
							PyMem_Free(res);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	/* Conversion failed. Fall back to escaping with utf8b. */
 | 
						/* Conversion failed. Fall back to escaping with surrogateescape. */
 | 
				
			||||||
#ifdef HAVE_MBRTOWC
 | 
					#ifdef HAVE_MBRTOWC
 | 
				
			||||||
	/* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
 | 
						/* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1549,7 +1549,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
 | 
				
			||||||
            return 0;
 | 
					            return 0;
 | 
				
			||||||
        output = PyUnicode_AsEncodedObject(arg, 
 | 
					        output = PyUnicode_AsEncodedObject(arg, 
 | 
				
			||||||
                                           Py_FileSystemDefaultEncoding,
 | 
					                                           Py_FileSystemDefaultEncoding,
 | 
				
			||||||
                                           "utf8b");
 | 
					                                           "surrogateescape");
 | 
				
			||||||
        Py_DECREF(arg);
 | 
					        Py_DECREF(arg);
 | 
				
			||||||
        if (!output)
 | 
					        if (!output)
 | 
				
			||||||
            return 0;
 | 
					            return 0;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -830,7 +830,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PyObject *
 | 
					static PyObject *
 | 
				
			||||||
PyCodec_UTF8bErrors(PyObject *exc)
 | 
					PyCodec_SurrogateEscapeErrors(PyObject *exc)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    PyObject *restuple;
 | 
					    PyObject *restuple;
 | 
				
			||||||
    PyObject *object;
 | 
					    PyObject *object;
 | 
				
			||||||
| 
						 | 
					@ -940,9 +940,9 @@ static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
 | 
				
			||||||
    return PyCodec_SurrogatePassErrors(exc);
 | 
					    return PyCodec_SurrogatePassErrors(exc);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
 | 
					static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    return PyCodec_UTF8bErrors(exc);
 | 
					    return PyCodec_SurrogateEscapeErrors(exc);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int _PyCodecRegistry_Init(void)
 | 
					static int _PyCodecRegistry_Init(void)
 | 
				
			||||||
| 
						 | 
					@ -1001,10 +1001,10 @@ static int _PyCodecRegistry_Init(void)
 | 
				
			||||||
	    }
 | 
						    }
 | 
				
			||||||
	},
 | 
						},
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
	    "utf8b",
 | 
						    "surrogateescape",
 | 
				
			||||||
	    {
 | 
						    {
 | 
				
			||||||
		"utf8b",
 | 
							"surrogateescape",
 | 
				
			||||||
		utf8b_errors,
 | 
							surrogateescape_errors,
 | 
				
			||||||
		METH_O
 | 
							METH_O
 | 
				
			||||||
	    }
 | 
						    }
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue