Issue #19676: Added the "namereplace" error handler.

This commit is contained in:
Serhiy Storchaka 2014-11-25 13:57:17 +02:00
parent 6cecf68c7b
commit 166ebc4e5d
11 changed files with 255 additions and 7 deletions

View file

@ -158,6 +158,22 @@ def test_backslashescape(self):
sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff"
self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
def test_nameescape(self):
# Does the same as backslashescape, but prefers ``\N{...}`` escape
# sequences.
sin = "a\xac\u1234\u20ac\u8000\U0010ffff"
sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
self.assertEqual(sin.encode("ascii", "namereplace"), sout)
sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}'
b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
self.assertEqual(sin.encode("latin-1", "namereplace"), sout)
sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4'
b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff')
self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout)
def test_decoding_callbacks(self):
# This is a test for a decoding callback handler
# that allows the decoding of the invalid sequence
@ -297,7 +313,7 @@ def handler2(exc):
def test_longstrings(self):
# test long strings to check for memory overflow problems
errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
"backslashreplace"]
"backslashreplace", "namereplace"]
# register the handlers under different names,
# to prevent the codec from recognizing the name
for err in errors:
@ -611,6 +627,81 @@ def test_badandgoodbackslashreplaceexceptions(self):
("\\udfff", 1)
)
def test_badandgoodnamereplaceexceptions(self):
# "namereplace" complains about a non-exception passed in
self.assertRaises(
TypeError,
codecs.namereplace_errors,
42
)
# "namereplace" complains about the wrong exception types
self.assertRaises(
TypeError,
codecs.namereplace_errors,
UnicodeError("ouch")
)
# "namereplace" can only be used for encoding
self.assertRaises(
TypeError,
codecs.namereplace_errors,
UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")
)
self.assertRaises(
TypeError,
codecs.namereplace_errors,
UnicodeTranslateError("\u3042", 0, 1, "ouch")
)
# Use the correct exception
self.assertEqual(
codecs.namereplace_errors(
UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
("\\N{HIRAGANA LETTER A}", 1)
)
self.assertEqual(
codecs.namereplace_errors(
UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")),
("\\x00", 1)
)
self.assertEqual(
codecs.namereplace_errors(
UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")),
("\\N{LATIN SMALL LETTER Y WITH DIAERESIS}", 1)
)
self.assertEqual(
codecs.namereplace_errors(
UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")),
("\\N{LATIN CAPITAL LETTER A WITH MACRON}", 1)
)
self.assertEqual(
codecs.namereplace_errors(
UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
("\\uffff", 1)
)
if SIZEOF_WCHAR_T > 0:
self.assertEqual(
codecs.namereplace_errors(
UnicodeEncodeError("ascii", "\U00010000",
0, 1, "ouch")),
("\\N{LINEAR B SYLLABLE B008 A}", 1)
)
self.assertEqual(
codecs.namereplace_errors(
UnicodeEncodeError("ascii", "\U0010ffff",
0, 1, "ouch")),
("\\U0010ffff", 1)
)
# Lone surrogates (regardless of unicode width)
self.assertEqual(
codecs.namereplace_errors(
UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")),
("\\ud800", 1)
)
self.assertEqual(
codecs.namereplace_errors(
UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")),
("\\udfff", 1)
)
def test_badhandlerresults(self):
results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
@ -651,6 +742,10 @@ def test_lookup(self):
codecs.backslashreplace_errors,
codecs.lookup_error("backslashreplace")
)
self.assertEqual(
codecs.namereplace_errors,
codecs.lookup_error("namereplace")
)
def test_unencodablereplacement(self):
def unencrepl(exc):
@ -804,7 +899,8 @@ def badencodereturn2(exc):
class D(dict):
def __getitem__(self, key):
raise ValueError
for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
for err in ("strict", "replace", "xmlcharrefreplace",
"backslashreplace", "namereplace", "test.posreturn"):
self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None})
self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D())
self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300})