| 
									
										
										
										
											2011-11-15 22:44:05 +01:00
										 |  |  | import codecs | 
					
						
							|  |  |  | import html.entities | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | import unicodedata | 
					
						
							|  |  |  | import unittest | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-07-01 18:28:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | class PosReturn: | 
					
						
							|  |  |  |     # this can be used for configurable callbacks | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							|  |  |  |         self.pos = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def handle(self, exc): | 
					
						
							|  |  |  |         oldpos = self.pos | 
					
						
							|  |  |  |         realpos = oldpos | 
					
						
							|  |  |  |         if realpos<0: | 
					
						
							| 
									
										
										
										
											2003-02-19 02:35:07 +00:00
										 |  |  |             realpos = len(exc.object) + realpos | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         # if we don't advance this time, terminate on the next call | 
					
						
							|  |  |  |         # otherwise we'd get an endless loop | 
					
						
							|  |  |  |         if realpos <= exc.start: | 
					
						
							|  |  |  |             self.pos = len(exc.object) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         return ("<?>", oldpos) | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  | # A UnicodeEncodeError object with a bad start attribute | 
					
						
							|  |  |  | class BadStartUnicodeEncodeError(UnicodeEncodeError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         self.start = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeEncodeError object with a bad object attribute | 
					
						
							|  |  |  | class BadObjectUnicodeEncodeError(UnicodeEncodeError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         self.object = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeDecodeError object without an end attribute | 
					
						
							|  |  |  | class NoEndUnicodeDecodeError(UnicodeDecodeError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |         UnicodeDecodeError.__init__(self, "ascii", bytearray(b""), 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         del self.end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeDecodeError object with a bad object attribute | 
					
						
							|  |  |  | class BadObjectUnicodeDecodeError(UnicodeDecodeError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |         UnicodeDecodeError.__init__(self, "ascii", bytearray(b""), 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         self.object = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeTranslateError object without a start attribute | 
					
						
							|  |  |  | class NoStartUnicodeTranslateError(UnicodeTranslateError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeTranslateError.__init__(self, "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         del self.start | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeTranslateError object without an end attribute | 
					
						
							|  |  |  | class NoEndUnicodeTranslateError(UnicodeTranslateError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeTranslateError.__init__(self,  "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         del self.end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeTranslateError object without an object attribute | 
					
						
							|  |  |  | class NoObjectUnicodeTranslateError(UnicodeTranslateError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeTranslateError.__init__(self, "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         del self.object | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | class CodecCallbackTest(unittest.TestCase): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_xmlcharrefreplace(self): | 
					
						
							|  |  |  |         # replace unencodable characters which numeric character entities. | 
					
						
							|  |  |  |         # For ascii, latin-1 and charmaps this is completely implemented | 
					
						
							|  |  |  |         # in C and should be reasonably fast. | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         s = "\u30b9\u30d1\u30e2 \xe4nd eggs" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             s.encode("ascii", "xmlcharrefreplace"), | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"スパモ änd eggs" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             s.encode("latin-1", "xmlcharrefreplace"), | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"スパモ \xe4nd eggs" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_xmlcharnamereplace(self): | 
					
						
							|  |  |  |         # This time use a named character entity for unencodable | 
					
						
							|  |  |  |         # characters, if one is available. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def xmlcharnamereplace(exc): | 
					
						
							|  |  |  |             if not isinstance(exc, UnicodeEncodeError): | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |             l = [] | 
					
						
							|  |  |  |             for c in exc.object[exc.start:exc.end]: | 
					
						
							|  |  |  |                 try: | 
					
						
							| 
									
										
										
										
											2008-05-17 22:02:32 +00:00
										 |  |  |                     l.append("&%s;" % html.entities.codepoint2name[ord(c)]) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                 except KeyError: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                     l.append("&#%d;" % ord(c)) | 
					
						
							|  |  |  |             return ("".join(l), exc.end) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error( | 
					
						
							|  |  |  |             "test.xmlcharnamereplace", xmlcharnamereplace) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "\xab\u211c\xbb = \u2329\u1234\u20ac\u232a" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"«ℜ» = ⟨ሴ€⟩" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout) | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\xabℜ\xbb = ⟨ሴ€⟩" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout) | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\xabℜ\xbb = ⟨ሴ\xa4⟩" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_uninamereplace(self): | 
					
						
							|  |  |  |         # We're using the names from the unicode database this time, | 
					
						
							| 
									
										
										
										
											2002-11-25 17:58:02 +00:00
										 |  |  |         # and we're doing "syntax highlighting" here, i.e. we include | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         # the replaced text in ANSI escape sequences. For this it is | 
					
						
							|  |  |  |         # useful that the error handler is not called for every single | 
					
						
							|  |  |  |         # unencodable character, but for a complete sequence of | 
					
						
							|  |  |  |         # unencodable characters, otherwise we would output many | 
					
						
							| 
									
										
										
										
											2009-02-21 20:59:32 +00:00
										 |  |  |         # unnecessary escape sequences. | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def uninamereplace(exc): | 
					
						
							|  |  |  |             if not isinstance(exc, UnicodeEncodeError): | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |             l = [] | 
					
						
							|  |  |  |             for c in exc.object[exc.start:exc.end]: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 l.append(unicodedata.name(c, "0x%x" % ord(c))) | 
					
						
							|  |  |  |             return ("\033[1m%s\033[0m" % ", ".join(l), exc.end) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error( | 
					
						
							|  |  |  |             "test.uninamereplace", uninamereplace) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "\xac\u1234\u20ac\u8000" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_backslashescape(self): | 
					
						
							|  |  |  |         # Does the same as the "unicode-escape" encoding, but with different | 
					
						
							|  |  |  |         # base encodings. | 
					
						
							| 
									
										
										
										
											2011-10-04 19:06:00 +03:00
										 |  |  |         sin = "a\xac\u1234\u20ac\u8000\U0010ffff" | 
					
						
							|  |  |  |         sout = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-04 19:06:00 +03:00
										 |  |  |         sout = b"a\xac\\u1234\\u20ac\\u8000\\U0010ffff" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-04 19:06:00 +03:00
										 |  |  |         sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-11-25 13:57:17 +02:00
										 |  |  |     def test_nameescape(self): | 
					
						
							|  |  |  |         # Does the same as backslashescape, but prefers ``\N{...}`` escape | 
					
						
							|  |  |  |         # sequences. | 
					
						
							|  |  |  |         sin = "a\xac\u1234\u20ac\u8000\U0010ffff" | 
					
						
							|  |  |  |         sout = (b'a\\N{NOT SIGN}\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' | 
					
						
							|  |  |  |                 b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') | 
					
						
							|  |  |  |         self.assertEqual(sin.encode("ascii", "namereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\\N{EURO SIGN}' | 
					
						
							|  |  |  |                 b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') | 
					
						
							|  |  |  |         self.assertEqual(sin.encode("latin-1", "namereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         sout = (b'a\xac\\N{ETHIOPIC SYLLABLE SEE}\xa4' | 
					
						
							|  |  |  |                 b'\\N{CJK UNIFIED IDEOGRAPH-8000}\\U0010ffff') | 
					
						
							|  |  |  |         self.assertEqual(sin.encode("iso-8859-15", "namereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-07-01 07:32:02 +00:00
										 |  |  |     def test_decoding_callbacks(self): | 
					
						
							|  |  |  |         # This is a test for a decoding callback handler | 
					
						
							|  |  |  |         # that allows the decoding of the invalid sequence | 
					
						
							|  |  |  |         # "\xc0\x80" and returns "\x00" instead of raising an error. | 
					
						
							|  |  |  |         # All other illegal sequences will be handled strictly. | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         def relaxedutf8(exc): | 
					
						
							|  |  |  |             if not isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							| 
									
										
										
										
											2010-07-01 07:32:02 +00:00
										 |  |  |             if exc.object[exc.start:exc.start+2] == b"\xc0\x80": | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 return ("\x00", exc.start+2) # retry after two bytes | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 raise exc | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-07-01 07:32:02 +00:00
										 |  |  |         codecs.register_error("test.relaxedutf8", relaxedutf8) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-07-01 07:32:02 +00:00
										 |  |  |         # all the "\xc0\x80" will be decoded to "\x00" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sin = b"a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80" | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sout = "a\x00b\x00c\xfc\x00\x00" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout) | 
					
						
							| 
									
										
										
										
											2010-07-01 07:32:02 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sin = b"\xc0\x80\xc0\x81" | 
					
						
							| 
									
										
										
										
											2010-07-01 07:32:02 +00:00
										 |  |  |         self.assertRaises(UnicodeDecodeError, sin.decode, | 
					
						
							|  |  |  |                           "utf-8", "test.relaxedutf8") | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_charmapencode(self): | 
					
						
							|  |  |  |         # For charmap encodings the replacement string will be | 
					
						
							|  |  |  |         # mapped through the encoding again. This means, that | 
					
						
							|  |  |  |         # to be able to use e.g. the "replace" handler, the | 
					
						
							|  |  |  |         # charmap has to have a mapping for "?". | 
					
						
							| 
									
										
										
										
											2007-11-06 21:34:58 +00:00
										 |  |  |         charmap = dict((ord(c), bytes(2*c.upper(), 'ascii')) for c in "abcdefgh") | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "abc" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"AABBCC" | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "abcA" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-11-06 21:34:58 +00:00
										 |  |  |         charmap[ord("?")] = b"XYZ" | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "abcDEF" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"AABBCCXYZXYZXYZ" | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         charmap[ord("?")] = "XYZ" # wrong type in mapping | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_callbacks(self): | 
					
						
							|  |  |  |         def handler1(exc): | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             r = range(exc.start, exc.end) | 
					
						
							|  |  |  |             if isinstance(exc, UnicodeEncodeError): | 
					
						
							|  |  |  |                 l = ["<%d>" % ord(exc.object[pos]) for pos in r] | 
					
						
							|  |  |  |             elif isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 l = ["<%d>" % exc.object[pos] for pos in r] | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("[%s]" % "".join(l), exc.end) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error("test.handler1", handler1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def handler2(exc): | 
					
						
							|  |  |  |             if not isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             l = ["<%d>" % exc.object[pos] for pos in range(exc.start, exc.end)] | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("[%s]" % "".join(l), exc.end+1) # skip one character | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error("test.handler2", handler2) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         s = b"\x00\x81\x7f\x80\xff" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             s.decode("ascii", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "\x00[<129>]\x7f[<128>][<255>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             s.decode("ascii", "test.handler2"), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "\x00[<129>][<128>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2016-09-08 13:59:53 -04:00
										 |  |  |             b"\\u3042\\u3xxx".decode("unicode-escape", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2013-01-29 10:20:44 +02:00
										 |  |  |             "\u3042[<92><117><51>]xxx" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2016-09-08 13:59:53 -04:00
										 |  |  |             b"\\u3042\\u3xx".decode("unicode-escape", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2013-01-29 10:20:44 +02:00
										 |  |  |             "\u3042[<92><117><51>]xx" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.charmap_decode(b"abc", "test.handler1", {ord("a"): "z"})[0], | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "z[<98>][<99>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "g\xfc\xdfrk".encode("ascii", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"g[<252><223>]rk" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "g\xfc\xdf".encode("ascii", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"g[<252><223>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_longstrings(self): | 
					
						
							|  |  |  |         # test long strings to check for memory overflow problems | 
					
						
							| 
									
										
										
										
											2007-08-16 21:55:45 +00:00
										 |  |  |         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", | 
					
						
							| 
									
										
										
										
											2014-11-25 13:57:17 +02:00
										 |  |  |                    "backslashreplace", "namereplace"] | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         # register the handlers under different names, | 
					
						
							|  |  |  |         # to prevent the codec from recognizing the name | 
					
						
							|  |  |  |         for err in errors: | 
					
						
							|  |  |  |             codecs.register_error("test." + err, codecs.lookup_error(err)) | 
					
						
							|  |  |  |         l = 1000 | 
					
						
							|  |  |  |         errors += [ "test." + err for err in errors ] | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         for uni in [ s*l for s in ("x", "\u3042", "a\xe4") ]: | 
					
						
							| 
									
										
										
										
											2007-08-16 21:55:45 +00:00
										 |  |  |             for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", | 
					
						
							|  |  |  |                         "utf-8", "utf-7", "utf-16", "utf-32"): | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                 for err in errors: | 
					
						
							| 
									
										
										
										
											2002-11-09 05:26:15 +00:00
										 |  |  |                     try: | 
					
						
							|  |  |  |                         uni.encode(enc, err) | 
					
						
							|  |  |  |                     except UnicodeError: | 
					
						
							|  |  |  |                         pass | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def check_exceptionobjectargs(self, exctype, args, msg): | 
					
						
							|  |  |  |         # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion | 
					
						
							|  |  |  |         # check with one missing argument | 
					
						
							|  |  |  |         self.assertRaises(TypeError, exctype, *args[:-1]) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # check with one argument too much | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises(TypeError, exctype, *(args + ["too much"])) | 
					
						
							|  |  |  |         # check with one argument of the wrong type | 
					
						
							| 
									
										
										
										
											2007-11-06 21:34:58 +00:00
										 |  |  |         wrongargs = [ "spam", b"eggs", b"spam", 42, 1.0, None ] | 
					
						
							| 
									
										
										
										
											2007-05-07 22:24:25 +00:00
										 |  |  |         for i in range(len(args)): | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             for wrongarg in wrongargs: | 
					
						
							|  |  |  |                 if type(wrongarg) is type(args[i]): | 
					
						
							| 
									
										
										
										
											2002-11-09 05:26:15 +00:00
										 |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                 # build argument array | 
					
						
							|  |  |  |                 callargs = [] | 
					
						
							| 
									
										
										
										
											2007-05-07 22:24:25 +00:00
										 |  |  |                 for j in range(len(args)): | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                     if i==j: | 
					
						
							|  |  |  |                         callargs.append(wrongarg) | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         callargs.append(args[i]) | 
					
						
							|  |  |  |                 self.assertRaises(TypeError, exctype, *callargs) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # check with the correct number and type of arguments | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         exc = exctype(*args) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(str(exc), msg) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_unicodeencodeerror(self): | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "g\xfcrk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "'ascii' codec can't encode character '\\xfc' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "g\xfcrk", 1, 4, "ouch"], | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             "'ascii' codec can't encode characters in position 1-3: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "\xfcx", 0, 1, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "'ascii' codec can't encode character '\\xfc' in position 0: ouch" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "\u0100x", 0, 1, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "'ascii' codec can't encode character '\\u0100' in position 0: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "\uffffx", 0, 1, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "'ascii' codec can't encode character '\\uffff' in position 0: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							|  |  |  |             ["ascii", "\U00010000x", 0, 1, "ouch"], | 
					
						
							|  |  |  |             "'ascii' codec can't encode character '\\U00010000' in position 0: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_unicodedecodeerror(self): | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeDecodeError, | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |             ["ascii", bytearray(b"g\xfcrk"), 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             "'ascii' codec can't decode byte 0xfc in position 1: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeDecodeError, | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |             ["ascii", bytearray(b"g\xfcrk"), 1, 3, "ouch"], | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             "'ascii' codec can't decode bytes in position 1-2: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_unicodetranslateerror(self): | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["g\xfcrk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "can't translate character '\\xfc' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["g\u0100rk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "can't translate character '\\u0100' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["g\uffffrk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "can't translate character '\\uffff' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							|  |  |  |             ["g\U00010000rk", 1, 2, "ouch"], | 
					
						
							|  |  |  |             "can't translate character '\\U00010000' in position 1: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["g\xfcrk", 1, 3, "ouch"], | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             "can't translate characters in position 1-2: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodstrictexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "strict" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.strict_errors, | 
					
						
							|  |  |  |             42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "strict" complains about the wrong exception type | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             Exception, | 
					
						
							|  |  |  |             codecs.strict_errors, | 
					
						
							|  |  |  |             Exception("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # If the correct exception is passed in, "strict" raises it | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							|  |  |  |             codecs.strict_errors, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             UnicodeDecodeError, | 
					
						
							|  |  |  |             codecs.strict_errors, | 
					
						
							|  |  |  |             UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							|  |  |  |             codecs.strict_errors, | 
					
						
							|  |  |  |             UnicodeTranslateError("\u3042", 0, 1, "ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodignoreexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "ignore" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.ignore_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "ignore" complains about the wrong exception type | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.ignore_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # If the correct exception is passed in, "ignore" returns an empty replacement | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.ignore_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                 UnicodeEncodeError("ascii", "a\u3042b", 1, 2, "ouch")), | 
					
						
							|  |  |  |             ("", 2) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.ignore_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                 UnicodeDecodeError("ascii", bytearray(b"a\xffb"), 1, 2, "ouch")), | 
					
						
							|  |  |  |             ("", 2) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.ignore_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                 UnicodeTranslateError("a\u3042b", 1, 2, "ouch")), | 
					
						
							|  |  |  |             ("", 2) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodreplaceexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "replace" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.replace_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "replace" complains about the wrong exception type | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.replace_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.replace_errors, | 
					
						
							|  |  |  |             BadObjectUnicodeEncodeError() | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.replace_errors, | 
					
						
							|  |  |  |             BadObjectUnicodeDecodeError() | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2007-05-07 22:24:25 +00:00
										 |  |  |         # With the correct exception, "replace" returns an "?" or "\ufffd" replacement | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.replace_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                 UnicodeEncodeError("ascii", "a\u3042b", 1, 2, "ouch")), | 
					
						
							|  |  |  |             ("?", 2) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.replace_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                 UnicodeDecodeError("ascii", bytearray(b"a\xffb"), 1, 2, "ouch")), | 
					
						
							|  |  |  |             ("\ufffd", 2) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.replace_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                 UnicodeTranslateError("a\u3042b", 1, 2, "ouch")), | 
					
						
							|  |  |  |             ("\ufffd", 2) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodxmlcharrefreplaceexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "xmlcharrefreplace" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.xmlcharrefreplace_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "xmlcharrefreplace" complains about the wrong exception types | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.xmlcharrefreplace_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "xmlcharrefreplace" can only be used for encoding | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.xmlcharrefreplace_errors, | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |             UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.xmlcharrefreplace_errors, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             UnicodeTranslateError("\u3042", 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # Use the correct exception | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |         cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 99999, 100000, | 
					
						
							|  |  |  |               999999, 1000000) | 
					
						
							|  |  |  |         cs += (0xd800, 0xdfff) | 
					
						
							| 
									
										
										
										
											2007-05-03 17:18:26 +00:00
										 |  |  |         s = "".join(chr(c) for c in cs) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |             codecs.xmlcharrefreplace_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                 UnicodeEncodeError("ascii", "a" + s + "b", | 
					
						
							|  |  |  |                                    1, 1 + len(s), "ouch") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |             ), | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |             ("".join("&#%d;" % c for c in cs), 1 + len(s)) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodbackslashreplaceexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "backslashreplace" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.backslashreplace_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "backslashreplace" complains about the wrong exception types | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.backslashreplace_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # Use the correct exception | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |         tests = [ | 
					
						
							|  |  |  |             ("\u3042", "\\u3042"), | 
					
						
							|  |  |  |             ("\n", "\\x0a"), | 
					
						
							|  |  |  |             ("a", "\\x61"), | 
					
						
							|  |  |  |             ("\x00", "\\x00"), | 
					
						
							|  |  |  |             ("\xff", "\\xff"), | 
					
						
							|  |  |  |             ("\u0100", "\\u0100"), | 
					
						
							|  |  |  |             ("\uffff", "\\uffff"), | 
					
						
							|  |  |  |             ("\U00010000", "\\U00010000"), | 
					
						
							|  |  |  |             ("\U0010ffff", "\\U0010ffff"), | 
					
						
							|  |  |  |             # Lone surrogates | 
					
						
							|  |  |  |             ("\ud800", "\\ud800"), | 
					
						
							|  |  |  |             ("\udfff", "\\udfff"), | 
					
						
							|  |  |  |             ("\ud800\udfff", "\\ud800\\udfff"), | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |         for s, r in tests: | 
					
						
							|  |  |  |             with self.subTest(str=s): | 
					
						
							|  |  |  |                 self.assertEqual( | 
					
						
							|  |  |  |                     codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                         UnicodeEncodeError("ascii", "a" + s + "b", | 
					
						
							|  |  |  |                                            1, 1 + len(s), "ouch")), | 
					
						
							|  |  |  |                     (r, 1 + len(s)) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:43:34 +02:00
										 |  |  |                 self.assertEqual( | 
					
						
							|  |  |  |                     codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:31:38 +02:00
										 |  |  |                         UnicodeTranslateError("a" + s + "b", | 
					
						
							|  |  |  |                                               1, 1 + len(s), "ouch")), | 
					
						
							|  |  |  |                     (r, 1 + len(s)) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:43:34 +02:00
										 |  |  |                 ) | 
					
						
							|  |  |  |         tests = [ | 
					
						
							|  |  |  |             (b"a", "\\x61"), | 
					
						
							|  |  |  |             (b"\n", "\\x0a"), | 
					
						
							|  |  |  |             (b"\x00", "\\x00"), | 
					
						
							|  |  |  |             (b"\xff", "\\xff"), | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |         for b, r in tests: | 
					
						
							|  |  |  |             with self.subTest(bytes=b): | 
					
						
							|  |  |  |                 self.assertEqual( | 
					
						
							|  |  |  |                     codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:31:38 +02:00
										 |  |  |                         UnicodeDecodeError("ascii", bytearray(b"a" + b + b"b"), | 
					
						
							|  |  |  |                                            1, 2, "ouch")), | 
					
						
							|  |  |  |                     (r, 2) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:43:34 +02:00
										 |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-11-25 13:57:17 +02:00
										 |  |  |     def test_badandgoodnamereplaceexceptions(self): | 
					
						
							|  |  |  |         # "namereplace" complains about a non-exception passed in | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.namereplace_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         # "namereplace" complains about the wrong exception types | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.namereplace_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         # "namereplace" can only be used for encoding | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.namereplace_errors, | 
					
						
							|  |  |  |             UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.namereplace_errors, | 
					
						
							|  |  |  |             UnicodeTranslateError("\u3042", 0, 1, "ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         # Use the correct exception | 
					
						
							| 
									
										
										
										
											2015-03-15 23:43:34 +02:00
										 |  |  |         tests = [ | 
					
						
							|  |  |  |             ("\u3042", "\\N{HIRAGANA LETTER A}"), | 
					
						
							|  |  |  |             ("\x00", "\\x00"), | 
					
						
							|  |  |  |             ("\ufbf9", "\\N{ARABIC LIGATURE UIGHUR KIRGHIZ YEH WITH " | 
					
						
							|  |  |  |                        "HAMZA ABOVE WITH ALEF MAKSURA ISOLATED FORM}"), | 
					
						
							|  |  |  |             ("\U000e007f", "\\N{CANCEL TAG}"), | 
					
						
							|  |  |  |             ("\U0010ffff", "\\U0010ffff"), | 
					
						
							|  |  |  |             # Lone surrogates | 
					
						
							|  |  |  |             ("\ud800", "\\ud800"), | 
					
						
							|  |  |  |             ("\udfff", "\\udfff"), | 
					
						
							|  |  |  |             ("\ud800\udfff", "\\ud800\\udfff"), | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |         for s, r in tests: | 
					
						
							|  |  |  |             with self.subTest(str=s): | 
					
						
							|  |  |  |                 self.assertEqual( | 
					
						
							|  |  |  |                     codecs.namereplace_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:31:38 +02:00
										 |  |  |                         UnicodeEncodeError("ascii", "a" + s + "b", | 
					
						
							|  |  |  |                                            1, 1 + len(s), "ouch")), | 
					
						
							|  |  |  |                     (r, 1 + len(s)) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:43:34 +02:00
										 |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodsurrogateescapeexceptions(self): | 
					
						
							|  |  |  |         surrogateescape_errors = codecs.lookup_error('surrogateescape') | 
					
						
							|  |  |  |         # "surrogateescape" complains about a non-exception passed in | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            surrogateescape_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							| 
									
										
										
										
											2014-11-25 13:57:17 +02:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |         # "surrogateescape" complains about the wrong exception types | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            surrogateescape_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							| 
									
										
										
										
											2014-11-25 13:57:17 +02:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |         # "surrogateescape" can not be used for translating | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             surrogateescape_errors, | 
					
						
							|  |  |  |             UnicodeTranslateError("\udc80", 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2014-11-25 13:57:17 +02:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |         # Use the correct exception | 
					
						
							|  |  |  |         for s in ("a", "\udc7f", "\udd00"): | 
					
						
							|  |  |  |             with self.subTest(str=s): | 
					
						
							|  |  |  |                 self.assertRaises( | 
					
						
							|  |  |  |                     UnicodeEncodeError, | 
					
						
							|  |  |  |                     surrogateescape_errors, | 
					
						
							|  |  |  |                     UnicodeEncodeError("ascii", s, 0, 1, "ouch") | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2014-11-25 13:57:17 +02:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |             surrogateescape_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                 UnicodeEncodeError("ascii", "a\udc80b", 1, 2, "ouch")), | 
					
						
							|  |  |  |             (b"\x80", 2) | 
					
						
							| 
									
										
										
										
											2014-11-25 13:57:17 +02:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             UnicodeDecodeError, | 
					
						
							|  |  |  |             surrogateescape_errors, | 
					
						
							|  |  |  |             UnicodeDecodeError("ascii", bytearray(b"a"), 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2015-01-25 22:56:57 +02:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |             surrogateescape_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                 UnicodeDecodeError("ascii", bytearray(b"a\x80b"), 1, 2, "ouch")), | 
					
						
							|  |  |  |             ("\udc80", 2) | 
					
						
							| 
									
										
										
										
											2015-01-25 22:56:57 +02:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2014-11-25 13:57:17 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |     def test_badandgoodsurrogatepassexceptions(self): | 
					
						
							|  |  |  |         surrogatepass_errors = codecs.lookup_error('surrogatepass') | 
					
						
							|  |  |  |         # "surrogatepass" complains about a non-exception passed in | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            surrogatepass_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         # "surrogatepass" complains about the wrong exception types | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            surrogatepass_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         # "surrogatepass" can not be used for translating | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             surrogatepass_errors, | 
					
						
							|  |  |  |             UnicodeTranslateError("\ud800", 0, 1, "ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         # Use the correct exception | 
					
						
							|  |  |  |         for enc in ("utf-8", "utf-16le", "utf-16be", "utf-32le", "utf-32be"): | 
					
						
							|  |  |  |             with self.subTest(encoding=enc): | 
					
						
							|  |  |  |                 self.assertRaises( | 
					
						
							|  |  |  |                     UnicodeEncodeError, | 
					
						
							|  |  |  |                     surrogatepass_errors, | 
					
						
							|  |  |  |                     UnicodeEncodeError(enc, "a", 0, 1, "ouch") | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |                 self.assertRaises( | 
					
						
							|  |  |  |                     UnicodeDecodeError, | 
					
						
							|  |  |  |                     surrogatepass_errors, | 
					
						
							|  |  |  |                     UnicodeDecodeError(enc, "a".encode(enc), 0, 1, "ouch") | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:43:34 +02:00
										 |  |  |         for s in ("\ud800", "\udfff", "\ud800\udfff"): | 
					
						
							|  |  |  |             with self.subTest(str=s): | 
					
						
							|  |  |  |                 self.assertRaises( | 
					
						
							|  |  |  |                     UnicodeEncodeError, | 
					
						
							|  |  |  |                     surrogatepass_errors, | 
					
						
							|  |  |  |                     UnicodeEncodeError("ascii", s, 0, len(s), "ouch") | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |         tests = [ | 
					
						
							|  |  |  |             ("utf-8", "\ud800", b'\xed\xa0\x80', 3), | 
					
						
							|  |  |  |             ("utf-16le", "\ud800", b'\x00\xd8', 2), | 
					
						
							|  |  |  |             ("utf-16be", "\ud800", b'\xd8\x00', 2), | 
					
						
							|  |  |  |             ("utf-32le", "\ud800", b'\x00\xd8\x00\x00', 4), | 
					
						
							|  |  |  |             ("utf-32be", "\ud800", b'\x00\x00\xd8\x00', 4), | 
					
						
							|  |  |  |             ("utf-8", "\udfff", b'\xed\xbf\xbf', 3), | 
					
						
							|  |  |  |             ("utf-16le", "\udfff", b'\xff\xdf', 2), | 
					
						
							|  |  |  |             ("utf-16be", "\udfff", b'\xdf\xff', 2), | 
					
						
							|  |  |  |             ("utf-32le", "\udfff", b'\xff\xdf\x00\x00', 4), | 
					
						
							|  |  |  |             ("utf-32be", "\udfff", b'\x00\x00\xdf\xff', 4), | 
					
						
							|  |  |  |             ("utf-8", "\ud800\udfff", b'\xed\xa0\x80\xed\xbf\xbf', 3), | 
					
						
							|  |  |  |             ("utf-16le", "\ud800\udfff", b'\x00\xd8\xff\xdf', 2), | 
					
						
							|  |  |  |             ("utf-16be", "\ud800\udfff", b'\xd8\x00\xdf\xff', 2), | 
					
						
							|  |  |  |             ("utf-32le", "\ud800\udfff", b'\x00\xd8\x00\x00\xff\xdf\x00\x00', 4), | 
					
						
							|  |  |  |             ("utf-32be", "\ud800\udfff", b'\x00\x00\xd8\x00\x00\x00\xdf\xff', 4), | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |         for enc, s, b, n in tests: | 
					
						
							|  |  |  |             with self.subTest(encoding=enc, str=s, bytes=b): | 
					
						
							|  |  |  |                 self.assertEqual( | 
					
						
							|  |  |  |                     surrogatepass_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                         UnicodeEncodeError(enc, "a" + s + "b", | 
					
						
							|  |  |  |                                            1, 1 + len(s), "ouch")), | 
					
						
							|  |  |  |                     (b, 1 + len(s)) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |                 ) | 
					
						
							|  |  |  |                 self.assertEqual( | 
					
						
							|  |  |  |                     surrogatepass_errors( | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                         UnicodeDecodeError(enc, bytearray(b"a" + b[:n] + b"b"), | 
					
						
							| 
									
										
										
										
											2015-03-16 08:31:38 +02:00
										 |  |  |                                            1, 1 + n, "ouch")), | 
					
						
							| 
									
										
										
										
											2015-03-16 08:29:47 +02:00
										 |  |  |                     (s[:1], 1 + n) | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_badhandlerresults(self): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for res in results: | 
					
						
							| 
									
										
											  
											
												Merged revisions 68633,68648,68667,68706,68718,68720-68721,68724-68727,68739 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
  r68633 | thomas.heller | 2009-01-16 12:53:44 -0600 (Fri, 16 Jan 2009) | 3 lines
  Change an example in the docs to avoid a mistake when the code is copy
  pasted and changed afterwards.
........
  r68648 | benjamin.peterson | 2009-01-16 22:28:57 -0600 (Fri, 16 Jan 2009) | 1 line
  use enumerate
........
  r68667 | amaury.forgeotdarc | 2009-01-17 14:18:59 -0600 (Sat, 17 Jan 2009) | 3 lines
  #4077: No need to append \n when calling Py_FatalError
  + fix a declaration to make it match the one in pythonrun.h
........
  r68706 | benjamin.peterson | 2009-01-17 19:28:46 -0600 (Sat, 17 Jan 2009) | 1 line
  fix grammar
........
  r68718 | georg.brandl | 2009-01-18 04:42:35 -0600 (Sun, 18 Jan 2009) | 1 line
  #4976: union() and intersection() take multiple args, but talk about "the other".
........
  r68720 | georg.brandl | 2009-01-18 04:45:22 -0600 (Sun, 18 Jan 2009) | 1 line
  #4974: fix redundant mention of lists and tuples.
........
  r68721 | georg.brandl | 2009-01-18 04:48:16 -0600 (Sun, 18 Jan 2009) | 1 line
  #4914: trunc is in math.
........
  r68724 | georg.brandl | 2009-01-18 07:24:10 -0600 (Sun, 18 Jan 2009) | 1 line
  #4979: correct result range for some random functions.
........
  r68725 | georg.brandl | 2009-01-18 07:47:26 -0600 (Sun, 18 Jan 2009) | 1 line
  #4857: fix augmented assignment target spec.
........
  r68726 | georg.brandl | 2009-01-18 08:41:52 -0600 (Sun, 18 Jan 2009) | 1 line
  #4923: clarify what was added.
........
  r68727 | georg.brandl | 2009-01-18 12:25:30 -0600 (Sun, 18 Jan 2009) | 1 line
  #4986: augassigns are not expressions.
........
  r68739 | benjamin.peterson | 2009-01-18 15:11:38 -0600 (Sun, 18 Jan 2009) | 1 line
  fix test that wasn't working as expected #4990
........
											
										 
											2009-01-18 22:27:04 +00:00
										 |  |  |             codecs.register_error("test.badhandler", lambda x: res) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             for enc in encs: | 
					
						
							|  |  |  |                 self.assertRaises( | 
					
						
							|  |  |  |                     TypeError, | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                     "\u3042".encode, | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                     enc, | 
					
						
							|  |  |  |                     "test.badhandler" | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             for (enc, bytes) in ( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |                 ("ascii", b"\xff"), | 
					
						
							|  |  |  |                 ("utf-8", b"\xff"), | 
					
						
							|  |  |  |                 ("utf-7", b"+x-"), | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             ): | 
					
						
							| 
									
										
										
										
											2019-03-18 15:44:11 +09:00
										 |  |  |                 self.assertRaises( | 
					
						
							|  |  |  |                     TypeError, | 
					
						
							|  |  |  |                     bytes.decode, | 
					
						
							|  |  |  |                     enc, | 
					
						
							|  |  |  |                     "test.badhandler" | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_lookup(self): | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) | 
					
						
							|  |  |  |         self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore")) | 
					
						
							|  |  |  |         self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             codecs.xmlcharrefreplace_errors, | 
					
						
							|  |  |  |             codecs.lookup_error("xmlcharrefreplace") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             codecs.backslashreplace_errors, | 
					
						
							|  |  |  |             codecs.lookup_error("backslashreplace") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2014-11-25 13:57:17 +02:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             codecs.namereplace_errors, | 
					
						
							|  |  |  |             codecs.lookup_error("namereplace") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-06 17:21:40 +00:00
										 |  |  |     def test_unencodablereplacement(self): | 
					
						
							|  |  |  |         def unencrepl(exc): | 
					
						
							|  |  |  |             if isinstance(exc, UnicodeEncodeError): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 return ("\u4242", exc.end) | 
					
						
							| 
									
										
										
										
											2002-09-06 17:21:40 +00:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |         codecs.register_error("test.unencreplhandler", unencrepl) | 
					
						
							|  |  |  |         for enc in ("ascii", "iso-8859-1", "iso-8859-15"): | 
					
						
							|  |  |  |             self.assertRaises( | 
					
						
							|  |  |  |                 UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 "\u4242".encode, | 
					
						
							| 
									
										
										
										
											2002-09-06 17:21:40 +00:00
										 |  |  |                 enc, | 
					
						
							|  |  |  |                 "test.unencreplhandler" | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |     def test_badregistercall(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Modules/_codecsmodule.c::register_error() | 
					
						
							|  |  |  |         # Python/codecs.c::PyCodec_RegisterError() | 
					
						
							|  |  |  |         self.assertRaises(TypeError, codecs.register_error, 42) | 
					
						
							|  |  |  |         self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-11-17 08:52:34 +00:00
										 |  |  |     def test_badlookupcall(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Modules/_codecsmodule.c::lookup_error() | 
					
						
							|  |  |  |         self.assertRaises(TypeError, codecs.lookup_error) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |     def test_unknownhandler(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Modules/_codecsmodule.c::lookup_error() | 
					
						
							|  |  |  |         self.assertRaises(LookupError, codecs.lookup_error, "test.unknown") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_xmlcharrefvalues(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors() | 
					
						
							|  |  |  |         # and inline implementations | 
					
						
							| 
									
										
										
										
											2015-03-15 23:41:37 +02:00
										 |  |  |         v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000, 100000, | 
					
						
							|  |  |  |              500000, 1000000) | 
					
						
							| 
									
										
										
										
											2007-05-03 17:18:26 +00:00
										 |  |  |         s = "".join([chr(x) for x in v]) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |         codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors) | 
					
						
							|  |  |  |         for enc in ("ascii", "iso-8859-15"): | 
					
						
							|  |  |  |             for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"): | 
					
						
							|  |  |  |                 s.encode(enc, err) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_decodehelper(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Objects/unicodeobject.c::unicode_decode_call_errorhandler() | 
					
						
							|  |  |  |         # and callers | 
					
						
							| 
									
										
										
										
											2007-08-27 20:40:10 +00:00
										 |  |  |         self.assertRaises(LookupError, b"\xff".decode, "ascii", "test.unknown") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def baddecodereturn1(exc): | 
					
						
							|  |  |  |             return 42 | 
					
						
							|  |  |  |         codecs.register_error("test.baddecodereturn1", baddecodereturn1) | 
					
						
							| 
									
										
										
										
											2007-08-27 20:40:10 +00:00
										 |  |  |         self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, b"\\".decode, "unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, b"\\x0".decode, "unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, b"\\x0y".decode, "unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, b"\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, b"\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def baddecodereturn2(exc): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("?", None) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |         codecs.register_error("test.baddecodereturn2", baddecodereturn2) | 
					
						
							| 
									
										
										
										
											2007-08-27 20:40:10 +00:00
										 |  |  |         self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn2") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         handler = PosReturn() | 
					
						
							|  |  |  |         codecs.register_error("test.posreturn", handler.handle) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Valid negative position | 
					
						
							|  |  |  |         handler.pos = -1 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Valid negative position | 
					
						
							|  |  |  |         handler.pos = -2 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?><?>") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Negative position out of bounds | 
					
						
							|  |  |  |         handler.pos = -3 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Valid positive position | 
					
						
							|  |  |  |         handler.pos = 1 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-12-14 21:28:07 +00:00
										 |  |  |         # Largest valid positive position (one beyond end of input) | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         handler.pos = 2 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Invalid positive position | 
					
						
							|  |  |  |         handler.pos = 3 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         # Restart at the "0" | 
					
						
							|  |  |  |         handler.pos = 6 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(b"\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), "<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         class D(dict): | 
					
						
							|  |  |  |             def __getitem__(self, key): | 
					
						
							|  |  |  |                 raise ValueError | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertRaises(UnicodeError, codecs.charmap_decode, b"\xff", "strict", {0xff: None}) | 
					
						
							|  |  |  |         self.assertRaises(ValueError, codecs.charmap_decode, b"\xff", "strict", D()) | 
					
						
							|  |  |  |         self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: sys.maxunicode+1}) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_encodehelper(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Objects/unicodeobject.c::unicode_encode_call_errorhandler() | 
					
						
							|  |  |  |         # and callers | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(LookupError, "\xff".encode, "ascii", "test.unknown") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def badencodereturn1(exc): | 
					
						
							|  |  |  |             return 42 | 
					
						
							|  |  |  |         codecs.register_error("test.badencodereturn1", badencodereturn1) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn1") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def badencodereturn2(exc): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("?", None) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |         codecs.register_error("test.badencodereturn2", badencodereturn2) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn2") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         handler = PosReturn() | 
					
						
							|  |  |  |         codecs.register_error("test.posreturn", handler.handle) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Valid negative position | 
					
						
							|  |  |  |         handler.pos = -1 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Valid negative position | 
					
						
							|  |  |  |         handler.pos = -2 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?><?>") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Negative position out of bounds | 
					
						
							|  |  |  |         handler.pos = -3 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Valid positive position | 
					
						
							|  |  |  |         handler.pos = 1 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Largest valid positive position (one beyond end of input | 
					
						
							|  |  |  |         handler.pos = 2 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Invalid positive position | 
					
						
							|  |  |  |         handler.pos = 3 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         handler.pos = 0 | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         class D(dict): | 
					
						
							|  |  |  |             def __getitem__(self, key): | 
					
						
							|  |  |  |                 raise ValueError | 
					
						
							| 
									
										
										
										
											2014-11-25 13:57:17 +02:00
										 |  |  |         for err in ("strict", "replace", "xmlcharrefreplace", | 
					
						
							|  |  |  |                     "backslashreplace", "namereplace", "test.posreturn"): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None}) | 
					
						
							|  |  |  |             self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D()) | 
					
						
							|  |  |  |             self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300}) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_translatehelper(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Objects/unicodeobject.c::unicode_encode_call_errorhandler() | 
					
						
							|  |  |  |         # and callers | 
					
						
							|  |  |  |         # (Unfortunately the errors argument is not directly accessible | 
					
						
							|  |  |  |         # from Python, so we can't test that much) | 
					
						
							|  |  |  |         class D(dict): | 
					
						
							|  |  |  |             def __getitem__(self, key): | 
					
						
							|  |  |  |                 raise ValueError | 
					
						
							| 
									
										
										
										
											2007-10-24 21:25:34 +00:00
										 |  |  |         #self.assertRaises(ValueError, "\xff".translate, D()) | 
					
						
							| 
									
										
										
										
											2014-04-05 15:35:01 +02:00
										 |  |  |         self.assertRaises(ValueError, "\xff".translate, {0xff: sys.maxunicode+1}) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(TypeError, "\xff".translate, {0xff: ()}) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-10-24 14:25:28 +00:00
										 |  |  |     def test_bug828737(self): | 
					
						
							|  |  |  |         charmap = { | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ord("&"): "&", | 
					
						
							|  |  |  |             ord("<"): "<", | 
					
						
							|  |  |  |             ord(">"): ">", | 
					
						
							|  |  |  |             ord('"'): """, | 
					
						
							| 
									
										
										
										
											2003-10-24 14:25:28 +00:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2004-01-18 20:29:55 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-10-24 14:25:28 +00:00
										 |  |  |         for n in (1, 10, 100, 1000): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             text = 'abc<def>ghi'*n | 
					
						
							| 
									
										
										
										
											2003-10-24 14:25:28 +00:00
										 |  |  |             text.translate(charmap) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-07-30 13:31:40 +00:00
										 |  |  |     def test_mutatingdecodehandler(self): | 
					
						
							|  |  |  |         baddata = [ | 
					
						
							|  |  |  |             ("ascii", b"\xff"), | 
					
						
							|  |  |  |             ("utf-7", b"++"), | 
					
						
							|  |  |  |             ("utf-8",  b"\xff"), | 
					
						
							|  |  |  |             ("utf-16", b"\xff"), | 
					
						
							| 
									
										
										
										
											2007-08-16 21:55:45 +00:00
										 |  |  |             ("utf-32", b"\xff"), | 
					
						
							| 
									
										
										
										
											2007-07-30 13:31:40 +00:00
										 |  |  |             ("unicode-escape", b"\\u123g"), | 
					
						
							|  |  |  |             ("raw-unicode-escape", b"\\u123g"), | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def replacing(exc): | 
					
						
							|  |  |  |             if isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 exc.object = 42 | 
					
						
							|  |  |  |                 return ("\u4242", 0) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |         codecs.register_error("test.replacing", replacing) | 
					
						
							| 
									
										
										
										
											2011-11-17 12:23:34 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-03-18 15:44:11 +09:00
										 |  |  |         for (encoding, data) in baddata: | 
					
						
							|  |  |  |             with self.assertRaises(TypeError): | 
					
						
							|  |  |  |                 data.decode(encoding, "test.replacing") | 
					
						
							| 
									
										
										
										
											2007-07-30 13:31:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def mutating(exc): | 
					
						
							|  |  |  |             if isinstance(exc, UnicodeDecodeError): | 
					
						
							| 
									
										
										
										
											2018-01-23 22:50:50 +08:00
										 |  |  |                 exc.object = b"" | 
					
						
							| 
									
										
										
										
											2007-07-30 13:31:40 +00:00
										 |  |  |                 return ("\u4242", 0) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |         codecs.register_error("test.mutating", mutating) | 
					
						
							|  |  |  |         # If the decoder doesn't pick up the modified input the following | 
					
						
							|  |  |  |         # will lead to an endless loop | 
					
						
							| 
									
										
										
										
											2019-03-18 15:44:11 +09:00
										 |  |  |         for (encoding, data) in baddata: | 
					
						
							|  |  |  |             self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242") | 
					
						
							| 
									
										
										
										
											2007-07-30 13:31:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-01-31 20:48:05 +08:00
										 |  |  |     # issue32583 | 
					
						
							|  |  |  |     def test_crashing_decode_handler(self): | 
					
						
							|  |  |  |         # better generating one more character to fill the extra space slot | 
					
						
							|  |  |  |         # so in debug build it can steadily fail | 
					
						
							|  |  |  |         def forward_shorter_than_end(exc): | 
					
						
							|  |  |  |             if isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 # size one character, 0 < forward < exc.end | 
					
						
							|  |  |  |                 return ('\ufffd', exc.start+1) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |         codecs.register_error( | 
					
						
							|  |  |  |             "test.forward_shorter_than_end", forward_shorter_than_end) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             b'\xd8\xd8\xd8\xd8\xd8\x00\x00\x00'.decode( | 
					
						
							|  |  |  |                 'utf-16-le', 'test.forward_shorter_than_end'), | 
					
						
							|  |  |  |             '\ufffd\ufffd\ufffd\ufffd\xd8\x00' | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             b'\xd8\xd8\xd8\xd8\x00\xd8\x00\x00'.decode( | 
					
						
							|  |  |  |                 'utf-16-be', 'test.forward_shorter_than_end'), | 
					
						
							|  |  |  |             '\ufffd\ufffd\ufffd\ufffd\xd8\x00' | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             b'\x11\x11\x11\x11\x11\x00\x00\x00\x00\x00\x00'.decode( | 
					
						
							|  |  |  |                 'utf-32-le', 'test.forward_shorter_than_end'), | 
					
						
							|  |  |  |             '\ufffd\ufffd\ufffd\u1111\x00' | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             b'\x11\x11\x11\x00\x00\x11\x11\x00\x00\x00\x00'.decode( | 
					
						
							|  |  |  |                 'utf-32-be', 'test.forward_shorter_than_end'), | 
					
						
							|  |  |  |             '\ufffd\ufffd\ufffd\u1111\x00' | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def replace_with_long(exc): | 
					
						
							|  |  |  |             if isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 exc.object = b"\x00" * 8 | 
					
						
							|  |  |  |                 return ('\ufffd', exc.start) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |         codecs.register_error("test.replace_with_long", replace_with_long) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             b'\x00'.decode('utf-16', 'test.replace_with_long'), | 
					
						
							|  |  |  |             '\ufffd\x00\x00\x00\x00' | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             b'\x00'.decode('utf-32', 'test.replace_with_long'), | 
					
						
							|  |  |  |             '\ufffd\x00\x00' | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-18 16:08:52 +03:00
										 |  |  |     def test_fake_error_class(self): | 
					
						
							|  |  |  |         handlers = [ | 
					
						
							|  |  |  |             codecs.strict_errors, | 
					
						
							|  |  |  |             codecs.ignore_errors, | 
					
						
							|  |  |  |             codecs.replace_errors, | 
					
						
							|  |  |  |             codecs.backslashreplace_errors, | 
					
						
							| 
									
										
										
										
											2015-05-18 16:10:40 +03:00
										 |  |  |             codecs.namereplace_errors, | 
					
						
							| 
									
										
										
										
											2015-05-18 16:08:52 +03:00
										 |  |  |             codecs.xmlcharrefreplace_errors, | 
					
						
							|  |  |  |             codecs.lookup_error('surrogateescape'), | 
					
						
							|  |  |  |             codecs.lookup_error('surrogatepass'), | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |         for cls in UnicodeEncodeError, UnicodeDecodeError, UnicodeTranslateError: | 
					
						
							|  |  |  |             class FakeUnicodeError(str): | 
					
						
							|  |  |  |                 __class__ = cls | 
					
						
							|  |  |  |             for handler in handlers: | 
					
						
							|  |  |  |                 with self.subTest(handler=handler, error_class=cls): | 
					
						
							|  |  |  |                     self.assertRaises(TypeError, handler, FakeUnicodeError()) | 
					
						
							|  |  |  |             class FakeUnicodeError(Exception): | 
					
						
							|  |  |  |                 __class__ = cls | 
					
						
							|  |  |  |             for handler in handlers: | 
					
						
							|  |  |  |                 with self.subTest(handler=handler, error_class=cls): | 
					
						
							|  |  |  |                     with self.assertRaises((TypeError, FakeUnicodeError)): | 
					
						
							|  |  |  |                         handler(FakeUnicodeError()) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
# Run this module's tests when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()