| 
									
										
										
										
											2011-11-15 22:44:05 +01:00
										 |  |  | import codecs | 
					
						
							|  |  |  | import html.entities | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | import test.support | 
					
						
							|  |  |  | import unicodedata | 
					
						
							|  |  |  | import unittest | 
					
						
							|  |  |  | import warnings | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-05 13:01:41 +02:00
										 |  |  | try: | 
					
						
							|  |  |  |     import ctypes | 
					
						
							|  |  |  | except ImportError: | 
					
						
							|  |  |  |     ctypes = None | 
					
						
							|  |  |  |     SIZEOF_WCHAR_T = -1 | 
					
						
							|  |  |  | else: | 
					
						
							|  |  |  |     SIZEOF_WCHAR_T = ctypes.sizeof(ctypes.c_wchar) | 
					
						
							| 
									
										
										
										
											2011-09-29 20:01:55 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | class PosReturn: | 
					
						
							|  |  |  |     # this can be used for configurable callbacks | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							|  |  |  |         self.pos = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def handle(self, exc): | 
					
						
							|  |  |  |         oldpos = self.pos | 
					
						
							|  |  |  |         realpos = oldpos | 
					
						
							|  |  |  |         if realpos<0: | 
					
						
							| 
									
										
										
										
											2003-02-19 02:35:07 +00:00
										 |  |  |             realpos = len(exc.object) + realpos | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         # if we don't advance this time, terminate on the next call | 
					
						
							|  |  |  |         # otherwise we'd get an endless loop | 
					
						
							|  |  |  |         if realpos <= exc.start: | 
					
						
							|  |  |  |             self.pos = len(exc.object) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         return ("<?>", oldpos) | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  | # A UnicodeEncodeError object with a bad start attribute | 
					
						
							|  |  |  | class BadStartUnicodeEncodeError(UnicodeEncodeError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         self.start = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeEncodeError object with a bad object attribute | 
					
						
							|  |  |  | class BadObjectUnicodeEncodeError(UnicodeEncodeError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         self.object = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeDecodeError object without an end attribute | 
					
						
							|  |  |  | class NoEndUnicodeDecodeError(UnicodeDecodeError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |         UnicodeDecodeError.__init__(self, "ascii", bytearray(b""), 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         del self.end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeDecodeError object with a bad object attribute | 
					
						
							|  |  |  | class BadObjectUnicodeDecodeError(UnicodeDecodeError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |         UnicodeDecodeError.__init__(self, "ascii", bytearray(b""), 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         self.object = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeTranslateError object without a start attribute | 
					
						
							|  |  |  | class NoStartUnicodeTranslateError(UnicodeTranslateError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeTranslateError.__init__(self, "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         del self.start | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeTranslateError object without an end attribute | 
					
						
							|  |  |  | class NoEndUnicodeTranslateError(UnicodeTranslateError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeTranslateError.__init__(self,  "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         del self.end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeTranslateError object without an object attribute | 
					
						
							|  |  |  | class NoObjectUnicodeTranslateError(UnicodeTranslateError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeTranslateError.__init__(self, "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         del self.object | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | class CodecCallbackTest(unittest.TestCase): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_xmlcharrefreplace(self): | 
					
						
							|  |  |  |         # replace unencodable characters which numeric character entities. | 
					
						
							|  |  |  |         # For ascii, latin-1 and charmaps this is completely implemented | 
					
						
							|  |  |  |         # in C and should be reasonably fast. | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         s = "\u30b9\u30d1\u30e2 \xe4nd eggs" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             s.encode("ascii", "xmlcharrefreplace"), | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"スパモ änd eggs" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             s.encode("latin-1", "xmlcharrefreplace"), | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"スパモ \xe4nd eggs" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_xmlcharnamereplace(self): | 
					
						
							|  |  |  |         # This time use a named character entity for unencodable | 
					
						
							|  |  |  |         # characters, if one is available. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def xmlcharnamereplace(exc): | 
					
						
							|  |  |  |             if not isinstance(exc, UnicodeEncodeError): | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |             l = [] | 
					
						
							|  |  |  |             for c in exc.object[exc.start:exc.end]: | 
					
						
							|  |  |  |                 try: | 
					
						
							| 
									
										
										
										
											2008-05-17 22:02:32 +00:00
										 |  |  |                     l.append("&%s;" % html.entities.codepoint2name[ord(c)]) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                 except KeyError: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                     l.append("&#%d;" % ord(c)) | 
					
						
							|  |  |  |             return ("".join(l), exc.end) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error( | 
					
						
							|  |  |  |             "test.xmlcharnamereplace", xmlcharnamereplace) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "\xab\u211c\xbb = \u2329\u1234\u20ac\u232a" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"«ℜ» = ⟨ሴ€⟩" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout) | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\xabℜ\xbb = ⟨ሴ€⟩" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout) | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\xabℜ\xbb = ⟨ሴ\xa4⟩" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_uninamereplace(self): | 
					
						
							|  |  |  |         # We're using the names from the unicode database this time, | 
					
						
							| 
									
										
										
										
											2002-11-25 17:58:02 +00:00
										 |  |  |         # and we're doing "syntax highlighting" here, i.e. we include | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         # the replaced text in ANSI escape sequences. For this it is | 
					
						
							|  |  |  |         # useful that the error handler is not called for every single | 
					
						
							|  |  |  |         # unencodable character, but for a complete sequence of | 
					
						
							|  |  |  |         # unencodable characters, otherwise we would output many | 
					
						
							| 
									
										
										
										
											2009-02-21 20:59:32 +00:00
										 |  |  |         # unnecessary escape sequences. | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def uninamereplace(exc): | 
					
						
							|  |  |  |             if not isinstance(exc, UnicodeEncodeError): | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |             l = [] | 
					
						
							|  |  |  |             for c in exc.object[exc.start:exc.end]: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 l.append(unicodedata.name(c, "0x%x" % ord(c))) | 
					
						
							|  |  |  |             return ("\033[1m%s\033[0m" % ", ".join(l), exc.end) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error( | 
					
						
							|  |  |  |             "test.uninamereplace", uninamereplace) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "\xac\u1234\u20ac\u8000" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_backslashescape(self): | 
					
						
							|  |  |  |         # Does the same as the "unicode-escape" encoding, but with different | 
					
						
							|  |  |  |         # base encodings. | 
					
						
							| 
									
										
										
										
											2011-10-04 19:06:00 +03:00
										 |  |  |         sin = "a\xac\u1234\u20ac\u8000\U0010ffff" | 
					
						
							|  |  |  |         sout = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-04 19:06:00 +03:00
										 |  |  |         sout = b"a\xac\\u1234\\u20ac\\u8000\\U0010ffff" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-10-04 19:06:00 +03:00
										 |  |  |         sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-07-01 07:32:02 +00:00
										 |  |  |     def test_decoding_callbacks(self): | 
					
						
							|  |  |  |         # This is a test for a decoding callback handler | 
					
						
							|  |  |  |         # that allows the decoding of the invalid sequence | 
					
						
							|  |  |  |         # "\xc0\x80" and returns "\x00" instead of raising an error. | 
					
						
							|  |  |  |         # All other illegal sequences will be handled strictly. | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         def relaxedutf8(exc): | 
					
						
							|  |  |  |             if not isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							| 
									
										
										
										
											2010-07-01 07:32:02 +00:00
										 |  |  |             if exc.object[exc.start:exc.start+2] == b"\xc0\x80": | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 return ("\x00", exc.start+2) # retry after two bytes | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 raise exc | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-07-01 07:32:02 +00:00
										 |  |  |         codecs.register_error("test.relaxedutf8", relaxedutf8) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-07-01 07:32:02 +00:00
										 |  |  |         # all the "\xc0\x80" will be decoded to "\x00" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sin = b"a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80" | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sout = "a\x00b\x00c\xfc\x00\x00" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout) | 
					
						
							| 
									
										
										
										
											2010-07-01 07:32:02 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sin = b"\xc0\x80\xc0\x81" | 
					
						
							| 
									
										
										
										
											2010-07-01 07:32:02 +00:00
										 |  |  |         self.assertRaises(UnicodeDecodeError, sin.decode, | 
					
						
							|  |  |  |                           "utf-8", "test.relaxedutf8") | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_charmapencode(self): | 
					
						
							|  |  |  |         # For charmap encodings the replacement string will be | 
					
						
							|  |  |  |         # mapped through the encoding again. This means, that | 
					
						
							|  |  |  |         # to be able to use e.g. the "replace" handler, the | 
					
						
							|  |  |  |         # charmap has to have a mapping for "?". | 
					
						
							| 
									
										
										
										
											2007-11-06 21:34:58 +00:00
										 |  |  |         charmap = dict((ord(c), bytes(2*c.upper(), 'ascii')) for c in "abcdefgh") | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "abc" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"AABBCC" | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "abcA" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-11-06 21:34:58 +00:00
										 |  |  |         charmap[ord("?")] = b"XYZ" | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "abcDEF" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"AABBCCXYZXYZXYZ" | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         charmap[ord("?")] = "XYZ" # wrong type in mapping | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  |     def test_decodeunicodeinternal(self): | 
					
						
							| 
									
										
										
										
											2011-11-17 12:23:34 +02:00
										 |  |  |         with test.support.check_warnings(('unicode_internal codec has been ' | 
					
						
							|  |  |  |                                           'deprecated', DeprecationWarning)): | 
					
						
							|  |  |  |             self.assertRaises( | 
					
						
							|  |  |  |                 UnicodeDecodeError, | 
					
						
							|  |  |  |                 b"\x00\x00\x00\x00\x00".decode, | 
					
						
							|  |  |  |                 "unicode-internal", | 
					
						
							|  |  |  |             ) | 
					
						
							| 
									
										
										
										
											2011-09-29 20:01:55 +02:00
										 |  |  |         if SIZEOF_WCHAR_T == 4: | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  |             def handler_unicodeinternal(exc): | 
					
						
							|  |  |  |                 if not isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                     raise TypeError("don't know how to handle %r" % exc) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 return ("\x01", 1) | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-11-17 12:23:34 +02:00
										 |  |  |             with test.support.check_warnings(('unicode_internal codec has been ' | 
					
						
							|  |  |  |                                               'deprecated', DeprecationWarning)): | 
					
						
							|  |  |  |                 self.assertEqual( | 
					
						
							|  |  |  |                     b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"), | 
					
						
							|  |  |  |                     "\u0000" | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-11-17 12:23:34 +02:00
										 |  |  |                 self.assertEqual( | 
					
						
							|  |  |  |                     b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"), | 
					
						
							|  |  |  |                     "\u0000\ufffd" | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-11-17 12:23:34 +02:00
										 |  |  |                 codecs.register_error("test.hui", handler_unicodeinternal) | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-11-17 12:23:34 +02:00
										 |  |  |                 self.assertEqual( | 
					
						
							|  |  |  |                     b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"), | 
					
						
							|  |  |  |                     "\u0000\u0001\u0000" | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |     def test_callbacks(self): | 
					
						
							|  |  |  |         def handler1(exc): | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             r = range(exc.start, exc.end) | 
					
						
							|  |  |  |             if isinstance(exc, UnicodeEncodeError): | 
					
						
							|  |  |  |                 l = ["<%d>" % ord(exc.object[pos]) for pos in r] | 
					
						
							|  |  |  |             elif isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 l = ["<%d>" % exc.object[pos] for pos in r] | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("[%s]" % "".join(l), exc.end) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error("test.handler1", handler1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def handler2(exc): | 
					
						
							|  |  |  |             if not isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             l = ["<%d>" % exc.object[pos] for pos in range(exc.start, exc.end)] | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("[%s]" % "".join(l), exc.end+1) # skip one character | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error("test.handler2", handler2) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         s = b"\x00\x81\x7f\x80\xff" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             s.decode("ascii", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "\x00[<129>]\x7f[<128>][<255>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             s.decode("ascii", "test.handler2"), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "\x00[<129>][<128>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"\\u3042\u3xxx".decode("unicode-escape", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2013-01-29 10:20:44 +02:00
										 |  |  |             "\u3042[<92><117><51>]xxx" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"\\u3042\u3xx".decode("unicode-escape", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2013-01-29 10:20:44 +02:00
										 |  |  |             "\u3042[<92><117><51>]xx" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.charmap_decode(b"abc", "test.handler1", {ord("a"): "z"})[0], | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "z[<98>][<99>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "g\xfc\xdfrk".encode("ascii", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"g[<252><223>]rk" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "g\xfc\xdf".encode("ascii", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"g[<252><223>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_longstrings(self): | 
					
						
							|  |  |  |         # test long strings to check for memory overflow problems | 
					
						
							| 
									
										
										
										
											2007-08-16 21:55:45 +00:00
										 |  |  |         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", | 
					
						
							|  |  |  |                    "backslashreplace"] | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         # register the handlers under different names, | 
					
						
							|  |  |  |         # to prevent the codec from recognizing the name | 
					
						
							|  |  |  |         for err in errors: | 
					
						
							|  |  |  |             codecs.register_error("test." + err, codecs.lookup_error(err)) | 
					
						
							|  |  |  |         l = 1000 | 
					
						
							|  |  |  |         errors += [ "test." + err for err in errors ] | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         for uni in [ s*l for s in ("x", "\u3042", "a\xe4") ]: | 
					
						
							| 
									
										
										
										
											2007-08-16 21:55:45 +00:00
										 |  |  |             for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", | 
					
						
							|  |  |  |                         "utf-8", "utf-7", "utf-16", "utf-32"): | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                 for err in errors: | 
					
						
							| 
									
										
										
										
											2002-11-09 05:26:15 +00:00
										 |  |  |                     try: | 
					
						
							|  |  |  |                         uni.encode(enc, err) | 
					
						
							|  |  |  |                     except UnicodeError: | 
					
						
							|  |  |  |                         pass | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def check_exceptionobjectargs(self, exctype, args, msg): | 
					
						
							|  |  |  |         # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion | 
					
						
							|  |  |  |         # check with one missing argument | 
					
						
							|  |  |  |         self.assertRaises(TypeError, exctype, *args[:-1]) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # check with one argument too much | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises(TypeError, exctype, *(args + ["too much"])) | 
					
						
							|  |  |  |         # check with one argument of the wrong type | 
					
						
							| 
									
										
										
										
											2007-11-06 21:34:58 +00:00
										 |  |  |         wrongargs = [ "spam", b"eggs", b"spam", 42, 1.0, None ] | 
					
						
							| 
									
										
										
										
											2007-05-07 22:24:25 +00:00
										 |  |  |         for i in range(len(args)): | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             for wrongarg in wrongargs: | 
					
						
							|  |  |  |                 if type(wrongarg) is type(args[i]): | 
					
						
							| 
									
										
										
										
											2002-11-09 05:26:15 +00:00
										 |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                 # build argument array | 
					
						
							|  |  |  |                 callargs = [] | 
					
						
							| 
									
										
										
										
											2007-05-07 22:24:25 +00:00
										 |  |  |                 for j in range(len(args)): | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                     if i==j: | 
					
						
							|  |  |  |                         callargs.append(wrongarg) | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         callargs.append(args[i]) | 
					
						
							|  |  |  |                 self.assertRaises(TypeError, exctype, *callargs) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # check with the correct number and type of arguments | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         exc = exctype(*args) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(str(exc), msg) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_unicodeencodeerror(self): | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "g\xfcrk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "'ascii' codec can't encode character '\\xfc' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "g\xfcrk", 1, 4, "ouch"], | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             "'ascii' codec can't encode characters in position 1-3: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "\xfcx", 0, 1, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "'ascii' codec can't encode character '\\xfc' in position 0: ouch" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "\u0100x", 0, 1, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "'ascii' codec can't encode character '\\u0100' in position 0: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "\uffffx", 0, 1, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "'ascii' codec can't encode character '\\uffff' in position 0: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2011-09-29 20:01:55 +02:00
										 |  |  |         if SIZEOF_WCHAR_T == 4: | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |             self.check_exceptionobjectargs( | 
					
						
							|  |  |  |                 UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 ["ascii", "\U00010000x", 0, 1, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |                 "'ascii' codec can't encode character '\\U00010000' in position 0: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |             ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_unicodedecodeerror(self): | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeDecodeError, | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |             ["ascii", bytearray(b"g\xfcrk"), 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             "'ascii' codec can't decode byte 0xfc in position 1: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeDecodeError, | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |             ["ascii", bytearray(b"g\xfcrk"), 1, 3, "ouch"], | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             "'ascii' codec can't decode bytes in position 1-2: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_unicodetranslateerror(self): | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["g\xfcrk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "can't translate character '\\xfc' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["g\u0100rk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "can't translate character '\\u0100' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["g\uffffrk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |             "can't translate character '\\uffff' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2011-09-29 20:01:55 +02:00
										 |  |  |         if SIZEOF_WCHAR_T == 4: | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |             self.check_exceptionobjectargs( | 
					
						
							|  |  |  |                 UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 ["g\U00010000rk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2007-06-20 09:25:34 +00:00
										 |  |  |                 "can't translate character '\\U00010000' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |             ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["g\xfcrk", 1, 3, "ouch"], | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             "can't translate characters in position 1-2: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodstrictexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "strict" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.strict_errors, | 
					
						
							|  |  |  |             42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "strict" complains about the wrong exception type | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             Exception, | 
					
						
							|  |  |  |             codecs.strict_errors, | 
					
						
							|  |  |  |             Exception("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # If the correct exception is passed in, "strict" raises it | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							|  |  |  |             codecs.strict_errors, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodignoreexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "ignore" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.ignore_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "ignore" complains about the wrong exception type | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.ignore_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # If the correct exception is passed in, "ignore" returns an empty replacement | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.ignore_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.ignore_errors( | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |                 UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.ignore_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeTranslateError("\u3042", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodreplaceexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "replace" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.replace_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "replace" complains about the wrong exception type | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.replace_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.replace_errors, | 
					
						
							|  |  |  |             BadObjectUnicodeEncodeError() | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.replace_errors, | 
					
						
							|  |  |  |             BadObjectUnicodeDecodeError() | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2007-05-07 22:24:25 +00:00
										 |  |  |         # With the correct exception, "replace" returns an "?" or "\ufffd" replacement | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.replace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("?", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.replace_errors( | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |                 UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\ufffd", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.replace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeTranslateError("\u3042", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\ufffd", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodxmlcharrefreplaceexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "xmlcharrefreplace" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.xmlcharrefreplace_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "xmlcharrefreplace" complains about the wrong exception types | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.xmlcharrefreplace_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "xmlcharrefreplace" can only be used for encoding | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.xmlcharrefreplace_errors, | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |             UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.xmlcharrefreplace_errors, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             UnicodeTranslateError("\u3042", 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # Use the correct exception | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042) | 
					
						
							| 
									
										
										
										
											2007-05-03 17:18:26 +00:00
										 |  |  |         s = "".join(chr(c) for c in cs) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |             codecs.xmlcharrefreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", s, 0, len(s), "ouch") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |             ), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("".join("&#%d;" % ord(c) for c in s), len(s)) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodbackslashreplaceexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "backslashreplace" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.backslashreplace_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "backslashreplace" complains about the wrong exception types | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.backslashreplace_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "backslashreplace" can only be used for encoding | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.backslashreplace_errors, | 
					
						
							| 
									
										
										
										
											2007-11-21 19:29:53 +00:00
										 |  |  |             UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.backslashreplace_errors, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             UnicodeTranslateError("\u3042", 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         # Use the correct exception | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\\u3042", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\\x00", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\\xff", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\\u0100", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\\uffff", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2011-10-05 13:01:41 +02:00
										 |  |  |         if SIZEOF_WCHAR_T > 0: | 
					
						
							|  |  |  |             self.assertEqual( | 
					
						
							|  |  |  |                 codecs.backslashreplace_errors( | 
					
						
							|  |  |  |                     UnicodeEncodeError("ascii", "\U00010000", | 
					
						
							| 
									
										
										
										
											2011-11-04 18:23:06 +01:00
										 |  |  |                                        0, 1, "ouch")), | 
					
						
							|  |  |  |                 ("\\U00010000", 1) | 
					
						
							| 
									
										
										
										
											2011-10-05 13:01:41 +02:00
										 |  |  |             ) | 
					
						
							|  |  |  |             self.assertEqual( | 
					
						
							|  |  |  |                 codecs.backslashreplace_errors( | 
					
						
							|  |  |  |                     UnicodeEncodeError("ascii", "\U0010ffff", | 
					
						
							| 
									
										
										
										
											2011-11-04 18:23:06 +01:00
										 |  |  |                                        0, 1, "ouch")), | 
					
						
							|  |  |  |                 ("\\U0010ffff", 1) | 
					
						
							| 
									
										
										
										
											2011-10-05 13:01:41 +02:00
										 |  |  |             ) | 
					
						
							|  |  |  |             # Lone surrogates (regardless of unicode width) | 
					
						
							|  |  |  |             self.assertEqual( | 
					
						
							|  |  |  |                 codecs.backslashreplace_errors( | 
					
						
							|  |  |  |                     UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")), | 
					
						
							|  |  |  |                 ("\\ud800", 1) | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             self.assertEqual( | 
					
						
							|  |  |  |                 codecs.backslashreplace_errors( | 
					
						
							|  |  |  |                     UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")), | 
					
						
							|  |  |  |                 ("\\udfff", 1) | 
					
						
							|  |  |  |             ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_badhandlerresults(self): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for res in results: | 
					
						
							| 
									
										
											  
											
												Merged revisions 68633,68648,68667,68706,68718,68720-68721,68724-68727,68739 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
  r68633 | thomas.heller | 2009-01-16 12:53:44 -0600 (Fri, 16 Jan 2009) | 3 lines
  Change an example in the docs to avoid a mistake when the code is copy
  pasted and changed afterwards.
........
  r68648 | benjamin.peterson | 2009-01-16 22:28:57 -0600 (Fri, 16 Jan 2009) | 1 line
  use enumerate
........
  r68667 | amaury.forgeotdarc | 2009-01-17 14:18:59 -0600 (Sat, 17 Jan 2009) | 3 lines
  #4077: No need to append \n when calling Py_FatalError
  + fix a declaration to make it match the one in pythonrun.h
........
  r68706 | benjamin.peterson | 2009-01-17 19:28:46 -0600 (Sat, 17 Jan 2009) | 1 line
  fix grammar
........
  r68718 | georg.brandl | 2009-01-18 04:42:35 -0600 (Sun, 18 Jan 2009) | 1 line
  #4976: union() and intersection() take multiple args, but talk about "the other".
........
  r68720 | georg.brandl | 2009-01-18 04:45:22 -0600 (Sun, 18 Jan 2009) | 1 line
  #4974: fix redundant mention of lists and tuples.
........
  r68721 | georg.brandl | 2009-01-18 04:48:16 -0600 (Sun, 18 Jan 2009) | 1 line
  #4914: trunc is in math.
........
  r68724 | georg.brandl | 2009-01-18 07:24:10 -0600 (Sun, 18 Jan 2009) | 1 line
  #4979: correct result range for some random functions.
........
  r68725 | georg.brandl | 2009-01-18 07:47:26 -0600 (Sun, 18 Jan 2009) | 1 line
  #4857: fix augmented assignment target spec.
........
  r68726 | georg.brandl | 2009-01-18 08:41:52 -0600 (Sun, 18 Jan 2009) | 1 line
  #4923: clarify what was added.
........
  r68727 | georg.brandl | 2009-01-18 12:25:30 -0600 (Sun, 18 Jan 2009) | 1 line
  #4986: augassigns are not expressions.
........
  r68739 | benjamin.peterson | 2009-01-18 15:11:38 -0600 (Sun, 18 Jan 2009) | 1 line
  fix test that wasn't working as expected #4990
........
											
										 
											2009-01-18 22:27:04 +00:00
										 |  |  |             codecs.register_error("test.badhandler", lambda x: res) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             for enc in encs: | 
					
						
							|  |  |  |                 self.assertRaises( | 
					
						
							|  |  |  |                     TypeError, | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                     "\u3042".encode, | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                     enc, | 
					
						
							|  |  |  |                     "test.badhandler" | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             for (enc, bytes) in ( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |                 ("ascii", b"\xff"), | 
					
						
							|  |  |  |                 ("utf-8", b"\xff"), | 
					
						
							|  |  |  |                 ("utf-7", b"+x-"), | 
					
						
							|  |  |  |                 ("unicode-internal", b"\x00"), | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             ): | 
					
						
							| 
									
										
										
										
											2011-11-17 12:23:34 +02:00
										 |  |  |                 with test.support.check_warnings(): | 
					
						
							| 
									
										
										
										
											2011-11-15 22:44:05 +01:00
										 |  |  |                     # unicode-internal has been deprecated | 
					
						
							|  |  |  |                     self.assertRaises( | 
					
						
							|  |  |  |                         TypeError, | 
					
						
							|  |  |  |                         bytes.decode, | 
					
						
							|  |  |  |                         enc, | 
					
						
							|  |  |  |                         "test.badhandler" | 
					
						
							|  |  |  |                     ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_lookup(self): | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) | 
					
						
							|  |  |  |         self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore")) | 
					
						
							|  |  |  |         self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             codecs.xmlcharrefreplace_errors, | 
					
						
							|  |  |  |             codecs.lookup_error("xmlcharrefreplace") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             codecs.backslashreplace_errors, | 
					
						
							|  |  |  |             codecs.lookup_error("backslashreplace") | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-06 17:21:40 +00:00
										 |  |  |     def test_unencodablereplacement(self): | 
					
						
							|  |  |  |         def unencrepl(exc): | 
					
						
							|  |  |  |             if isinstance(exc, UnicodeEncodeError): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 return ("\u4242", exc.end) | 
					
						
							| 
									
										
										
										
											2002-09-06 17:21:40 +00:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |         codecs.register_error("test.unencreplhandler", unencrepl) | 
					
						
							|  |  |  |         for enc in ("ascii", "iso-8859-1", "iso-8859-15"): | 
					
						
							|  |  |  |             self.assertRaises( | 
					
						
							|  |  |  |                 UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 "\u4242".encode, | 
					
						
							| 
									
										
										
										
											2002-09-06 17:21:40 +00:00
										 |  |  |                 enc, | 
					
						
							|  |  |  |                 "test.unencreplhandler" | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |     def test_badregistercall(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Modules/_codecsmodule.c::register_error() | 
					
						
							|  |  |  |         # Python/codecs.c::PyCodec_RegisterError() | 
					
						
							|  |  |  |         self.assertRaises(TypeError, codecs.register_error, 42) | 
					
						
							|  |  |  |         self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-11-17 08:52:34 +00:00
										 |  |  |     def test_badlookupcall(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Modules/_codecsmodule.c::lookup_error() | 
					
						
							|  |  |  |         self.assertRaises(TypeError, codecs.lookup_error) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |     def test_unknownhandler(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Modules/_codecsmodule.c::lookup_error() | 
					
						
							|  |  |  |         self.assertRaises(LookupError, codecs.lookup_error, "test.unknown") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_xmlcharrefvalues(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors() | 
					
						
							|  |  |  |         # and inline implementations | 
					
						
							|  |  |  |         v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000) | 
					
						
							| 
									
										
										
										
											2011-09-29 20:01:55 +02:00
										 |  |  |         if SIZEOF_WCHAR_T == 4: | 
					
						
							| 
									
										
										
										
											2003-02-19 02:35:07 +00:00
										 |  |  |             v += (100000, 500000, 1000000) | 
					
						
							| 
									
										
										
										
											2007-05-03 17:18:26 +00:00
										 |  |  |         s = "".join([chr(x) for x in v]) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |         codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors) | 
					
						
							|  |  |  |         for enc in ("ascii", "iso-8859-15"): | 
					
						
							|  |  |  |             for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"): | 
					
						
							|  |  |  |                 s.encode(enc, err) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_decodehelper(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Objects/unicodeobject.c::unicode_decode_call_errorhandler() | 
					
						
							|  |  |  |         # and callers | 
					
						
							| 
									
										
										
										
											2007-08-27 20:40:10 +00:00
										 |  |  |         self.assertRaises(LookupError, b"\xff".decode, "ascii", "test.unknown") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def baddecodereturn1(exc): | 
					
						
							|  |  |  |             return 42 | 
					
						
							|  |  |  |         codecs.register_error("test.baddecodereturn1", baddecodereturn1) | 
					
						
							| 
									
										
										
										
											2007-08-27 20:40:10 +00:00
										 |  |  |         self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, b"\\".decode, "unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, b"\\x0".decode, "unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, b"\\x0y".decode, "unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, b"\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, b"\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def baddecodereturn2(exc): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("?", None) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |         codecs.register_error("test.baddecodereturn2", baddecodereturn2) | 
					
						
							| 
									
										
										
										
											2007-08-27 20:40:10 +00:00
										 |  |  |         self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn2") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         handler = PosReturn() | 
					
						
							|  |  |  |         codecs.register_error("test.posreturn", handler.handle) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Valid negative position | 
					
						
							|  |  |  |         handler.pos = -1 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Valid negative position | 
					
						
							|  |  |  |         handler.pos = -2 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?><?>") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Negative position out of bounds | 
					
						
							|  |  |  |         handler.pos = -3 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Valid positive position | 
					
						
							|  |  |  |         handler.pos = 1 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-12-14 21:28:07 +00:00
										 |  |  |         # Largest valid positive position (one beyond end of input) | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         handler.pos = 2 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Invalid positive position | 
					
						
							|  |  |  |         handler.pos = 3 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         # Restart at the "0" | 
					
						
							|  |  |  |         handler.pos = 6 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual(b"\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), "<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         class D(dict): | 
					
						
							|  |  |  |             def __getitem__(self, key): | 
					
						
							|  |  |  |                 raise ValueError | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertRaises(UnicodeError, codecs.charmap_decode, b"\xff", "strict", {0xff: None}) | 
					
						
							|  |  |  |         self.assertRaises(ValueError, codecs.charmap_decode, b"\xff", "strict", D()) | 
					
						
							|  |  |  |         self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: sys.maxunicode+1}) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_encodehelper(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Objects/unicodeobject.c::unicode_encode_call_errorhandler() | 
					
						
							|  |  |  |         # and callers | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(LookupError, "\xff".encode, "ascii", "test.unknown") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def badencodereturn1(exc): | 
					
						
							|  |  |  |             return 42 | 
					
						
							|  |  |  |         codecs.register_error("test.badencodereturn1", badencodereturn1) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn1") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def badencodereturn2(exc): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("?", None) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |         codecs.register_error("test.badencodereturn2", badencodereturn2) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn2") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         handler = PosReturn() | 
					
						
							|  |  |  |         codecs.register_error("test.posreturn", handler.handle) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Valid negative position | 
					
						
							|  |  |  |         handler.pos = -1 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Valid negative position | 
					
						
							|  |  |  |         handler.pos = -2 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?><?>") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Negative position out of bounds | 
					
						
							|  |  |  |         handler.pos = -3 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Valid positive position | 
					
						
							|  |  |  |         handler.pos = 1 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Largest valid positive position (one beyond end of input | 
					
						
							|  |  |  |         handler.pos = 2 | 
					
						
							| 
									
										
										
										
											2010-11-20 19:04:17 +00:00
										 |  |  |         self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Invalid positive position | 
					
						
							|  |  |  |         handler.pos = 3 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         handler.pos = 0 | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         class D(dict): | 
					
						
							|  |  |  |             def __getitem__(self, key): | 
					
						
							|  |  |  |                 raise ValueError | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None}) | 
					
						
							|  |  |  |             self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D()) | 
					
						
							|  |  |  |             self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300}) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_translatehelper(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Objects/unicodeobject.c::unicode_encode_call_errorhandler() | 
					
						
							|  |  |  |         # and callers | 
					
						
							|  |  |  |         # (Unfortunately the errors argument is not directly accessible | 
					
						
							|  |  |  |         # from Python, so we can't test that much) | 
					
						
							|  |  |  |         class D(dict): | 
					
						
							|  |  |  |             def __getitem__(self, key): | 
					
						
							|  |  |  |                 raise ValueError | 
					
						
							| 
									
										
										
										
											2007-10-24 21:25:34 +00:00
										 |  |  |         #self.assertRaises(ValueError, "\xff".translate, D()) | 
					
						
							| 
									
										
										
										
											2014-04-05 15:35:01 +02:00
										 |  |  |         self.assertRaises(ValueError, "\xff".translate, {0xff: sys.maxunicode+1}) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(TypeError, "\xff".translate, {0xff: ()}) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-10-24 14:25:28 +00:00
										 |  |  |     def test_bug828737(self): | 
					
						
							|  |  |  |         charmap = { | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ord("&"): "&", | 
					
						
							|  |  |  |             ord("<"): "<", | 
					
						
							|  |  |  |             ord(">"): ">", | 
					
						
							|  |  |  |             ord('"'): """, | 
					
						
							| 
									
										
										
										
											2003-10-24 14:25:28 +00:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2004-01-18 20:29:55 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-10-24 14:25:28 +00:00
										 |  |  |         for n in (1, 10, 100, 1000): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             text = 'abc<def>ghi'*n | 
					
						
							| 
									
										
										
										
											2003-10-24 14:25:28 +00:00
										 |  |  |             text.translate(charmap) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-07-30 13:31:40 +00:00
										 |  |  |     def test_mutatingdecodehandler(self): | 
					
						
							|  |  |  |         baddata = [ | 
					
						
							|  |  |  |             ("ascii", b"\xff"), | 
					
						
							|  |  |  |             ("utf-7", b"++"), | 
					
						
							|  |  |  |             ("utf-8",  b"\xff"), | 
					
						
							|  |  |  |             ("utf-16", b"\xff"), | 
					
						
							| 
									
										
										
										
											2007-08-16 21:55:45 +00:00
										 |  |  |             ("utf-32", b"\xff"), | 
					
						
							| 
									
										
										
										
											2007-07-30 13:31:40 +00:00
										 |  |  |             ("unicode-escape", b"\\u123g"), | 
					
						
							|  |  |  |             ("raw-unicode-escape", b"\\u123g"), | 
					
						
							|  |  |  |             ("unicode-internal", b"\xff"), | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def replacing(exc): | 
					
						
							|  |  |  |             if isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 exc.object = 42 | 
					
						
							|  |  |  |                 return ("\u4242", 0) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |         codecs.register_error("test.replacing", replacing) | 
					
						
							| 
									
										
										
										
											2011-11-17 12:23:34 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         with test.support.check_warnings(): | 
					
						
							| 
									
										
										
										
											2011-11-15 22:44:05 +01:00
										 |  |  |             # unicode-internal has been deprecated | 
					
						
							|  |  |  |             for (encoding, data) in baddata: | 
					
						
							| 
									
										
										
										
											2011-11-17 12:23:34 +02:00
										 |  |  |                 with self.assertRaises(TypeError): | 
					
						
							|  |  |  |                     data.decode(encoding, "test.replacing") | 
					
						
							| 
									
										
										
										
											2007-07-30 13:31:40 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def mutating(exc): | 
					
						
							|  |  |  |             if isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 exc.object[:] = b"" | 
					
						
							|  |  |  |                 return ("\u4242", 0) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |         codecs.register_error("test.mutating", mutating) | 
					
						
							|  |  |  |         # If the decoder doesn't pick up the modified input the following | 
					
						
							|  |  |  |         # will lead to an endless loop | 
					
						
							| 
									
										
										
										
											2011-11-17 12:23:34 +02:00
										 |  |  |         with test.support.check_warnings(): | 
					
						
							|  |  |  |             # unicode-internal has been deprecated | 
					
						
							|  |  |  |             for (encoding, data) in baddata: | 
					
						
							|  |  |  |                 with self.assertRaises(TypeError): | 
					
						
							|  |  |  |                     data.decode(encoding, "test.replacing") | 
					
						
							| 
									
										
										
										
											2007-07-30 13:31:40 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | if __name__ == "__main__": | 
					
						
							| 
									
										
										
										
											2013-06-12 21:25:59 -04:00
										 |  |  |     unittest.main() |