| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | import test.test_support, unittest | 
					
						
							|  |  |  | import sys, codecs, htmlentitydefs, unicodedata | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | class PosReturn: | 
					
						
							|  |  |  |     # this can be used for configurable callbacks | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							|  |  |  |         self.pos = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def handle(self, exc): | 
					
						
							|  |  |  |         oldpos = self.pos | 
					
						
							|  |  |  |         realpos = oldpos | 
					
						
							|  |  |  |         if realpos<0: | 
					
						
							| 
									
										
										
										
											2003-02-19 02:35:07 +00:00
										 |  |  |             realpos = len(exc.object) + realpos | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         # if we don't advance this time, terminate on the next call | 
					
						
							|  |  |  |         # otherwise we'd get an endless loop | 
					
						
							|  |  |  |         if realpos <= exc.start: | 
					
						
							|  |  |  |             self.pos = len(exc.object) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         return ("<?>", oldpos) | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  | # A UnicodeEncodeError object with a bad start attribute | 
					
						
							|  |  |  | class BadStartUnicodeEncodeError(UnicodeEncodeError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         self.start = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeEncodeError object with a bad object attribute | 
					
						
							|  |  |  | class BadObjectUnicodeEncodeError(UnicodeEncodeError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         self.object = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeDecodeError object without an end attribute | 
					
						
							|  |  |  | class NoEndUnicodeDecodeError(UnicodeDecodeError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeDecodeError.__init__(self, "ascii", b"", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         del self.end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeDecodeError object with a bad object attribute | 
					
						
							|  |  |  | class BadObjectUnicodeDecodeError(UnicodeDecodeError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeDecodeError.__init__(self, "ascii", b"", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         self.object = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeTranslateError object without a start attribute | 
					
						
							|  |  |  | class NoStartUnicodeTranslateError(UnicodeTranslateError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeTranslateError.__init__(self, "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         del self.start | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeTranslateError object without an end attribute | 
					
						
							|  |  |  | class NoEndUnicodeTranslateError(UnicodeTranslateError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeTranslateError.__init__(self,  "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         del self.end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # A UnicodeTranslateError object without an object attribute | 
					
						
							|  |  |  | class NoObjectUnicodeTranslateError(UnicodeTranslateError): | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |         UnicodeTranslateError.__init__(self, "", 0, 1, "bad") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         del self.object | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | class CodecCallbackTest(unittest.TestCase): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_xmlcharrefreplace(self): | 
					
						
							|  |  |  |         # replace unencodable characters which numeric character entities. | 
					
						
							|  |  |  |         # For ascii, latin-1 and charmaps this is completely implemented | 
					
						
							|  |  |  |         # in C and should be reasonably fast. | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         s = "\u30b9\u30d1\u30e2 \xe4nd eggs" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             s.encode("ascii", "xmlcharrefreplace"), | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"スパモ änd eggs" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             s.encode("latin-1", "xmlcharrefreplace"), | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"スパモ \xe4nd eggs" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_xmlcharnamereplace(self): | 
					
						
							|  |  |  |         # This time use a named character entity for unencodable | 
					
						
							|  |  |  |         # characters, if one is available. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def xmlcharnamereplace(exc): | 
					
						
							|  |  |  |             if not isinstance(exc, UnicodeEncodeError): | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |             l = [] | 
					
						
							|  |  |  |             for c in exc.object[exc.start:exc.end]: | 
					
						
							|  |  |  |                 try: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                     l.append("&%s;" % htmlentitydefs.codepoint2name[ord(c)]) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                 except KeyError: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                     l.append("&#%d;" % ord(c)) | 
					
						
							|  |  |  |             return ("".join(l), exc.end) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error( | 
					
						
							|  |  |  |             "test.xmlcharnamereplace", xmlcharnamereplace) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "\xab\u211c\xbb = \u2329\u1234\u20ac\u232a" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"«ℜ» = ⟨ሴ€⟩" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout) | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\xabℜ\xbb = ⟨ሴ€⟩" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout) | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\xabℜ\xbb = ⟨ሴ\xa4⟩" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_uninamereplace(self): | 
					
						
							|  |  |  |         # We're using the names from the unicode database this time, | 
					
						
							| 
									
										
										
										
											2002-11-25 17:58:02 +00:00
										 |  |  |         # and we're doing "syntax highlighting" here, i.e. we include | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         # the replaced text in ANSI escape sequences. For this it is | 
					
						
							|  |  |  |         # useful that the error handler is not called for every single | 
					
						
							|  |  |  |         # unencodable character, but for a complete sequence of | 
					
						
							|  |  |  |         # unencodable characters, otherwise we would output many | 
					
						
							|  |  |  |         # unneccessary escape sequences. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def uninamereplace(exc): | 
					
						
							|  |  |  |             if not isinstance(exc, UnicodeEncodeError): | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |             l = [] | 
					
						
							|  |  |  |             for c in exc.object[exc.start:exc.end]: | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 l.append(unicodedata.name(c, "0x%x" % ord(c))) | 
					
						
							|  |  |  |             return ("\033[1m%s\033[0m" % ", ".join(l), exc.end) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error( | 
					
						
							|  |  |  |             "test.uninamereplace", uninamereplace) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "\xac\u1234\u20ac\u8000" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_backslashescape(self): | 
					
						
							|  |  |  |         # Does the same as the "unicode-escape" encoding, but with different | 
					
						
							|  |  |  |         # base encodings. | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "a\xac\u1234\u20ac\u8000" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         if sys.maxunicode > 0xffff: | 
					
						
							| 
									
										
										
										
											2007-05-03 17:18:26 +00:00
										 |  |  |             sin += chr(sys.maxunicode) | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"a\\xac\\u1234\\u20ac\\u8000" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         if sys.maxunicode > 0xffff: | 
					
						
							| 
									
										
										
										
											2007-06-06 15:17:22 +00:00
										 |  |  |             sout += bytes("\\U%08x" % sys.maxunicode) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"a\xac\\u1234\\u20ac\\u8000" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         if sys.maxunicode > 0xffff: | 
					
						
							| 
									
										
										
										
											2007-06-06 15:17:22 +00:00
										 |  |  |             sout += bytes("\\U%08x" % sys.maxunicode) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"a\xac\\u1234\xa4\\u8000" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         if sys.maxunicode > 0xffff: | 
					
						
							| 
									
										
										
										
											2007-06-06 15:17:22 +00:00
										 |  |  |             sout += bytes("\\U%08x" % sys.maxunicode) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  |     def test_decoderelaxedutf8(self): | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         # This is the test for a decoding callback handler, | 
					
						
							|  |  |  |         # that relaxes the UTF-8 minimal encoding restriction. | 
					
						
							|  |  |  |         # A null byte that is encoded as "\xc0\x80" will be | 
					
						
							|  |  |  |         # decoded as a null byte. All other illegal sequences | 
					
						
							|  |  |  |         # will be handled strictly. | 
					
						
							|  |  |  |         def relaxedutf8(exc): | 
					
						
							|  |  |  |             if not isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             if exc.object[exc.start:exc.end].startswith(b"\xc0\x80"): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 return ("\x00", exc.start+2) # retry after two bytes | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 raise exc | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error( | 
					
						
							|  |  |  |             "test.relaxedutf8", relaxedutf8) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sin = b"a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80" | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sout = "a\x00b\x00c\xfc\x00\x00" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout) | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sin = b"\xc0\x80\xc0\x81" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_charmapencode(self): | 
					
						
							|  |  |  |         # For charmap encodings the replacement string will be | 
					
						
							|  |  |  |         # mapped through the encoding again. This means, that | 
					
						
							|  |  |  |         # to be able to use e.g. the "replace" handler, the | 
					
						
							|  |  |  |         # charmap has to have a mapping for "?". | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         charmap = dict((ord(c), str8(2*c.upper())) for c in "abcdefgh") | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "abc" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"AABBCC" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "abcA" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         charmap[ord("?")] = str8("XYZ") | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         sin = "abcDEF" | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         sout = b"AABBCCXYZXYZXYZ" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         charmap[ord("?")] = "XYZ" # wrong type in mapping | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  |     def test_decodeunicodeinternal(self): | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             UnicodeDecodeError, | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"\x00\x00\x00\x00\x00".decode, | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  |             "unicode-internal", | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         if sys.maxunicode > 0xffff: | 
					
						
							|  |  |  |             def handler_unicodeinternal(exc): | 
					
						
							|  |  |  |                 if not isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                     raise TypeError("don't know how to handle %r" % exc) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 return ("\x01", 1) | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |             self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |                 b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 "\u0000" | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |                 b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 "\u0000\ufffd" | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             codecs.register_error("test.hui", handler_unicodeinternal) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |                 b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 "\u0000\u0001\u0000" | 
					
						
							| 
									
										
										
										
											2005-08-30 10:23:14 +00:00
										 |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |     def test_callbacks(self): | 
					
						
							|  |  |  |         def handler1(exc): | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             r = range(exc.start, exc.end) | 
					
						
							|  |  |  |             if isinstance(exc, UnicodeEncodeError): | 
					
						
							|  |  |  |                 l = ["<%d>" % ord(exc.object[pos]) for pos in r] | 
					
						
							|  |  |  |             elif isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 l = ["<%d>" % exc.object[pos] for pos in r] | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("[%s]" % "".join(l), exc.end) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error("test.handler1", handler1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def handler2(exc): | 
					
						
							|  |  |  |             if not isinstance(exc, UnicodeDecodeError): | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             l = ["<%d>" % exc.object[pos] for pos in range(exc.start, exc.end)] | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("[%s]" % "".join(l), exc.end+1) # skip one character | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         codecs.register_error("test.handler2", handler2) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         s = b"\x00\x81\x7f\x80\xff" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             s.decode("ascii", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "\x00[<129>]\x7f[<128>][<255>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             s.decode("ascii", "test.handler2"), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "\x00[<129>][<128>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"\\u3042\u3xxx".decode("unicode-escape", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "\u3042[<92><117><51><120>]xx" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"\\u3042\u3xx".decode("unicode-escape", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "\u3042[<92><117><51><120><120>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.charmap_decode(b"abc", "test.handler1", {ord("a"): "z"})[0], | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "z[<98>][<99>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "g\xfc\xdfrk".encode("ascii", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"g[<252><223>]rk" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.assertEqual( | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             "g\xfc\xdf".encode("ascii", "test.handler1"), | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             b"g[<252><223>]" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_longstrings(self): | 
					
						
							|  |  |  |         # test long strings to check for memory overflow problems | 
					
						
							|  |  |  |         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"] | 
					
						
							|  |  |  |         # register the handlers under different names, | 
					
						
							|  |  |  |         # to prevent the codec from recognizing the name | 
					
						
							|  |  |  |         for err in errors: | 
					
						
							|  |  |  |             codecs.register_error("test." + err, codecs.lookup_error(err)) | 
					
						
							|  |  |  |         l = 1000 | 
					
						
							|  |  |  |         errors += [ "test." + err for err in errors ] | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         for uni in [ s*l for s in ("x", "\u3042", "a\xe4") ]: | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16"): | 
					
						
							|  |  |  |                 for err in errors: | 
					
						
							| 
									
										
										
										
											2002-11-09 05:26:15 +00:00
										 |  |  |                     try: | 
					
						
							|  |  |  |                         uni.encode(enc, err) | 
					
						
							|  |  |  |                     except UnicodeError: | 
					
						
							|  |  |  |                         pass | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def check_exceptionobjectargs(self, exctype, args, msg): | 
					
						
							|  |  |  |         # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion | 
					
						
							|  |  |  |         # check with one missing argument | 
					
						
							|  |  |  |         self.assertRaises(TypeError, exctype, *args[:-1]) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # check with one argument too much | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises(TypeError, exctype, *(args + ["too much"])) | 
					
						
							|  |  |  |         # check with one argument of the wrong type | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         wrongargs = [ "spam", str8("eggs"), b"spam", 42, 1.0, None ] | 
					
						
							| 
									
										
										
										
											2007-05-07 22:24:25 +00:00
										 |  |  |         for i in range(len(args)): | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             for wrongarg in wrongargs: | 
					
						
							|  |  |  |                 if type(wrongarg) is type(args[i]): | 
					
						
							| 
									
										
										
										
											2002-11-09 05:26:15 +00:00
										 |  |  |                     continue | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                 # build argument array | 
					
						
							|  |  |  |                 callargs = [] | 
					
						
							| 
									
										
										
										
											2007-05-07 22:24:25 +00:00
										 |  |  |                 for j in range(len(args)): | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                     if i==j: | 
					
						
							|  |  |  |                         callargs.append(wrongarg) | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         callargs.append(args[i]) | 
					
						
							|  |  |  |                 self.assertRaises(TypeError, exctype, *callargs) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # check with the correct number and type of arguments | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         exc = exctype(*args) | 
					
						
							|  |  |  |         self.assertEquals(str(exc), msg) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_unicodeencodeerror(self): | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "g\xfcrk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2003-08-12 17:34:49 +00:00
										 |  |  |             "'ascii' codec can't encode character u'\\xfc' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "g\xfcrk", 1, 4, "ouch"], | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             "'ascii' codec can't encode characters in position 1-3: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "\xfcx", 0, 1, "ouch"], | 
					
						
							| 
									
										
										
										
											2003-08-12 17:34:49 +00:00
										 |  |  |             "'ascii' codec can't encode character u'\\xfc' in position 0: ouch" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "\u0100x", 0, 1, "ouch"], | 
					
						
							| 
									
										
										
										
											2003-08-12 17:34:49 +00:00
										 |  |  |             "'ascii' codec can't encode character u'\\u0100' in position 0: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", "\uffffx", 0, 1, "ouch"], | 
					
						
							| 
									
										
										
										
											2003-08-12 17:34:49 +00:00
										 |  |  |             "'ascii' codec can't encode character u'\\uffff' in position 0: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         if sys.maxunicode > 0xffff: | 
					
						
							|  |  |  |             self.check_exceptionobjectargs( | 
					
						
							|  |  |  |                 UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 ["ascii", "\U00010000x", 0, 1, "ouch"], | 
					
						
							| 
									
										
										
										
											2003-08-12 17:34:49 +00:00
										 |  |  |                 "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |             ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_unicodedecodeerror(self): | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeDecodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", b"g\xfcrk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             "'ascii' codec can't decode byte 0xfc in position 1: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeDecodeError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["ascii", b"g\xfcrk", 1, 3, "ouch"], | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             "'ascii' codec can't decode bytes in position 1-2: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_unicodetranslateerror(self): | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["g\xfcrk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2003-08-12 17:34:49 +00:00
										 |  |  |             "can't translate character u'\\xfc' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["g\u0100rk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2003-08-12 17:34:49 +00:00
										 |  |  |             "can't translate character u'\\u0100' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["g\uffffrk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2003-08-12 17:34:49 +00:00
										 |  |  |             "can't translate character u'\\uffff' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         if sys.maxunicode > 0xffff: | 
					
						
							|  |  |  |             self.check_exceptionobjectargs( | 
					
						
							|  |  |  |                 UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 ["g\U00010000rk", 1, 2, "ouch"], | 
					
						
							| 
									
										
										
										
											2003-08-12 17:34:49 +00:00
										 |  |  |                 "can't translate character u'\\U00010000' in position 1: ouch" | 
					
						
							| 
									
										
										
										
											2003-08-12 17:32:43 +00:00
										 |  |  |             ) | 
					
						
							|  |  |  |         self.check_exceptionobjectargs( | 
					
						
							|  |  |  |             UnicodeTranslateError, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             ["g\xfcrk", 1, 3, "ouch"], | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             "can't translate characters in position 1-2: ouch" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodstrictexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "strict" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.strict_errors, | 
					
						
							|  |  |  |             42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "strict" complains about the wrong exception type | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             Exception, | 
					
						
							|  |  |  |             codecs.strict_errors, | 
					
						
							|  |  |  |             Exception("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # If the correct exception is passed in, "strict" raises it | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             UnicodeEncodeError, | 
					
						
							|  |  |  |             codecs.strict_errors, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodignoreexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "ignore" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.ignore_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "ignore" complains about the wrong exception type | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.ignore_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # If the correct exception is passed in, "ignore" returns an empty replacement | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.ignore_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.ignore_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.ignore_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeTranslateError("\u3042", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodreplaceexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "replace" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.replace_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "replace" complains about the wrong exception type | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.replace_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.replace_errors, | 
					
						
							|  |  |  |             BadObjectUnicodeEncodeError() | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.replace_errors, | 
					
						
							|  |  |  |             BadObjectUnicodeDecodeError() | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2007-05-07 22:24:25 +00:00
										 |  |  |         # With the correct exception, "replace" returns an "?" or "\ufffd" replacement | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.replace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("?", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.replace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\ufffd", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.replace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeTranslateError("\u3042", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\ufffd", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodxmlcharrefreplaceexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "xmlcharrefreplace" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.xmlcharrefreplace_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "xmlcharrefreplace" complains about the wrong exception types | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.xmlcharrefreplace_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "xmlcharrefreplace" can only be used for encoding | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.xmlcharrefreplace_errors, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.xmlcharrefreplace_errors, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             UnicodeTranslateError("\u3042", 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # Use the correct exception | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |         cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042) | 
					
						
							| 
									
										
										
										
											2007-05-03 17:18:26 +00:00
										 |  |  |         s = "".join(chr(c) for c in cs) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         self.assertEquals( | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |             codecs.xmlcharrefreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", s, 0, len(s), "ouch") | 
					
						
							| 
									
										
										
										
											2005-11-17 18:51:34 +00:00
										 |  |  |             ), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("".join("&#%d;" % ord(c) for c in s), len(s)) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_badandgoodbackslashreplaceexceptions(self): | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "backslashreplace" complains about a non-exception passed in | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.backslashreplace_errors, | 
					
						
							|  |  |  |            42 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "backslashreplace" complains about the wrong exception types | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertRaises( | 
					
						
							|  |  |  |            TypeError, | 
					
						
							|  |  |  |            codecs.backslashreplace_errors, | 
					
						
							|  |  |  |            UnicodeError("ouch") | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         # "backslashreplace" can only be used for encoding | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.backslashreplace_errors, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertRaises( | 
					
						
							|  |  |  |             TypeError, | 
					
						
							|  |  |  |             codecs.backslashreplace_errors, | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |             UnicodeTranslateError("\u3042", 0, 1, "ouch") | 
					
						
							| 
									
										
										
										
											2003-01-20 02:34:07 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         # Use the correct exception | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\\u3042", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\\x00", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\\xff", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\\u0100", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |             codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                 UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ("\\uffff", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         ) | 
					
						
							|  |  |  |         if sys.maxunicode>0xffff: | 
					
						
							|  |  |  |             self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |                 codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                     UnicodeEncodeError("ascii", "\U00010000", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 ("\\U00010000", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             ) | 
					
						
							|  |  |  |             self.assertEquals( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |                 codecs.backslashreplace_errors( | 
					
						
							| 
									
										
										
										
											2007-05-18 16:29:38 +00:00
										 |  |  |                     UnicodeEncodeError("ascii", "\U0010ffff", 0, 1, "ouch")), | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 ("\\U0010ffff", 1) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_badhandlerresults(self): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) ) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |         encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for res in results: | 
					
						
							|  |  |  |             codecs.register_error("test.badhandler", lambda: res) | 
					
						
							|  |  |  |             for enc in encs: | 
					
						
							|  |  |  |                 self.assertRaises( | 
					
						
							|  |  |  |                     TypeError, | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                     "\u3042".encode, | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |                     enc, | 
					
						
							|  |  |  |                     "test.badhandler" | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             for (enc, bytes) in ( | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |                 ("ascii", b"\xff"), | 
					
						
							|  |  |  |                 ("utf-8", b"\xff"), | 
					
						
							|  |  |  |                 ("utf-7", b"+x-"), | 
					
						
							|  |  |  |                 ("unicode-internal", b"\x00"), | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  |             ): | 
					
						
							|  |  |  |                 self.assertRaises( | 
					
						
							|  |  |  |                     TypeError, | 
					
						
							|  |  |  |                     bytes.decode, | 
					
						
							|  |  |  |                     enc, | 
					
						
							|  |  |  |                     "test.badhandler" | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_lookup(self): | 
					
						
							|  |  |  |         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict")) | 
					
						
							|  |  |  |         self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore")) | 
					
						
							|  |  |  |         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict")) | 
					
						
							|  |  |  |         self.assertEquals( | 
					
						
							|  |  |  |             codecs.xmlcharrefreplace_errors, | 
					
						
							|  |  |  |             codecs.lookup_error("xmlcharrefreplace") | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         self.assertEquals( | 
					
						
							|  |  |  |             codecs.backslashreplace_errors, | 
					
						
							|  |  |  |             codecs.lookup_error("backslashreplace") | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-06 17:21:40 +00:00
										 |  |  |     def test_unencodablereplacement(self): | 
					
						
							|  |  |  |         def unencrepl(exc): | 
					
						
							|  |  |  |             if isinstance(exc, UnicodeEncodeError): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 return ("\u4242", exc.end) | 
					
						
							| 
									
										
										
										
											2002-09-06 17:21:40 +00:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 raise TypeError("don't know how to handle %r" % exc) | 
					
						
							|  |  |  |         codecs.register_error("test.unencreplhandler", unencrepl) | 
					
						
							|  |  |  |         for enc in ("ascii", "iso-8859-1", "iso-8859-15"): | 
					
						
							|  |  |  |             self.assertRaises( | 
					
						
							|  |  |  |                 UnicodeEncodeError, | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |                 "\u4242".encode, | 
					
						
							| 
									
										
										
										
											2002-09-06 17:21:40 +00:00
										 |  |  |                 enc, | 
					
						
							|  |  |  |                 "test.unencreplhandler" | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |     def test_badregistercall(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Modules/_codecsmodule.c::register_error() | 
					
						
							|  |  |  |         # Python/codecs.c::PyCodec_RegisterError() | 
					
						
							|  |  |  |         self.assertRaises(TypeError, codecs.register_error, 42) | 
					
						
							|  |  |  |         self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-11-17 08:52:34 +00:00
										 |  |  |     def test_badlookupcall(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Modules/_codecsmodule.c::lookup_error() | 
					
						
							|  |  |  |         self.assertRaises(TypeError, codecs.lookup_error) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |     def test_unknownhandler(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Modules/_codecsmodule.c::lookup_error() | 
					
						
							|  |  |  |         self.assertRaises(LookupError, codecs.lookup_error, "test.unknown") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_xmlcharrefvalues(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors() | 
					
						
							|  |  |  |         # and inline implementations | 
					
						
							|  |  |  |         v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000) | 
					
						
							| 
									
										
										
										
											2003-01-09 11:38:50 +00:00
										 |  |  |         if sys.maxunicode>=100000: | 
					
						
							| 
									
										
										
										
											2003-02-19 02:35:07 +00:00
										 |  |  |             v += (100000, 500000, 1000000) | 
					
						
							| 
									
										
										
										
											2007-05-03 17:18:26 +00:00
										 |  |  |         s = "".join([chr(x) for x in v]) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |         codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors) | 
					
						
							|  |  |  |         for enc in ("ascii", "iso-8859-15"): | 
					
						
							|  |  |  |             for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"): | 
					
						
							|  |  |  |                 s.encode(enc, err) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_decodehelper(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Objects/unicodeobject.c::unicode_decode_call_errorhandler() | 
					
						
							|  |  |  |         # and callers | 
					
						
							|  |  |  |         self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def baddecodereturn1(exc): | 
					
						
							|  |  |  |             return 42 | 
					
						
							|  |  |  |         codecs.register_error("test.baddecodereturn1", baddecodereturn1) | 
					
						
							|  |  |  |         self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  |         self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def baddecodereturn2(exc): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("?", None) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |         codecs.register_error("test.baddecodereturn2", baddecodereturn2) | 
					
						
							|  |  |  |         self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         handler = PosReturn() | 
					
						
							|  |  |  |         codecs.register_error("test.posreturn", handler.handle) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Valid negative position | 
					
						
							|  |  |  |         handler.pos = -1 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertEquals(b"\xff0".decode("ascii", "test.posreturn"), "<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Valid negative position | 
					
						
							|  |  |  |         handler.pos = -2 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertEquals(b"\xff0".decode("ascii", "test.posreturn"), "<?><?>") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Negative position out of bounds | 
					
						
							|  |  |  |         handler.pos = -3 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Valid positive position | 
					
						
							|  |  |  |         handler.pos = 1 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertEquals(b"\xff0".decode("ascii", "test.posreturn"), "<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-12-14 21:28:07 +00:00
										 |  |  |         # Largest valid positive position (one beyond end of input) | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         handler.pos = 2 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertEquals(b"\xff0".decode("ascii", "test.posreturn"), "<?>") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Invalid positive position | 
					
						
							|  |  |  |         handler.pos = 3 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         # Restart at the "0" | 
					
						
							|  |  |  |         handler.pos = 6 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertEquals(b"\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), "<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         class D(dict): | 
					
						
							|  |  |  |             def __getitem__(self, key): | 
					
						
							|  |  |  |                 raise ValueError | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertRaises(UnicodeError, codecs.charmap_decode, b"\xff", "strict", {0xff: None}) | 
					
						
							|  |  |  |         self.assertRaises(ValueError, codecs.charmap_decode, b"\xff", "strict", D()) | 
					
						
							|  |  |  |         self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: sys.maxunicode+1}) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_encodehelper(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Objects/unicodeobject.c::unicode_encode_call_errorhandler() | 
					
						
							|  |  |  |         # and callers | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(LookupError, "\xff".encode, "ascii", "test.unknown") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def badencodereturn1(exc): | 
					
						
							|  |  |  |             return 42 | 
					
						
							|  |  |  |         codecs.register_error("test.badencodereturn1", badencodereturn1) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn1") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def badencodereturn2(exc): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             return ("?", None) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  |         codecs.register_error("test.badencodereturn2", badencodereturn2) | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn2") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         handler = PosReturn() | 
					
						
							|  |  |  |         codecs.register_error("test.posreturn", handler.handle) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Valid negative position | 
					
						
							|  |  |  |         handler.pos = -1 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertEquals("\xff0".encode("ascii", "test.posreturn"), b"<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Valid negative position | 
					
						
							|  |  |  |         handler.pos = -2 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertEquals("\xff0".encode("ascii", "test.posreturn"), b"<?><?>") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Negative position out of bounds | 
					
						
							|  |  |  |         handler.pos = -3 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Valid positive position | 
					
						
							|  |  |  |         handler.pos = 1 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertEquals("\xff0".encode("ascii", "test.posreturn"), b"<?>0") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Largest valid positive position (one beyond end of input | 
					
						
							|  |  |  |         handler.pos = 2 | 
					
						
							| 
									
										
										
										
											2007-05-09 10:44:06 +00:00
										 |  |  |         self.assertEquals("\xff0".encode("ascii", "test.posreturn"), b"<?>") | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Invalid positive position | 
					
						
							|  |  |  |         handler.pos = 3 | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn") | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         handler.pos = 0 | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         class D(dict): | 
					
						
							|  |  |  |             def __getitem__(self, key): | 
					
						
							|  |  |  |                 raise ValueError | 
					
						
							| 
									
										
										
										
											2003-01-31 17:19:08 +00:00
										 |  |  |         for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None}) | 
					
						
							|  |  |  |             self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D()) | 
					
						
							|  |  |  |             self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300}) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_translatehelper(self): | 
					
						
							|  |  |  |         # enhance coverage of: | 
					
						
							|  |  |  |         # Objects/unicodeobject.c::unicode_encode_call_errorhandler() | 
					
						
							|  |  |  |         # and callers | 
					
						
							|  |  |  |         # (Unfortunately the errors argument is not directly accessible | 
					
						
							|  |  |  |         # from Python, so we can't test that much) | 
					
						
							|  |  |  |         class D(dict): | 
					
						
							|  |  |  |             def __getitem__(self, key): | 
					
						
							|  |  |  |                 raise ValueError | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |         self.assertRaises(ValueError, "\xff".translate, D()) | 
					
						
							|  |  |  |         self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1}) | 
					
						
							|  |  |  |         self.assertRaises(TypeError, "\xff".translate, {0xff: ()}) | 
					
						
							| 
									
										
										
										
											2003-01-08 23:22:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-10-24 14:25:28 +00:00
										 |  |  |     def test_bug828737(self): | 
					
						
							|  |  |  |         charmap = { | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             ord("&"): "&", | 
					
						
							|  |  |  |             ord("<"): "<", | 
					
						
							|  |  |  |             ord(">"): ">", | 
					
						
							|  |  |  |             ord('"'): """, | 
					
						
							| 
									
										
										
										
											2003-10-24 14:25:28 +00:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2004-01-18 20:29:55 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-10-24 14:25:28 +00:00
										 |  |  |         for n in (1, 10, 100, 1000): | 
					
						
							| 
									
										
										
										
											2007-05-02 19:09:54 +00:00
										 |  |  |             text = 'abc<def>ghi'*n | 
					
						
							| 
									
										
										
										
											2003-10-24 14:25:28 +00:00
										 |  |  |             text.translate(charmap) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | def test_main(): | 
					
						
							| 
									
										
										
										
											2003-05-01 17:45:56 +00:00
										 |  |  |     test.test_support.run_unittest(CodecCallbackTest) | 
					
						
							| 
									
										
										
										
											2002-09-02 13:14:32 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |     test_main() |