2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								""" Test script for the Unicode implementation.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Written by Bill Tutt.
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-24 07:59:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								"""#"
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-17 19:11:13 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from test_support import verify, verbose
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								print 'Testing General Unicode Character Name, and case insensitivity...',
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# General and case insensitivity test:
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-20 11:15:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # put all \N escapes inside exec'd raw strings, to make sure this
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    # script runs even if the compiler chokes on \N escapes
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    exec r"""
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								s = u"\N{LATIN CAPITAL LETTER T}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER H}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER E}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{SPACE}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER R}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN CAPITAL LETTER E}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER D}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{SPACE}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER f}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN CAPITAL LeTtEr o}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMaLl LETTER x}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{SPACE}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER A}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER T}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER E}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{SPACE}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER T}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER H}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER E}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{SpAcE}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER S}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER H}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER E}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER E}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{LATIN SMALL LETTER P}" \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    u"\N{FULL STOP}"
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-17 19:11:13 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								verify(s == u"The rEd fOx ate the sheep.", s)
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-20 11:15:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								"""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								except UnicodeError, v:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    print v
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 11:13:46 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								print "done."
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 11:00:42 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-24 07:59:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import unicodedata
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 11:00:42 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 11:13:46 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								print "Testing name to code mapping....",
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 11:00:42 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								for char in "SPAM":
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    name = "LATIN SMALL LETTER %s" % char
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-24 07:59:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    code = unicodedata.lookup(name)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    verify(unicodedata.name(code) == name)
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 11:13:46 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								print "done."
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 11:00:42 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 11:13:46 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								print "Testing code to name mapping for all characters....",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								count = 0
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 11:00:42 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								for code in range(65536):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    try:
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-24 07:59:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        char = unichr(code)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        name = unicodedata.name(char)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        verify(unicodedata.lookup(name) == char)
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 11:13:46 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        count += 1
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-24 07:59:11 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    except (KeyError, ValueError):
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 11:00:42 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        pass
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								print "done."
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 11:13:46 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								print "Found", count, "characters in the unicode name database"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# misc. symbol testing
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								print "Testing misc. symbols for unicode character name expansion....",
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-20 11:15:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								exec r"""
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-17 19:11:13 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								verify(u"\N{PILCROW SIGN}" == u"\u00b6")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								verify(u"\N{REPLACEMENT CHARACTER}" == u"\uFFFD")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								verify(u"\N{HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK}" == u"\uFF9F")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								verify(u"\N{FULLWIDTH LATIN SMALL LETTER A}" == u"\uFF41")
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-20 11:15:25 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								"""
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								print "done."
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# strict error testing:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								print "Testing unicode character name expansion strict error handling....",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								try:
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 09:45:02 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    unicode("\N{blah}", 'unicode-escape', 'strict')
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								except UnicodeError:
							 | 
						
					
						
							
								
									
										
										
										
											2000-10-23 17:22:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    pass
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								else:
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 09:45:02 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    raise AssertionError, "failed to raise an exception when given a bogus character name"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								try:
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 09:45:02 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    unicode("\N{" + "x" * 100000 + "}", 'unicode-escape', 'strict')
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								except UnicodeError:
							 | 
						
					
						
							
								
									
										
										
										
											2000-10-23 17:22:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    pass
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								else:
							 | 
						
					
						
							
								
									
										
										
										
											2001-01-19 09:45:02 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    raise AssertionError, "failed to raise an exception when given a very " \
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                          "long bogus character name"
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								try:
							 | 
						
					
						
							
								
									
										
										
										
											2000-10-23 17:22:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    unicode("\N{SPACE", 'unicode-escape', 'strict')
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								except UnicodeError:
							 | 
						
					
						
							
								
									
										
										
										
											2000-10-23 17:22:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    pass
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								else:
							 | 
						
					
						
							
								
									
										
										
										
											2000-10-23 17:22:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    raise AssertionError, "failed to raise an exception for a missing closing brace."
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								try:
							 | 
						
					
						
							
								
									
										
										
										
											2000-10-23 17:22:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    unicode("\NSPACE", 'unicode-escape', 'strict')
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								except UnicodeError:
							 | 
						
					
						
							
								
									
										
										
										
											2000-10-23 17:22:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    pass
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								else:
							 | 
						
					
						
							
								
									
										
										
										
											2000-10-23 17:22:08 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    raise AssertionError, "failed to raise an exception for a missing opening brace."
							 | 
						
					
						
							
								
									
										
										
										
											2000-06-30 09:45:20 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								print "done."
							 |