| 
									
										
										
										
											2007-04-05 20:08:56 +00:00
										 |  |  | from test.test_support import run_unittest, open_urlresource | 
					
						
							|  |  |  | import unittest | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-11-23 22:08:15 +00:00
										 |  |  | import sys | 
					
						
							| 
									
										
										
										
											2002-11-24 18:53:11 +00:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2002-11-23 22:08:15 +00:00
										 |  |  | from unicodedata import normalize | 
					
						
							| 
									
										
										
										
											2002-11-24 18:53:11 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-03-10 11:59:47 +00:00
										 |  |  | TESTDATAFILE = "NormalizationTest" + os.extsep + "txt" | 
					
						
							|  |  |  | TESTDATAURL = "http://www.unicode.org/Public/4.1.0/ucd/" + TESTDATAFILE | 
					
						
							| 
									
										
										
										
											2002-11-23 22:08:15 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-03-24 08:02:51 +00:00
										 |  |  | class RangeError(Exception): | 
					
						
							| 
									
										
										
										
											2002-11-23 22:08:15 +00:00
										 |  |  |     pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def NFC(str): | 
					
						
							|  |  |  |     return normalize("NFC", str) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def NFKC(str): | 
					
						
							|  |  |  |     return normalize("NFKC", str) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def NFD(str): | 
					
						
							|  |  |  |     return normalize("NFD", str) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def NFKD(str): | 
					
						
							|  |  |  |     return normalize("NFKD", str) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def unistr(data): | 
					
						
							|  |  |  |     data = [int(x, 16) for x in data.split(" ")] | 
					
						
							|  |  |  |     for x in data: | 
					
						
							|  |  |  |         if x > sys.maxunicode: | 
					
						
							|  |  |  |             raise RangeError | 
					
						
							|  |  |  |     return u"".join([unichr(x) for x in data]) | 
					
						
							| 
									
										
										
										
											2007-04-05 20:08:56 +00:00
										 |  |  |      | 
					
						
							|  |  |  | class NormalizationTest(unittest.TestCase): | 
					
						
							|  |  |  |     def test_main(self): | 
					
						
							|  |  |  |         part1_data = {} | 
					
						
							|  |  |  |         for line in open_urlresource(TESTDATAURL): | 
					
						
							|  |  |  |             if '#' in line: | 
					
						
							|  |  |  |                 line = line.split('#')[0] | 
					
						
							|  |  |  |             line = line.strip() | 
					
						
							|  |  |  |             if not line: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             if line.startswith("@Part"): | 
					
						
							|  |  |  |                 part = line.split()[0] | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             if part == "@Part3": | 
					
						
							|  |  |  |                 # XXX we don't support PRI #29 yet, so skip these tests for now | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]] | 
					
						
							|  |  |  |             except RangeError: | 
					
						
							|  |  |  |                 # Skip unsupported characters; | 
					
						
							|  |  |  |                 # try atleast adding c1 if we are in part1 | 
					
						
							|  |  |  |                 if part == "@Part1": | 
					
						
							|  |  |  |                     try: | 
					
						
							|  |  |  |                         c1 = unistr(line.split(';')[0]) | 
					
						
							|  |  |  |                     except RangeError: | 
					
						
							|  |  |  |                         pass | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         part1_data[c1] = 1 | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2002-11-23 22:08:15 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-04-05 20:08:56 +00:00
										 |  |  |             # Perform tests | 
					
						
							|  |  |  |             self.failUnless(c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3), line) | 
					
						
							|  |  |  |             self.failUnless(c4 ==  NFC(c4) ==  NFC(c5), line) | 
					
						
							|  |  |  |             self.failUnless(c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3), line) | 
					
						
							|  |  |  |             self.failUnless(c5 ==  NFD(c4) ==  NFD(c5), line) | 
					
						
							|  |  |  |             self.failUnless(c4 == NFKC(c1) == NFKC(c2) == \ | 
					
						
							|  |  |  |                             NFKC(c3) == NFKC(c4) == NFKC(c5), | 
					
						
							|  |  |  |                             line) | 
					
						
							|  |  |  |             self.failUnless(c5 == NFKD(c1) == NFKD(c2) == \ | 
					
						
							|  |  |  |                             NFKD(c3) == NFKD(c4) == NFKD(c5), | 
					
						
							|  |  |  |                             line) | 
					
						
							| 
									
										
										
										
											2002-11-24 18:53:11 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-04-05 20:08:56 +00:00
										 |  |  |             # Record part 1 data | 
					
						
							|  |  |  |             if part == "@Part1": | 
					
						
							|  |  |  |                 part1_data[c1] = 1 | 
					
						
							| 
									
										
										
										
											2002-11-23 22:08:15 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-04-05 20:08:56 +00:00
										 |  |  |         # Perform tests for all other data | 
					
						
							|  |  |  |         for c in range(sys.maxunicode+1): | 
					
						
							|  |  |  |             X = unichr(c) | 
					
						
							|  |  |  |             if X in part1_data: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             self.failUnless(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c) | 
					
						
							| 
									
										
										
										
											2002-11-24 02:35:35 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-04-05 20:08:56 +00:00
										 |  |  |     def test_bug_834676(self): | 
					
						
							|  |  |  |         # Check for bug 834676 | 
					
						
							|  |  |  |         normalize('NFC', u'\ud55c\uae00') | 
					
						
							| 
									
										
										
										
											2002-11-23 22:08:15 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-04-05 20:08:56 +00:00
										 |  |  | def test_main(): | 
					
						
							|  |  |  |     run_unittest(NormalizationTest) | 
					
						
							| 
									
										
										
										
											2003-11-06 20:47:57 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2002-11-24 18:53:11 +00:00
										 |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |     test_main() |