mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	bpo-40328: Add tool for generating cjk mapping headers (GH-19602)
This commit is contained in:
		
							parent
							
								
									2d8757758d
								
							
						
					
					
						commit
						113feb3ec2
					
				
					 15 changed files with 51015 additions and 3 deletions
				
			
		|  | @ -0,0 +1 @@ | |||
| Add tools for generating mappings headers for CJKCodecs. | ||||
|  | @ -1,8 +1,6 @@ | |||
| To generate or modify mapping headers | ||||
| ------------------------------------- | ||||
| Mapping headers are imported from CJKCodecs as pre-generated form. | ||||
| If you need to tweak or add something on it, please look at tools/ | ||||
| subdirectory of CJKCodecs' distribution. | ||||
| Mapping headers are generated from Tools/unicode/genmap_*.py | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| // AUTO-GENERATED FILE FROM genmap_schinese.py: DO NOT EDIT
 | ||||
| static const ucs2_t __gb2312_decmap[7482] = { | ||||
| 12288,12289,12290,12539,713,711,168,12291,12293,8213,65374,8214,8230,8216, | ||||
| 8217,8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303, | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| // AUTO-GENERATED FILE FROM genmap_japanese.py: DO NOT EDIT
 | ||||
| #define JISX0213_ENCPAIRS 46 | ||||
| #ifdef EXTERN_JISX0213_PAIR | ||||
| static const struct widedbcs_index *jisx0213_pair_decmap; | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| // AUTO-GENERATED FILE FROM genmap_japanese.py: DO NOT EDIT
 | ||||
| static const ucs2_t __jisx0208_decmap[6956] = { | ||||
| 12288,12289,12290,65292,65294,12539,65306,65307,65311,65281,12443,12444,180, | ||||
| 65344,168,65342,65507,65343,12541,12542,12445,12446,12291,20189,12293,12294, | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| // AUTO-GENERATED FILE FROM genmap_korean.py: DO NOT EDIT
 | ||||
| static const ucs2_t __ksx1001_decmap[8264] = { | ||||
| 12288,12289,12290,183,8229,8230,168,12291,173,8213,8741,65340,8764,8216,8217, | ||||
| 8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303,12304, | ||||
|  | @ -3249,3 +3250,4 @@ __cp949_encmap+31959,0,255},{__cp949_encmap+32215,0,255},{__cp949_encmap+32471 | |||
| __cp949_encmap+32891,0,11},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__cp949_encmap+ | ||||
| 32903,1,230}, | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										251
									
								
								Tools/unicode/genmap_japanese.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										251
									
								
								Tools/unicode/genmap_japanese.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,251 @@ | |||
| # | ||||
| # genmap_japanese.py: Japanese Codecs Map Generator | ||||
| # | ||||
| # Original Author:  Hye-Shik Chang <perky@FreeBSD.org> | ||||
| # Modified Author:  Dong-hee Na <donghee.na92@gmail.com> | ||||
| # | ||||
| import os | ||||
| 
 | ||||
| from genmap_support import * | ||||
| 
 | ||||
# Inclusive (first, last) ranges handed to DecodeMapWriter.update_decode_map().
# *_C1 bounds the first-level key of a decode map (code >> 8) and *_C2 bounds
# the second-level key (code & 0xff).
JISX0208_C1 = (0x21, 0x74)
JISX0208_C2 = (0x21, 0x7e)
JISX0212_C1 = (0x22, 0x6d)
JISX0212_C2 = (0x21, 0x7e)
JISX0213_C1 = (0x21, 0x7e)
JISX0213_C2 = (0x21, 0x7e)
CP932P0_C1  = (0x81, 0x81) # patches between shift-jis and cp932
CP932P0_C2  = (0x5f, 0xca)
CP932P1_C1  = (0x87, 0x87) # CP932 P1
CP932P1_C2  = (0x40, 0x9c)
CP932P2_C1  = (0xed, 0xfc) # CP932 P2
CP932P2_C2  = (0x40, 0xfc)

# Upstream locations of the mapping tables.  They are never fetched here; each
# is passed to open_mapping_file() as the "source" that is reported when the
# local copy under python-mappings/ cannot be opened.
MAPPINGS_JIS0208 = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT'
MAPPINGS_JIS0212 = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT'
MAPPINGS_CP932 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT'
MAPPINGS_JISX0213_2004 = 'http://wakaba-web.hp.infoseek.co.jp/table/jisx0213-2004-std.txt'
| 
 | ||||
| 
 | ||||
def loadmap_jisx0213(fo):
    """Parse a JIS X 0213 mapping table into five nested decode maps.

    Text after ``#`` is ignored, as are lines with fewer than two fields.
    The first field is read as ``<level digit>-<hex code>`` (the character at
    index 1 is skipped) and the second as either ``U+<hex>`` (a single code
    point) or ``U+<hex4>+<hex4>`` (a body/modifier pair).

    Args:
        fo: An iterable of text lines (typically an open mapping file).

    Returns:
        A tuple ``(decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair)``.
        The first four map ``code >> 8`` -> ``code & 0xff`` -> code point,
        where the ``_2`` variants hold code points from U+20000..U+2FFFF with
        0x20000 subtracted.  ``decmap3_pair`` maps the pair's body code point
        -> ``code >> 8`` -> ``code & 0xff`` -> modifier code point.

    Raises:
        ValueError: If a row has a level other than 3 or 4, a single code
            point outside the BMP and U+2xxxx ranges, a pair on a level other
            than 3, or fields that are not valid numbers.
    """
    decmap3, decmap4 = {}, {}  # maps to BMP for level 3 and 4
    decmap3_2, decmap4_2 = {}, {}  # maps to U+2xxxx for level 3 and 4
    decmap3_pair = {}  # maps to BMP-pair for level 3
    # level -> (BMP map, U+2xxxx map)
    single_maps = {3: (decmap3, decmap3_2), 4: (decmap4, decmap4_2)}

    for line in fo:
        row = line.split('#', 1)[0].split()
        if len(row) < 2:
            continue

        # Fields are parsed with int() instead of eval(): the data comes from
        # an external file and must never be executed as code.
        loc = int(row[0][2:], 16)
        level = int(row[0][0])
        target = None
        if row[1].count('+') == 1:  # single unicode
            uni = int(row[1][2:], 16)
            planes = single_maps.get(level)
            if planes is not None:
                if uni < 0x10000:
                    target = planes[0]
                elif 0x20000 <= uni < 0x30000:
                    uni -= 0x20000
                    target = planes[1]
        else:  # pair
            uniprefix = int(row[1][2:6], 16)  # body
            uni = int(row[1][7:11], 16)  # modifier
            if level != 3:
                raise ValueError("invalid map")
            target = decmap3_pair.setdefault(uniprefix, {})

        # The check must come before the first use of the target map;
        # otherwise an unsupported row fails with AttributeError on None.
        if target is None:
            raise ValueError("invalid map")
        target.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair
| 
 | ||||
| 
 | ||||
def main():
    """Generate mappings_jp.h and mappings_jisx0213_pair.h.

    Reads JIS0208.TXT, JIS0212.TXT, CP932.TXT and jisx0213-2004-std.txt from
    the python-mappings/ directory (relative to the current directory), builds
    the decode and encode maps, and writes both headers into the current
    directory.

    Raises:
        SystemExit: If a mapping file cannot be opened, or if the JIS X 0213
            table does not map 0x2124 to U+FF0C.
        ValueError: If the JIS X 0213 plane 1 table disagrees with JIS X 0208.
    """
    # All tables are opened up front, so a missing file aborts before any
    # work is done; the with-statement makes sure they are closed again.
    with open_mapping_file('python-mappings/JIS0208.TXT', MAPPINGS_JIS0208) as jisx0208file, \
            open_mapping_file('python-mappings/JIS0212.TXT', MAPPINGS_JIS0212) as jisx0212file, \
            open_mapping_file('python-mappings/CP932.TXT', MAPPINGS_CP932) as cp932file, \
            open_mapping_file('python-mappings/jisx0213-2004-std.txt', MAPPINGS_JISX0213_2004) as jisx0213file:

        print("Loading Mapping File...")

        # JIS0208.TXT is read twice: column 0 and column 1 are two different
        # native encodings of the same code points (column 2).
        sjisdecmap = loadmap(jisx0208file, natcol=0, unicol=2)
        jisx0208decmap = loadmap(jisx0208file, natcol=1, unicol=2)
        jisx0212decmap = loadmap(jisx0212file)
        cp932decmap = loadmap(cp932file)
        jis3decmap, jis4decmap, jis3_2_decmap, jis4_2_decmap, jis3_pairdecmap = loadmap_jisx0213(jisx0213file)

    if jis3decmap[0x21][0x24] != 0xff0c:
        raise SystemExit('Please adjust your JIS X 0213 map using jisx0213-2000-std.txt.diff')

    sjisencmap, cp932encmap = {}, {}
    jisx0208_0212encmap = {}
    for c1, m in sjisdecmap.items():
        for c2, code in m.items():
            sjisencmap.setdefault(code >> 8, {})
            sjisencmap[code >> 8][code & 0xff] = c1 << 8 | c2
    for c1, m in cp932decmap.items():
        for c2, code in m.items():
            cp932encmap.setdefault(code >> 8, {})
            # The first native code seen for a code point wins.
            if (code & 0xff) not in cp932encmap[code >> 8]:
                cp932encmap[code >> 8][code & 0xff] = c1 << 8 | c2
    # Keep only the CP932 encodings that differ from the Shift-JIS ones.
    # Copies are iterated because entries are deleted along the way.
    for c1, m in cp932encmap.copy().items():
        for c2, code in m.copy().items():
            if c1 in sjisencmap and c2 in sjisencmap[c1] and sjisencmap[c1][c2] == code:
                del cp932encmap[c1][c2]
                if not cp932encmap[c1]:
                    del cp932encmap[c1]

    jisx0213pairdecmap = {}
    jisx0213pairencmap = []
    for unibody, m1 in jis3_pairdecmap.items():
        for c1, m2 in m1.items():
            for c2, modifier in m2.items():
                jisx0213pairencmap.append((unibody, modifier, c1 << 8 | c2))
                jisx0213pairdecmap.setdefault(c1, {})
                jisx0213pairdecmap[c1][c2] = unibody << 16 | modifier

    # Twinmap for both of JIS X 0208 (MSB unset) and JIS X 0212 (MSB set)
    for c1, m in jisx0208decmap.items():
        for c2, code in m.items():
            jisx0208_0212encmap.setdefault(code >> 8, {})
            jisx0208_0212encmap[code >> 8][code & 0xff] = c1 << 8 | c2

    for c1, m in jisx0212decmap.items():
        for c2, code in m.items():
            jisx0208_0212encmap.setdefault(code >> 8, {})
            if (code & 0xff) in jisx0208_0212encmap[code >> 8]:
                print("OOPS!!!", (code))
            jisx0208_0212encmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2

    jisx0213bmpencmap = {}
    # Copies are iterated because entries shared with JIS X 0208 are removed
    # from jis3decmap inside the loop.
    for c1, m in jis3decmap.copy().items():
        for c2, code in m.copy().items():
            if c1 in jisx0208decmap and c2 in jisx0208decmap[c1]:
                if code in jis3_pairdecmap:
                    # The row for this code point may not exist yet; create
                    # it instead of relying on an earlier iteration having
                    # done so (which would otherwise be a KeyError).
                    jisx0213bmpencmap.setdefault(code >> 8, {})
                    jisx0213bmpencmap[code >> 8][code & 0xff] = (0,) # pair
                    jisx0213pairencmap.append((code, 0, c1 << 8 | c2))
                elif jisx0208decmap[c1][c2] == code:
                    del jis3decmap[c1][c2]
                    if not jis3decmap[c1]:
                        del jis3decmap[c1]
                else:
                    raise ValueError("Difference between JIS X 0208 and JIS X 0213 Plane 1 is found.")
            else:
                jisx0213bmpencmap.setdefault(code >> 8, {})
                if code not in jis3_pairdecmap:
                    jisx0213bmpencmap[code >> 8][code & 0xff] = c1 << 8 | c2
                else:
                    jisx0213bmpencmap[code >> 8][code & 0xff] = (0,) # pair
                    jisx0213pairencmap.append((code, 0, c1 << 8 | c2))

    for c1, m in jis4decmap.items():
        for c2, code in m.items():
            jisx0213bmpencmap.setdefault(code >> 8, {})
            jisx0213bmpencmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2

    jisx0213empencmap = {}
    for c1, m in jis3_2_decmap.items():
        for c2, code in m.items():
            jisx0213empencmap.setdefault(code >> 8, {})
            jisx0213empencmap[code >> 8][code & 0xff] = c1 << 8 | c2
    for c1, m in jis4_2_decmap.items():
        for c2, code in m.items():
            jisx0213empencmap.setdefault(code >> 8, {})
            jisx0213empencmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2

    with open("mappings_jp.h", "w") as fp:
        print_autogen(fp, os.path.basename(__file__))
        print("Generating JIS X 0208 decode map...")
        writer = DecodeMapWriter(fp, "jisx0208", jisx0208decmap)
        writer.update_decode_map(JISX0208_C1, JISX0208_C2)
        writer.generate()

        print("Generating JIS X 0212 decode map...")
        writer = DecodeMapWriter(fp, "jisx0212", jisx0212decmap)
        writer.update_decode_map(JISX0212_C1, JISX0212_C2)
        writer.generate()

        print("Generating JIS X 0208 && JIS X 0212 encode map...")
        writer = EncodeMapWriter(fp, "jisxcommon", jisx0208_0212encmap)
        writer.generate()

        print("Generating CP932 Extension decode map...")
        writer = DecodeMapWriter(fp, "cp932ext", cp932decmap)
        writer.update_decode_map(CP932P0_C1, CP932P0_C2)
        writer.update_decode_map(CP932P1_C1, CP932P1_C2)
        writer.update_decode_map(CP932P2_C1, CP932P2_C2)
        writer.generate()

        print("Generating CP932 Extension encode map...")
        writer = EncodeMapWriter(fp, "cp932ext", cp932encmap)
        writer.generate()

        print("Generating JIS X 0213 Plane 1 BMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_1_bmp", jis3decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 Plane 2 BMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_2_bmp", jis4decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 BMP encode map...")
        writer = EncodeMapWriter(fp, "jisx0213_bmp", jisx0213bmpencmap)
        writer.generate()

        print("Generating JIS X 0213 Plane 1 EMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_1_emp", jis3_2_decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 Plane 2 EMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_2_emp", jis4_2_decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 EMP encode map...")
        writer = EncodeMapWriter(fp, "jisx0213_emp", jisx0213empencmap)
        writer.generate()

    with open('mappings_jisx0213_pair.h', 'w') as fp:
        print_autogen(fp, os.path.basename(__file__))
        fp.write(f"#define JISX0213_ENCPAIRS {len(jisx0213pairencmap)}\n")
        fp.write("""\
#ifdef EXTERN_JISX0213_PAIR
static const struct widedbcs_index *jisx0213_pair_decmap;
static const struct pair_encodemap *jisx0213_pair_encmap;
#else
""")

        print("Generating JIS X 0213 unicode-pair decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_pair", jisx0213pairdecmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate(wide=True)

        print("Generating JIS X 0213 unicode-pair encode map...")
        jisx0213pairencmap.sort()
        fp.write("static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {\n")
        filler = BufferedFiller()
        for body, modifier, jis in jisx0213pairencmap:
            filler.write('{', '0x%04x%04x,' % (body, modifier), '0x%04x' % jis, '},')
        filler.printout(fp)
        fp.write("};\n")
        fp.write("#endif\n")

    print("Done!")


if __name__ == '__main__':
    main()
							
								
								
									
										62
									
								
								Tools/unicode/genmap_korean.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								Tools/unicode/genmap_korean.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,62 @@ | |||
| # | ||||
| # genmap_korean.py: Korean Codecs Map Generator | ||||
| # | ||||
| # Original Author:  Hye-Shik Chang <perky@FreeBSD.org> | ||||
| # Modified Author:  Dong-hee Na <donghee.na92@gmail.com> | ||||
| # | ||||
| import os | ||||
| 
 | ||||
| from genmap_support import * | ||||
| 
 | ||||
| 
 | ||||
# Inclusive (first, last) ranges handed to DecodeMapWriter.update_decode_map().
# *_C1 bounds the first-level key of a decode map and *_C2 the second-level key.
KSX1001_C1 = (0x21, 0x7e)
KSX1001_C2 = (0x21, 0x7e)
UHCL1_C1 = (0x81, 0xa0)
UHCL1_C2 = (0x41, 0xfe)
UHCL2_C1 = (0xa1, 0xfe)
UHCL2_C2 = (0x41, 0xa0)
# Upstream location of the mapping table; passed to open_mapping_file() as the
# "source" that is reported when the local copy cannot be opened.
MAPPINGS_CP949 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT'
| 
 | ||||
| 
 | ||||
def main():
    """Generate mappings_kr.h from python-mappings/CP949.TXT.

    Entries whose two bytes are both >= 0xa1 go into the KS X 1001 decode map
    with the high bit of each byte cleared; every other entry goes into the
    UHC (cp949ext) decode map unchanged.  A single cp949 encode map covers
    both groups.

    Raises:
        SystemExit: If the mapping file cannot be opened.
    """
    # The table is only needed while loading, so close it right afterwards.
    with open_mapping_file('python-mappings/CP949.TXT', MAPPINGS_CP949) as mapfile:
        print("Loading Mapping File...")
        decmap = loadmap(mapfile)

    uhcdecmap, ksx1001decmap, cp949encmap = {}, {}, {}
    for c1, c2map in decmap.items():
        for c2, code in c2map.items():
            if c1 >= 0xa1 and c2 >= 0xa1:
                ksx1001decmap.setdefault(c1 & 0x7f, {})
                ksx1001decmap[c1 & 0x7f][c2 & 0x7f] = code
                cp949encmap.setdefault(code >> 8, {})
                cp949encmap[code >> 8][code & 0xFF] = (c1 << 8 | c2) & 0x7f7f
            else:
                # uhc
                uhcdecmap.setdefault(c1, {})
                uhcdecmap[c1][c2] = code
                cp949encmap.setdefault(code >> 8, {})
                cp949encmap[code >> 8][code & 0xFF] = (c1 << 8 | c2)  # MSB set

    with open('mappings_kr.h', 'w') as fp:
        print_autogen(fp, os.path.basename(__file__))

        print("Generating KS X 1001 decode map...")
        writer = DecodeMapWriter(fp, "ksx1001", ksx1001decmap)
        writer.update_decode_map(KSX1001_C1, KSX1001_C2)
        writer.generate()

        print("Generating UHC decode map...")
        writer = DecodeMapWriter(fp, "cp949ext", uhcdecmap)
        writer.update_decode_map(UHCL1_C1, UHCL1_C2)
        writer.update_decode_map(UHCL2_C1, UHCL2_C2)
        writer.generate()

        print("Generating CP949 (includes KS X 1001) encode map...")
        writer = EncodeMapWriter(fp, "cp949", cp949encmap)
        writer.generate()

    print("Done!")


if __name__ == '__main__':
    main()
							
								
								
									
										149
									
								
								Tools/unicode/genmap_schinese.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										149
									
								
								Tools/unicode/genmap_schinese.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,149 @@ | |||
| # | ||||
| # genmap_schinese.py: Simplified Chinese Codecs Map Generator | ||||
| # | ||||
| # Original Author:  Hye-Shik Chang <perky@FreeBSD.org> | ||||
| # Modified Author:  Dong-hee Na <donghee.na92@gmail.com> | ||||
| # | ||||
| import os | ||||
| import re | ||||
| 
 | ||||
| from genmap_support import * | ||||
| 
 | ||||
| 
 | ||||
# Inclusive (first, last) ranges handed to DecodeMapWriter.update_decode_map().
# *_C1 bounds the first-level key of a decode map and *_C2 the second-level key.
GB2312_C1   = (0x21, 0x7e)
GB2312_C2   = (0x21, 0x7e)
GBKL1_C1    = (0x81, 0xa8)
GBKL1_C2    = (0x40, 0xfe)
GBKL2_C1    = (0xa9, 0xfe)
GBKL2_C2    = (0x40, 0xa0)
# The five GB18030 extension ranges are looked up by number (1..5) in main().
GB18030EXTP1_C1 = (0xa1, 0xa9)
GB18030EXTP1_C2 = (0x40, 0xfe)
GB18030EXTP2_C1 = (0xaa, 0xaf)
GB18030EXTP2_C2 = (0xa1, 0xfe)
GB18030EXTP3_C1 = (0xd7, 0xd7)
GB18030EXTP3_C2 = (0xfa, 0xfe)
GB18030EXTP4_C1 = (0xf8, 0xfd)
GB18030EXTP4_C2 = (0xa1, 0xfe)
GB18030EXTP5_C1 = (0xfe, 0xfe)
GB18030EXTP5_C2 = (0x50, 0xfe)

# Upstream locations of the mapping tables; each is passed to
# open_mapping_file() as the "source" that is reported when the local copy
# under python-mappings/ cannot be opened.
MAPPINGS_GB2312 = 'http://people.freebsd.org/~perky/i18n/GB2312.TXT'
MAPPINGS_CP936 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT'
MAPPINGS_GB18030 = 'http://oss.software.ibm.com/cvs/icu/~checkout~/charset/data/xml/gb-18030-2000.xml'
| 
 | ||||
# Matches one assignment element: a 4-digit hex code point in "u" and the
# space-separated hex bytes of its native encoding in "b".
re_gb18030ass = re.compile('<a u="([A-F0-9]{4})" b="([0-9A-F ]+)"/>')


def parse_gb18030map(fo):
    """Parse the GB18030 XML mapping into a two-byte decode map.

    Args:
        fo: A text file object positioned at the start of the XML data; it is
            read in full.

    Returns:
        A tuple ``(m, gbuni)``.  ``m`` maps first byte -> second byte -> code
        point for every assignment with a two-byte native encoding.  ``gbuni``
        is the sorted list of BMP code points (surrogates excluded) that have
        no one- or two-byte assignment in the file.

    Raises:
        KeyError: If a code point with a one- or two-byte encoding is assigned
            more than once, or lies in the surrogate range.
    """
    m = {}
    # Start from every BMP code point outside the unicode surrogate area and
    # remove the ones that are covered by a one- or two-byte encoding.
    gbuni = set(range(0xd800)) | set(range(0xe000, 0x10000))
    for uni, native in re_gb18030ass.findall(fo.read()):
        # Hex fields are parsed with int() rather than eval(); the regular
        # expression already restricts them to hex digits.
        uni = int(uni, 16)
        native = [int(byte, 16) for byte in native.split()]
        if len(native) <= 2:
            gbuni.remove(uni)
        if len(native) == 2: # we can decode algorithmically for 1 or 4 bytes
            m.setdefault(native[0], {})[native[1]] = uni
    return m, sorted(gbuni)
| 
 | ||||
def main():
    """Generate mappings_cn.h from the GB2312, CP936 and GB18030 tables.

    Reads GB2312.TXT, CP936.TXT and gb-18030-2000.xml from the
    python-mappings/ directory (relative to the current directory) and writes
    the GB2312, GBK-extension and GB18030-extension maps, followed by the
    GB18030 BMP range table, to mappings_cn.h in the current directory.

    Raises:
        SystemExit: If a mapping file cannot be opened.
        KeyError: If the tables are not nested as expected (every CP936 entry
            must exist in GB18030, every GB2312 entry in CP936).
    """
    print("Loading Mapping File...")
    # The tables are only needed while loading; the with-statement closes them.
    with open_mapping_file('python-mappings/GB2312.TXT', MAPPINGS_GB2312) as gb2312map, \
            open_mapping_file('python-mappings/CP936.TXT', MAPPINGS_CP936) as cp936map, \
            open_mapping_file('python-mappings/gb-18030-2000.xml', MAPPINGS_GB18030) as gb18030map:
        gb18030decmap, gb18030unilinear = parse_gb18030map(gb18030map)
        gbkdecmap = loadmap(cp936map)
        gb2312decmap = loadmap(gb2312map)

    # Reduce the GB18030 map to the entries that are not already in GBK.
    for c1, m in gbkdecmap.items():
        for c2 in m:
            del gb18030decmap[c1][c2]
            if not gb18030decmap[c1]:
                del gb18030decmap[c1]
    # Reduce the GBK map to the entries that GB2312 does not provide with the
    # same code point (GB2312 codes have the high bit of each byte clear).
    for c1, m in gb2312decmap.items():
        for c2, code in m.items():
            gbkc1, gbkc2 = c1 | 0x80, c2 | 0x80
            if gbkdecmap[gbkc1][gbkc2] == code:
                del gbkdecmap[gbkc1][gbkc2]
                if not gbkdecmap[gbkc1]:
                    del gbkdecmap[gbkc1]

    gb2312_gbkencmap, gb18030encmap = {}, {}
    for c1, m in gbkdecmap.items():
        for c2, code in m.items():
            gb2312_gbkencmap.setdefault(code >> 8, {})
            gb2312_gbkencmap[code >> 8][code & 0xff] = c1 << 8 | c2 # MSB set
    for c1, m in gb2312decmap.items():
        for c2, code in m.items():
            gb2312_gbkencmap.setdefault(code >> 8, {})
            gb2312_gbkencmap[code >> 8][code & 0xff] = c1 << 8 | c2 # MSB unset
    for c1, m in gb18030decmap.items():
        for c2, code in m.items():
            gb18030encmap.setdefault(code >> 8, {})
            gb18030encmap[code >> 8][code & 0xff] = c1 << 8 | c2

    # Listed explicitly (in numeric order) instead of being looked up by
    # name through eval().
    gb18030ext_ranges = (
        (GB18030EXTP1_C1, GB18030EXTP1_C2),
        (GB18030EXTP2_C1, GB18030EXTP2_C2),
        (GB18030EXTP3_C1, GB18030EXTP3_C2),
        (GB18030EXTP4_C1, GB18030EXTP4_C2),
        (GB18030EXTP5_C1, GB18030EXTP5_C2),
    )

    with open('mappings_cn.h', 'w') as fp:
        print_autogen(fp, os.path.basename(__file__))

        print("Generating GB2312 decode map...")
        writer = DecodeMapWriter(fp, "gb2312", gb2312decmap)
        writer.update_decode_map(GB2312_C1, GB2312_C2)
        writer.generate()

        print("Generating GBK decode map...")
        writer = DecodeMapWriter(fp, "gbkext", gbkdecmap)
        writer.update_decode_map(GBKL1_C1, GBKL1_C2)
        writer.update_decode_map(GBKL2_C1, GBKL2_C2)
        writer.generate()

        print("Generating GB2312 && GBK encode map...")
        writer = EncodeMapWriter(fp, "gbcommon", gb2312_gbkencmap)
        writer.generate()

        print("Generating GB18030 extension decode map...")
        writer = DecodeMapWriter(fp, "gb18030ext", gb18030decmap)
        for c1range, c2range in gb18030ext_ranges:
            writer.update_decode_map(c1range, c2range)

        writer.generate()

        print("Generating GB18030 extension encode map...")
        writer = EncodeMapWriter(fp, "gb18030ext", gb18030encmap)
        writer.generate()

        print("Generating GB18030 Unicode BMP Mapping Ranges...")
        # Each entry is [first, last, base]: a run of consecutive code points
        # and the running count of code points that precede the run.  The
        # leading dummy entry lets the loop compare against ranges[-1].
        ranges = [[-1, -1, -1]]
        gblinnum = 0
        fp.write("""
static const struct _gb18030_to_unibmp_ranges {
    Py_UCS4   first, last;
    DBCHAR       base;
} gb18030_to_unibmp_ranges[] = {
""")

        for uni in gb18030unilinear:
            if uni == ranges[-1][1] + 1:
                ranges[-1][1] = uni
            else:
                ranges.append([uni, uni, gblinnum])
            gblinnum += 1

        filler = BufferedFiller()
        for first, last, base in ranges[1:]:
            filler.write('{', str(first), ',', str(last), ',', str(base), '},')

        # Terminating entry: first/last are zero and base is one past the
        # last value covered by the final range.
        filler.write('{', '0,', '0,', str(
            ranges[-1][2] + ranges[-1][1] - ranges[-1][0] + 1), '}', '};')
        filler.printout(fp)

    print("Done!")


if __name__ == '__main__':
    main()
							
								
								
									
										198
									
								
								Tools/unicode/genmap_support.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										198
									
								
								Tools/unicode/genmap_support.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,198 @@ | |||
| # | ||||
| # genmap_support.py: Multibyte Codec Map Generator | ||||
| # | ||||
| # Original Author:  Hye-Shik Chang <perky@FreeBSD.org> | ||||
| # Modified Author:  Dong-hee Na <donghee.na92@gmail.com> | ||||
| # | ||||
| 
 | ||||
| 
 | ||||
class BufferedFiller:
    """Pack string tokens into output lines of at most ``column`` characters.

    Tokens are never split: a token that does not fit on the current line
    starts a new one.  ``len()`` reports how many tokens have been written in
    total, including those that were already printed.
    """

    def __init__(self, column=78):
        self.column = column
        self.buffered = []  # completed lines waiting for printout()
        self.cline = []     # tokens of the line being assembled
        self.clen = 0       # combined length of the tokens in cline
        self.count = 0      # total number of tokens written so far

    def write(self, *data):
        """Append each token, wrapping to a new line where necessary.

        Raises:
            ValueError: If a token is longer than the column limit.  Tokens
                before it in the same call have already been accepted.
        """
        for token in data:
            width = len(token)
            if width > self.column:
                raise ValueError("token is too long")
            if self.clen + width > self.column:
                self.flush()
            self.cline.append(token)
            self.clen += width
            self.count += 1

    def flush(self):
        """Move the line being assembled, if any, to the completed lines."""
        if self.cline:
            self.buffered.append(''.join(self.cline))
            self.clen = 0
            self.cline.clear()

    def printout(self, fp):
        """Write every pending line to *fp*, one per line, and forget them."""
        self.flush()
        for text in self.buffered:
            fp.write(text + '\n')
        self.buffered.clear()

    def __len__(self):
        return self.count
| 
 | ||||
| 
 | ||||
class DecodeMapWriter:
    """Emit a C decode table and its 256-entry index for one character set.

    The decode map passed in is modified: every row registered through
    update_decode_map() gains the bookkeeping keys ``'min'``, ``'max'``,
    ``'midx'`` and the writer's prefix.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        self.fp = fp
        self.prefix = prefix
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Queue the table entries of all rows inside the given ranges.

        Args:
            c1range: Inclusive (first, last) range of first-level keys.
            c2range: Inclusive (first, last) range of second-level keys.
            onlymask: If non-empty, only first-level keys contained in it are
                processed.
            wide: Accepted but not used.
        """
        c2_first, c2_last = c2range[0], c2range[1]

        for c1 in range(c1range[0], c1range[1] + 1):
            if c1 not in self.decode_map:
                continue
            if onlymask and c1 not in onlymask:
                continue

            row = self.decode_map[c1]
            present = [c2 for c2 in range(c2_first, c2_last + 1) if c2 in row]
            if not present:
                continue

            lowest, highest = present[0], present[-1]
            # Record where this row lives in the flat table; generate() reads
            # these keys back when it writes the index.
            row[self.prefix] = True
            row['min'] = lowest
            row['max'] = highest
            row['midx'] = len(self.filler)

            # Gaps between the lowest and highest key are filled with "U".
            for c2 in range(lowest, highest + 1):
                if c2 not in row:
                    self.filler.write('U,')
                else:
                    self.filler.write('%d,' % row[c2])

    def generate(self, wide=False):
        """Write the flat table followed by the index array to the file.

        Args:
            wide: If true, use Py_UCS4 elements and ``struct widedbcs_index``
                instead of ucs2_t and ``struct dbcs_index``.
        """
        if wide:
            table_header = f"static const Py_UCS4 __{self.prefix}_decmap[{len(self.filler)}] = {{\n"
            index_header = f"static const struct widedbcs_index {self.prefix}_decmap[256] = {{\n"
        else:
            table_header = f"static const ucs2_t __{self.prefix}_decmap[{len(self.filler)}] = {{\n"
            index_header = f"static const struct dbcs_index {self.prefix}_decmap[256] = {{\n"

        self.fp.write(table_header)
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        self.fp.write(index_header)
        for i in range(256):
            # Rows that were never registered for this prefix get a null entry.
            if i not in self.decode_map or self.prefix not in self.decode_map[i]:
                self.filler.write("{", "0,", "0,", "0", "},")
                continue

            row = self.decode_map[i]
            self.filler.write("{", "__%s_decmap" % self.prefix, "+", "%d" % row['midx'],
                              ",", "%d," % row['min'], "%d" % row['max'], "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
| 
 | ||||
| 
 | ||||
class EncodeMapWriter:
    """Emit a C encode table and its 256-entry index for one character set.

    The encode map passed in is modified: every non-empty row gains the
    bookkeeping keys ``'min'``, ``'max'``, ``'midx'`` and the writer's prefix.
    """

    filler_class = BufferedFiller
    elemtype = 'DBCHAR'
    indextype = 'struct unim_index'

    def __init__(self, fp, prefix, encode_map):
        self.fp = fp
        self.prefix = prefix
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Build the flat table and write it, with its index, to the file."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Queue the table entries of every non-empty row, in key order.

        Raises:
            ValueError: If a row value is neither an int nor a tuple.
        """
        for c1 in range(0, 256):
            if c1 not in self.encode_map:
                continue
            row = self.encode_map[c1]
            keys = sorted(row.keys())
            if not keys:
                continue

            lowest, highest = keys[0], keys[-1]
            # Record where this row lives in the flat table; printmap() reads
            # these keys back when it writes the index.
            row[self.prefix] = True
            row['min'] = lowest
            row['max'] = highest
            row['midx'] = len(self.filler)

            for c2 in range(lowest, highest + 1):
                if c2 in row:
                    value = row[c2]
                    if isinstance(value, int):
                        self.write_char(value)
                    elif isinstance(value, tuple):
                        self.write_multic(value)
                    else:
                        raise ValueError
                else:
                    self.write_nochar()

    def write_nochar(self):
        """Queue the marker for a key that has no mapping."""
        self.filler.write('N,')

    def write_multic(self, point):
        """Queue the marker for a tuple-valued entry (the value is unused)."""
        self.filler.write('M,')

    def write_char(self, point):
        """Queue one integer table entry."""
        self.filler.write(''.join((str(point), ',')))

    def printmap(self):
        """Write the flat table followed by the index array to the file."""
        self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
        self.fp.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")

        for i in range(256):
            # Rows that buildmap() did not register get a null entry.
            if i not in self.encode_map or self.prefix not in self.encode_map[i]:
                self.filler.write("{", "0,", "0,", "0", "},")
            else:
                row = self.encode_map[i]
                self.filler.write("{", "__%s_encmap" % self.prefix, "+",
                                  "%d" % row['midx'], ",",
                                  "%d," % row['min'],
                                  "%d" % row['max'], "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
| 
 | ||||
| 
 | ||||
def open_mapping_file(path, source):
    """Open the mapping file at ``path`` for reading and return it.

    If the file cannot be opened, exit with the message
    ``"<source> is needed"``, where ``source`` tells the user what to
    obtain.  The caller is responsible for closing the returned file.
    """
    try:
        handle = open(path)
    except OSError:
        # Turn the I/O failure into a clean exit naming the missing input.
        raise SystemExit(f'{source} is needed')
    else:
        return handle
| 
 | ||||
| 
 | ||||
def print_autogen(fo, source):
    """Write the "auto-generated, do not edit" C comment banner to ``fo``.

    ``source`` is interpolated into the banner as the generating script.
    """
    banner = f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n'
    fo.write(banner)
| 
 | ||||
| 
 | ||||
def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Read a whitespace-separated mapping table into a two-level dict.

    The file is rewound and read line by line.  Text after ``#`` is
    dropped, and lines with fewer than two fields are skipped.

    Args:
        fo: seekable text file object containing the table.
        natcol: index of the field holding the native code.
        unicol: index of the field holding the mapped value.
        sbcs: when true, native codes below 0x100 are kept as well.

    Returns:
        ``{code >> 8: {code & 0xff: value}}`` for every kept line.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    decmap = {}
    for raw in fo:
        fields = raw.split('#', 1)[0].split()
        if len(fields) < 2:
            continue

        # NOTE(review): eval() runs every field as a Python expression, so
        # only trusted mapping files should be passed in.
        values = [eval(field) for field in fields]
        loc = values[natcol]
        uni = values[unicol]
        if not (loc >= 0x100 or sbcs):
            # Codes below 0x100 are ignored unless sbcs is set.
            continue
        decmap.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap
							
								
								
									
										7515
									
								
								Tools/unicode/python-mappings/GB2312.TXT
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										7515
									
								
								Tools/unicode/python-mappings/GB2312.TXT
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										271
									
								
								Tools/unicode/python-mappings/diff/jisx0213-2000-std.txt.diff
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										271
									
								
								Tools/unicode/python-mappings/diff/jisx0213-2000-std.txt.diff
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,271 @@ | |||
| --- jisx0213-2000-std.txt.orig	Tue Apr 16 23:32:38 2002
 | ||||
| +++ jisx0213-2000-std.txt	Wed Jun 16 14:49:05 2004
 | ||||
| @@ -23,21 +23,21 @@
 | ||||
|  3-2121	U+3000	# IDEOGRAPHIC SPACE | ||||
|  3-2122	U+3001	# IDEOGRAPHIC COMMA | ||||
|  3-2123	U+3002	# IDEOGRAPHIC FULL STOP | ||||
| -3-2124	U+002C	# COMMA	Fullwidth: U+FF0C
 | ||||
| -3-2125	U+002E	# FULL STOP	Fullwidth: U+FF0E
 | ||||
| +3-2124	U+FF0C	# COMMA	Fullwidth: U+FF0C
 | ||||
| +3-2125	U+FF0E	# FULL STOP	Fullwidth: U+FF0E
 | ||||
|  3-2126	U+30FB	# KATAKANA MIDDLE DOT | ||||
| -3-2127	U+003A	# COLON	Fullwidth: U+FF1A
 | ||||
| -3-2128	U+003B	# SEMICOLON	Fullwidth: U+FF1B
 | ||||
| -3-2129	U+003F	# QUESTION MARK	Fullwidth: U+FF1F
 | ||||
| -3-212A	U+0021	# EXCLAMATION MARK	Fullwidth: U+FF01
 | ||||
| +3-2127	U+FF1A	# COLON	Fullwidth: U+FF1A
 | ||||
| +3-2128	U+FF1B	# SEMICOLON	Fullwidth: U+FF1B
 | ||||
| +3-2129	U+FF1F	# QUESTION MARK	Fullwidth: U+FF1F
 | ||||
| +3-212A	U+FF01	# EXCLAMATION MARK	Fullwidth: U+FF01
 | ||||
|  3-212B	U+309B	# KATAKANA-HIRAGANA VOICED SOUND MARK | ||||
|  3-212C	U+309C	# KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK | ||||
|  3-212D	U+00B4	# ACUTE ACCENT | ||||
| -3-212E	U+0060	# GRAVE ACCENT	Fullwidth: U+FF40
 | ||||
| +3-212E	U+FF40	# GRAVE ACCENT	Fullwidth: U+FF40
 | ||||
|  3-212F	U+00A8	# DIAERESIS | ||||
| -3-2130	U+005E	# CIRCUMFLEX ACCENT	Fullwidth: U+FF3E
 | ||||
| -3-2131	U+203E	# OVERLINE	Windows: U+FFE3
 | ||||
| -3-2132	U+005F	# LOW LINE	Fullwidth: U+FF3F
 | ||||
| +3-2130	U+FF3E	# CIRCUMFLEX ACCENT	Fullwidth: U+FF3E
 | ||||
| +3-2131	U+FFE3	# OVERLINE	Windows: U+FFE3
 | ||||
| +3-2132	U+FF3F	# LOW LINE	Fullwidth: U+FF3F
 | ||||
|  3-2133	U+30FD	# KATAKANA ITERATION MARK | ||||
|  3-2134	U+30FE	# KATAKANA VOICED ITERATION MARK | ||||
|  3-2135	U+309D	# HIRAGANA ITERATION MARK | ||||
| @@ -48,27 +48,27 @@
 | ||||
|  3-213A	U+3006	# IDEOGRAPHIC CLOSING MARK | ||||
|  3-213B	U+3007	# IDEOGRAPHIC NUMBER ZERO | ||||
|  3-213C	U+30FC	# KATAKANA-HIRAGANA PROLONGED SOUND MARK | ||||
| -3-213D	U+2014	# EM DASH	Windows: U+2015
 | ||||
| +3-213D	U+2015	# EM DASH	Windows: U+2015
 | ||||
|  3-213E	U+2010	# HYPHEN | ||||
| -3-213F	U+002F	# SOLIDUS	Fullwidth: U+FF0F
 | ||||
| +3-213F	U+FF0F	# SOLIDUS	Fullwidth: U+FF0F
 | ||||
|  3-2140	U+005C	# REVERSE SOLIDUS	Fullwidth: U+FF3C | ||||
|  3-2141	U+301C	# WAVE DASH	Windows: U+FF5E | ||||
|  3-2142	U+2016	# DOUBLE VERTICAL LINE	Windows: U+2225 | ||||
| -3-2143	U+007C	# VERTICAL LINE	Fullwidth: U+FF5C
 | ||||
| +3-2143	U+FF5C	# VERTICAL LINE	Fullwidth: U+FF5C
 | ||||
|  3-2144	U+2026	# HORIZONTAL ELLIPSIS | ||||
|  3-2145	U+2025	# TWO DOT LEADER | ||||
|  3-2146	U+2018	# LEFT SINGLE QUOTATION MARK | ||||
|  3-2147	U+2019	# RIGHT SINGLE QUOTATION MARK | ||||
|  3-2148	U+201C	# LEFT DOUBLE QUOTATION MARK | ||||
|  3-2149	U+201D	# RIGHT DOUBLE QUOTATION MARK | ||||
| -3-214A	U+0028	# LEFT PARENTHESIS	Fullwidth: U+FF08
 | ||||
| -3-214B	U+0029	# RIGHT PARENTHESIS	Fullwidth: U+FF09
 | ||||
| +3-214A	U+FF08	# LEFT PARENTHESIS	Fullwidth: U+FF08
 | ||||
| +3-214B	U+FF09	# RIGHT PARENTHESIS	Fullwidth: U+FF09
 | ||||
|  3-214C	U+3014	# LEFT TORTOISE SHELL BRACKET | ||||
|  3-214D	U+3015	# RIGHT TORTOISE SHELL BRACKET | ||||
| -3-214E	U+005B	# LEFT SQUARE BRACKET	Fullwidth: U+FF3B
 | ||||
| -3-214F	U+005D	# RIGHT SQUARE BRACKET	Fullwidth: U+FF3D
 | ||||
| -3-2150	U+007B	# LEFT CURLY BRACKET	Fullwidth: U+FF5B
 | ||||
| -3-2151	U+007D	# RIGHT CURLY BRACKET	Fullwidth: U+FF5D
 | ||||
| +3-214E	U+FF3B	# LEFT SQUARE BRACKET	Fullwidth: U+FF3B
 | ||||
| +3-214F	U+FF3D	# RIGHT SQUARE BRACKET	Fullwidth: U+FF3D
 | ||||
| +3-2150	U+FF5B	# LEFT CURLY BRACKET	Fullwidth: U+FF5B
 | ||||
| +3-2151	U+FF5D	# RIGHT CURLY BRACKET	Fullwidth: U+FF5D
 | ||||
|  3-2152	U+3008	# LEFT ANGLE BRACKET | ||||
|  3-2153	U+3009	# RIGHT ANGLE BRACKET | ||||
|  3-2154	U+300A	# LEFT DOUBLE ANGLE BRACKET | ||||
| @@ -79,15 +79,15 @@
 | ||||
|  3-2159	U+300F	# RIGHT WHITE CORNER BRACKET | ||||
|  3-215A	U+3010	# LEFT BLACK LENTICULAR BRACKET | ||||
|  3-215B	U+3011	# RIGHT BLACK LENTICULAR BRACKET | ||||
| -3-215C	U+002B	# PLUS SIGN	Fullwidth: U+FF0B
 | ||||
| +3-215C	U+FF0B	# PLUS SIGN	Fullwidth: U+FF0B
 | ||||
|  3-215D	U+2212	# MINUS SIGN	Windows: U+FF0D | ||||
|  3-215E	U+00B1	# PLUS-MINUS SIGN | ||||
|  3-215F	U+00D7	# MULTIPLICATION SIGN | ||||
|  3-2160	U+00F7	# DIVISION SIGN | ||||
| -3-2161	U+003D	# EQUALS SIGN	Fullwidth: U+FF1D
 | ||||
| +3-2161	U+FF1D	# EQUALS SIGN	Fullwidth: U+FF1D
 | ||||
|  3-2162	U+2260	# NOT EQUAL TO | ||||
| -3-2163	U+003C	# LESS-THAN SIGN	Fullwidth: U+FF1C
 | ||||
| -3-2164	U+003E	# GREATER-THAN SIGN	Fullwidth: U+FF1E
 | ||||
| +3-2163	U+FF1C	# LESS-THAN SIGN	Fullwidth: U+FF1C
 | ||||
| +3-2164	U+FF1E	# GREATER-THAN SIGN	Fullwidth: U+FF1E
 | ||||
|  3-2165	U+2266	# LESS-THAN OVER EQUAL TO | ||||
|  3-2166	U+2267	# GREATER-THAN OVER EQUAL TO | ||||
|  3-2167	U+221E	# INFINITY | ||||
| @@ -98,15 +98,15 @@
 | ||||
|  3-216C	U+2032	# PRIME | ||||
|  3-216D	U+2033	# DOUBLE PRIME | ||||
|  3-216E	U+2103	# DEGREE CELSIUS | ||||
| -3-216F	U+00A5	# YEN SIGN	Windows: U+FFE5
 | ||||
| -3-2170	U+0024	# DOLLAR SIGN	Fullwidth: U+FF04
 | ||||
| +3-216F	U+FFE5	# YEN SIGN	Windows: U+FFE5
 | ||||
| +3-2170	U+FF04	# DOLLAR SIGN	Fullwidth: U+FF04
 | ||||
|  3-2171	U+00A2	# CENT SIGN	Windows: U+FFE0 | ||||
|  3-2172	U+00A3	# POUND SIGN	Windows: U+FFE1 | ||||
| -3-2173	U+0025	# PERCENT SIGN	Fullwidth: U+FF05
 | ||||
| -3-2174	U+0023	# NUMBER SIGN	Fullwidth: U+FF03
 | ||||
| -3-2175	U+0026	# AMPERSAND	Fullwidth: U+FF06
 | ||||
| -3-2176	U+002A	# ASTERISK	Fullwidth: U+FF0A
 | ||||
| -3-2177	U+0040	# COMMERCIAL AT	Fullwidth: U+FF20
 | ||||
| +3-2173	U+FF05	# PERCENT SIGN	Fullwidth: U+FF05
 | ||||
| +3-2174	U+FF03	# NUMBER SIGN	Fullwidth: U+FF03
 | ||||
| +3-2175	U+FF06	# AMPERSAND	Fullwidth: U+FF06
 | ||||
| +3-2176	U+FF0A	# ASTERISK	Fullwidth: U+FF0A
 | ||||
| +3-2177	U+FF20	# COMMERCIAL AT	Fullwidth: U+FF20
 | ||||
|  3-2178	U+00A7	# SECTION SIGN | ||||
|  3-2179	U+2606	# WHITE STAR | ||||
|  3-217A	U+2605	# BLACK STAR | ||||
| @@ -128,9 +128,9 @@
 | ||||
|  3-222C	U+2191	# UPWARDS ARROW | ||||
|  3-222D	U+2193	# DOWNWARDS ARROW | ||||
|  3-222E	U+3013	# GETA MARK | ||||
| -3-222F	U+0027	# APOSTROPHE	Fullwidth: U+FF07
 | ||||
| -3-2230	U+0022	# QUOTATION MARK	[2000]	Fullwidth: U+FF02
 | ||||
| -3-2231	U+002D	# HYPHEN-MINUS	[2000]	Fullwidth: U+FF0D
 | ||||
| +3-222F	U+FF07	# APOSTROPHE	Fullwidth: U+FF07
 | ||||
| +3-2230	U+FF02	# QUOTATION MARK	[2000]	Fullwidth: U+FF02
 | ||||
| +3-2231	U+FF0D	# HYPHEN-MINUS	[2000]	Fullwidth: U+FF0D
 | ||||
|  3-2232	U+007E	# TILDE	[2000]	Fullwidth: U+FF5E | ||||
|  3-2233	U+3033	# VERTICAL KANA REPEAT MARK UPPER HALF	[2000] | ||||
|  3-2234	U+3034	# VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF	[2000] | ||||
| @@ -223,16 +223,16 @@
 | ||||
|  3-232D	U+21E9	# DOWNWARDS WHITE ARROW	[2000] | ||||
|  3-232E	U+2934	# ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS	[2000]	[Unicode3.2] | ||||
|  3-232F	U+2935	# ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS	[2000]	[Unicode3.2] | ||||
| -3-2330	U+0030	# DIGIT ZERO	Fullwidth: U+FF10
 | ||||
| -3-2331	U+0031	# DIGIT ONE	Fullwidth: U+FF11
 | ||||
| -3-2332	U+0032	# DIGIT TWO	Fullwidth: U+FF12
 | ||||
| -3-2333	U+0033	# DIGIT THREE	Fullwidth: U+FF13
 | ||||
| -3-2334	U+0034	# DIGIT FOUR	Fullwidth: U+FF14
 | ||||
| -3-2335	U+0035	# DIGIT FIVE	Fullwidth: U+FF15
 | ||||
| -3-2336	U+0036	# DIGIT SIX	Fullwidth: U+FF16
 | ||||
| -3-2337	U+0037	# DIGIT SEVEN	Fullwidth: U+FF17
 | ||||
| -3-2338	U+0038	# DIGIT EIGHT	Fullwidth: U+FF18
 | ||||
| -3-2339	U+0039	# DIGIT NINE	Fullwidth: U+FF19
 | ||||
| +3-2330	U+FF10	# DIGIT ZERO	Fullwidth: U+FF10
 | ||||
| +3-2331	U+FF11	# DIGIT ONE	Fullwidth: U+FF11
 | ||||
| +3-2332	U+FF12	# DIGIT TWO	Fullwidth: U+FF12
 | ||||
| +3-2333	U+FF13	# DIGIT THREE	Fullwidth: U+FF13
 | ||||
| +3-2334	U+FF14	# DIGIT FOUR	Fullwidth: U+FF14
 | ||||
| +3-2335	U+FF15	# DIGIT FIVE	Fullwidth: U+FF15
 | ||||
| +3-2336	U+FF16	# DIGIT SIX	Fullwidth: U+FF16
 | ||||
| +3-2337	U+FF17	# DIGIT SEVEN	Fullwidth: U+FF17
 | ||||
| +3-2338	U+FF18	# DIGIT EIGHT	Fullwidth: U+FF18
 | ||||
| +3-2339	U+FF19	# DIGIT NINE	Fullwidth: U+FF19
 | ||||
|  3-233A	U+29BF	# CIRCLED BULLET	[2000]	[Unicode3.2] | ||||
|  3-233B	U+25C9	# FISHEYE	[2000] | ||||
|  3-233C	U+303D	# PART ALTERNATION MARK	[2000]	[Unicode3.2] | ||||
| @@ -240,64 +240,64 @@
 | ||||
|  3-233E	U+FE45	# SESAME DOT	[2000]	[Unicode3.2] | ||||
|  3-233F	U+25E6	# WHITE BULLET	[2000] | ||||
|  3-2340	U+2022	# BULLET	[2000] | ||||
| -3-2341	U+0041	# LATIN CAPITAL LETTER A	Fullwidth: U+FF21
 | ||||
| -3-2342	U+0042	# LATIN CAPITAL LETTER B	Fullwidth: U+FF22
 | ||||
| -3-2343	U+0043	# LATIN CAPITAL LETTER C	Fullwidth: U+FF23
 | ||||
| -3-2344	U+0044	# LATIN CAPITAL LETTER D	Fullwidth: U+FF24
 | ||||
| -3-2345	U+0045	# LATIN CAPITAL LETTER E	Fullwidth: U+FF25
 | ||||
| -3-2346	U+0046	# LATIN CAPITAL LETTER F	Fullwidth: U+FF26
 | ||||
| -3-2347	U+0047	# LATIN CAPITAL LETTER G	Fullwidth: U+FF27
 | ||||
| -3-2348	U+0048	# LATIN CAPITAL LETTER H	Fullwidth: U+FF28
 | ||||
| -3-2349	U+0049	# LATIN CAPITAL LETTER I	Fullwidth: U+FF29
 | ||||
| -3-234A	U+004A	# LATIN CAPITAL LETTER J	Fullwidth: U+FF2A
 | ||||
| -3-234B	U+004B	# LATIN CAPITAL LETTER K	Fullwidth: U+FF2B
 | ||||
| -3-234C	U+004C	# LATIN CAPITAL LETTER L	Fullwidth: U+FF2C
 | ||||
| -3-234D	U+004D	# LATIN CAPITAL LETTER M	Fullwidth: U+FF2D
 | ||||
| -3-234E	U+004E	# LATIN CAPITAL LETTER N	Fullwidth: U+FF2E
 | ||||
| -3-234F	U+004F	# LATIN CAPITAL LETTER O	Fullwidth: U+FF2F
 | ||||
| -3-2350	U+0050	# LATIN CAPITAL LETTER P	Fullwidth: U+FF30
 | ||||
| -3-2351	U+0051	# LATIN CAPITAL LETTER Q	Fullwidth: U+FF31
 | ||||
| -3-2352	U+0052	# LATIN CAPITAL LETTER R	Fullwidth: U+FF32
 | ||||
| -3-2353	U+0053	# LATIN CAPITAL LETTER S	Fullwidth: U+FF33
 | ||||
| -3-2354	U+0054	# LATIN CAPITAL LETTER T	Fullwidth: U+FF34
 | ||||
| -3-2355	U+0055	# LATIN CAPITAL LETTER U	Fullwidth: U+FF35
 | ||||
| -3-2356	U+0056	# LATIN CAPITAL LETTER V	Fullwidth: U+FF36
 | ||||
| -3-2357	U+0057	# LATIN CAPITAL LETTER W	Fullwidth: U+FF37
 | ||||
| -3-2358	U+0058	# LATIN CAPITAL LETTER X	Fullwidth: U+FF38
 | ||||
| -3-2359	U+0059	# LATIN CAPITAL LETTER Y	Fullwidth: U+FF39
 | ||||
| -3-235A	U+005A	# LATIN CAPITAL LETTER Z	Fullwidth: U+FF3A
 | ||||
| +3-2341	U+FF21	# LATIN CAPITAL LETTER A	Fullwidth: U+FF21
 | ||||
| +3-2342	U+FF22	# LATIN CAPITAL LETTER B	Fullwidth: U+FF22
 | ||||
| +3-2343	U+FF23	# LATIN CAPITAL LETTER C	Fullwidth: U+FF23
 | ||||
| +3-2344	U+FF24	# LATIN CAPITAL LETTER D	Fullwidth: U+FF24
 | ||||
| +3-2345	U+FF25	# LATIN CAPITAL LETTER E	Fullwidth: U+FF25
 | ||||
| +3-2346	U+FF26	# LATIN CAPITAL LETTER F	Fullwidth: U+FF26
 | ||||
| +3-2347	U+FF27	# LATIN CAPITAL LETTER G	Fullwidth: U+FF27
 | ||||
| +3-2348	U+FF28	# LATIN CAPITAL LETTER H	Fullwidth: U+FF28
 | ||||
| +3-2349	U+FF29	# LATIN CAPITAL LETTER I	Fullwidth: U+FF29
 | ||||
| +3-234A	U+FF2A	# LATIN CAPITAL LETTER J	Fullwidth: U+FF2A
 | ||||
| +3-234B	U+FF2B	# LATIN CAPITAL LETTER K	Fullwidth: U+FF2B
 | ||||
| +3-234C	U+FF2C	# LATIN CAPITAL LETTER L	Fullwidth: U+FF2C
 | ||||
| +3-234D	U+FF2D	# LATIN CAPITAL LETTER M	Fullwidth: U+FF2D
 | ||||
| +3-234E	U+FF2E	# LATIN CAPITAL LETTER N	Fullwidth: U+FF2E
 | ||||
| +3-234F	U+FF2F	# LATIN CAPITAL LETTER O	Fullwidth: U+FF2F
 | ||||
| +3-2350	U+FF30	# LATIN CAPITAL LETTER P	Fullwidth: U+FF30
 | ||||
| +3-2351	U+FF31	# LATIN CAPITAL LETTER Q	Fullwidth: U+FF31
 | ||||
| +3-2352	U+FF32	# LATIN CAPITAL LETTER R	Fullwidth: U+FF32
 | ||||
| +3-2353	U+FF33	# LATIN CAPITAL LETTER S	Fullwidth: U+FF33
 | ||||
| +3-2354	U+FF34	# LATIN CAPITAL LETTER T	Fullwidth: U+FF34
 | ||||
| +3-2355	U+FF35	# LATIN CAPITAL LETTER U	Fullwidth: U+FF35
 | ||||
| +3-2356	U+FF36	# LATIN CAPITAL LETTER V	Fullwidth: U+FF36
 | ||||
| +3-2357	U+FF37	# LATIN CAPITAL LETTER W	Fullwidth: U+FF37
 | ||||
| +3-2358	U+FF38	# LATIN CAPITAL LETTER X	Fullwidth: U+FF38
 | ||||
| +3-2359	U+FF39	# LATIN CAPITAL LETTER Y	Fullwidth: U+FF39
 | ||||
| +3-235A	U+FF3A	# LATIN CAPITAL LETTER Z	Fullwidth: U+FF3A
 | ||||
|  3-235B	U+2213	# MINUS-OR-PLUS SIGN	[2000] | ||||
|  3-235C	U+2135	# ALEF SYMBOL	[2000] | ||||
|  3-235D	U+210F	# PLANCK CONSTANT OVER TWO PI	[2000] | ||||
|  3-235E	U+33CB	# SQUARE HP	[2000] | ||||
|  3-235F	U+2113	# SCRIPT SMALL L	[2000] | ||||
|  3-2360	U+2127	# INVERTED OHM SIGN	[2000] | ||||
| -3-2361	U+0061	# LATIN SMALL LETTER A	Fullwidth: U+FF41
 | ||||
| -3-2362	U+0062	# LATIN SMALL LETTER B	Fullwidth: U+FF42
 | ||||
| -3-2363	U+0063	# LATIN SMALL LETTER C	Fullwidth: U+FF43
 | ||||
| -3-2364	U+0064	# LATIN SMALL LETTER D	Fullwidth: U+FF44
 | ||||
| -3-2365	U+0065	# LATIN SMALL LETTER E	Fullwidth: U+FF45
 | ||||
| -3-2366	U+0066	# LATIN SMALL LETTER F	Fullwidth: U+FF46
 | ||||
| -3-2367	U+0067	# LATIN SMALL LETTER G	Fullwidth: U+FF47
 | ||||
| -3-2368	U+0068	# LATIN SMALL LETTER H	Fullwidth: U+FF48
 | ||||
| -3-2369	U+0069	# LATIN SMALL LETTER I	Fullwidth: U+FF49
 | ||||
| -3-236A	U+006A	# LATIN SMALL LETTER J	Fullwidth: U+FF4A
 | ||||
| -3-236B	U+006B	# LATIN SMALL LETTER K	Fullwidth: U+FF4B
 | ||||
| -3-236C	U+006C	# LATIN SMALL LETTER L	Fullwidth: U+FF4C
 | ||||
| -3-236D	U+006D	# LATIN SMALL LETTER M	Fullwidth: U+FF4D
 | ||||
| -3-236E	U+006E	# LATIN SMALL LETTER N	Fullwidth: U+FF4E
 | ||||
| -3-236F	U+006F	# LATIN SMALL LETTER O	Fullwidth: U+FF4F
 | ||||
| -3-2370	U+0070	# LATIN SMALL LETTER P	Fullwidth: U+FF50
 | ||||
| -3-2371	U+0071	# LATIN SMALL LETTER Q	Fullwidth: U+FF51
 | ||||
| -3-2372	U+0072	# LATIN SMALL LETTER R	Fullwidth: U+FF52
 | ||||
| -3-2373	U+0073	# LATIN SMALL LETTER S	Fullwidth: U+FF53
 | ||||
| -3-2374	U+0074	# LATIN SMALL LETTER T	Fullwidth: U+FF54
 | ||||
| -3-2375	U+0075	# LATIN SMALL LETTER U	Fullwidth: U+FF55
 | ||||
| -3-2376	U+0076	# LATIN SMALL LETTER V	Fullwidth: U+FF56
 | ||||
| -3-2377	U+0077	# LATIN SMALL LETTER W	Fullwidth: U+FF57
 | ||||
| -3-2378	U+0078	# LATIN SMALL LETTER X	Fullwidth: U+FF58
 | ||||
| -3-2379	U+0079	# LATIN SMALL LETTER Y	Fullwidth: U+FF59
 | ||||
| -3-237A	U+007A	# LATIN SMALL LETTER Z	Fullwidth: U+FF5A
 | ||||
| +3-2361	U+FF41	# LATIN SMALL LETTER A	Fullwidth: U+FF41
 | ||||
| +3-2362	U+FF42	# LATIN SMALL LETTER B	Fullwidth: U+FF42
 | ||||
| +3-2363	U+FF43	# LATIN SMALL LETTER C	Fullwidth: U+FF43
 | ||||
| +3-2364	U+FF44	# LATIN SMALL LETTER D	Fullwidth: U+FF44
 | ||||
| +3-2365	U+FF45	# LATIN SMALL LETTER E	Fullwidth: U+FF45
 | ||||
| +3-2366	U+FF46	# LATIN SMALL LETTER F	Fullwidth: U+FF46
 | ||||
| +3-2367	U+FF47	# LATIN SMALL LETTER G	Fullwidth: U+FF47
 | ||||
| +3-2368	U+FF48	# LATIN SMALL LETTER H	Fullwidth: U+FF48
 | ||||
| +3-2369	U+FF49	# LATIN SMALL LETTER I	Fullwidth: U+FF49
 | ||||
| +3-236A	U+FF4A	# LATIN SMALL LETTER J	Fullwidth: U+FF4A
 | ||||
| +3-236B	U+FF4B	# LATIN SMALL LETTER K	Fullwidth: U+FF4B
 | ||||
| +3-236C	U+FF4C	# LATIN SMALL LETTER L	Fullwidth: U+FF4C
 | ||||
| +3-236D	U+FF4D	# LATIN SMALL LETTER M	Fullwidth: U+FF4D
 | ||||
| +3-236E	U+FF4E	# LATIN SMALL LETTER N	Fullwidth: U+FF4E
 | ||||
| +3-236F	U+FF4F	# LATIN SMALL LETTER O	Fullwidth: U+FF4F
 | ||||
| +3-2370	U+FF50	# LATIN SMALL LETTER P	Fullwidth: U+FF50
 | ||||
| +3-2371	U+FF51	# LATIN SMALL LETTER Q	Fullwidth: U+FF51
 | ||||
| +3-2372	U+FF52	# LATIN SMALL LETTER R	Fullwidth: U+FF52
 | ||||
| +3-2373	U+FF53	# LATIN SMALL LETTER S	Fullwidth: U+FF53
 | ||||
| +3-2374	U+FF54	# LATIN SMALL LETTER T	Fullwidth: U+FF54
 | ||||
| +3-2375	U+FF55	# LATIN SMALL LETTER U	Fullwidth: U+FF55
 | ||||
| +3-2376	U+FF56	# LATIN SMALL LETTER V	Fullwidth: U+FF56
 | ||||
| +3-2377	U+FF57	# LATIN SMALL LETTER W	Fullwidth: U+FF57
 | ||||
| +3-2378	U+FF58	# LATIN SMALL LETTER X	Fullwidth: U+FF58
 | ||||
| +3-2379	U+FF59	# LATIN SMALL LETTER Y	Fullwidth: U+FF59
 | ||||
| +3-237A	U+FF5A	# LATIN SMALL LETTER Z	Fullwidth: U+FF5A
 | ||||
|  3-237B	U+30A0	# KATAKANA-HIRAGANA DOUBLE HYPHEN	[2000]	[Unicode3.2] | ||||
|  3-237C	U+2013	# EN DASH	[2000] | ||||
|  3-237D	U+29FA	# DOUBLE PLUS	[2000]	[Unicode3.2] | ||||
							
								
								
									
										351
									
								
								Tools/unicode/python-mappings/diff/jisx0213-2004-std.txt.diff
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										351
									
								
								Tools/unicode/python-mappings/diff/jisx0213-2004-std.txt.diff
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,351 @@ | |||
| --- jisx0213-2000-std.txt.orig	Tue Apr 16 23:32:38 2002
 | ||||
| +++ jisx0213-2004-std.txt	Thu Jul  8 11:51:54 2004
 | ||||
| @@ -1,6 +1,6 @@
 | ||||
| -## JIS X 0213:2000 vs Unicode mapping table
 | ||||
| +## JIS X 0213:2004 vs Unicode mapping table
 | ||||
|  ##  | ||||
| -## Date: 16 Apr 2002 13:09:49 GMT
 | ||||
| +## Date: 7 Jul 2004 13:09:49 GMT
 | ||||
|  ## License: | ||||
|  ## 	Copyright (C) 2001 earthian@tama.or.jp, All Rights Reserved. | ||||
|  ## 	Copyright (C) 2001 I'O, All Rights Reserved. | ||||
| @@ -23,21 +23,21 @@
 | ||||
|  3-2121	U+3000	# IDEOGRAPHIC SPACE | ||||
|  3-2122	U+3001	# IDEOGRAPHIC COMMA | ||||
|  3-2123	U+3002	# IDEOGRAPHIC FULL STOP | ||||
| -3-2124	U+002C	# COMMA	Fullwidth: U+FF0C
 | ||||
| -3-2125	U+002E	# FULL STOP	Fullwidth: U+FF0E
 | ||||
| +3-2124	U+FF0C	# COMMA	Fullwidth: U+FF0C
 | ||||
| +3-2125	U+FF0E	# FULL STOP	Fullwidth: U+FF0E
 | ||||
|  3-2126	U+30FB	# KATAKANA MIDDLE DOT | ||||
| -3-2127	U+003A	# COLON	Fullwidth: U+FF1A
 | ||||
| -3-2128	U+003B	# SEMICOLON	Fullwidth: U+FF1B
 | ||||
| -3-2129	U+003F	# QUESTION MARK	Fullwidth: U+FF1F
 | ||||
| -3-212A	U+0021	# EXCLAMATION MARK	Fullwidth: U+FF01
 | ||||
| +3-2127	U+FF1A	# COLON	Fullwidth: U+FF1A
 | ||||
| +3-2128	U+FF1B	# SEMICOLON	Fullwidth: U+FF1B
 | ||||
| +3-2129	U+FF1F	# QUESTION MARK	Fullwidth: U+FF1F
 | ||||
| +3-212A	U+FF01	# EXCLAMATION MARK	Fullwidth: U+FF01
 | ||||
|  3-212B	U+309B	# KATAKANA-HIRAGANA VOICED SOUND MARK | ||||
|  3-212C	U+309C	# KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK | ||||
|  3-212D	U+00B4	# ACUTE ACCENT | ||||
| -3-212E	U+0060	# GRAVE ACCENT	Fullwidth: U+FF40
 | ||||
| +3-212E	U+FF40	# GRAVE ACCENT	Fullwidth: U+FF40
 | ||||
|  3-212F	U+00A8	# DIAERESIS | ||||
| -3-2130	U+005E	# CIRCUMFLEX ACCENT	Fullwidth: U+FF3E
 | ||||
| -3-2131	U+203E	# OVERLINE	Windows: U+FFE3
 | ||||
| -3-2132	U+005F	# LOW LINE	Fullwidth: U+FF3F
 | ||||
| +3-2130	U+FF3E	# CIRCUMFLEX ACCENT	Fullwidth: U+FF3E
 | ||||
| +3-2131	U+FFE3	# OVERLINE	Windows: U+FFE3
 | ||||
| +3-2132	U+FF3F	# LOW LINE	Fullwidth: U+FF3F
 | ||||
|  3-2133	U+30FD	# KATAKANA ITERATION MARK | ||||
|  3-2134	U+30FE	# KATAKANA VOICED ITERATION MARK | ||||
|  3-2135	U+309D	# HIRAGANA ITERATION MARK | ||||
| @@ -48,27 +48,27 @@
 | ||||
|  3-213A	U+3006	# IDEOGRAPHIC CLOSING MARK | ||||
|  3-213B	U+3007	# IDEOGRAPHIC NUMBER ZERO | ||||
|  3-213C	U+30FC	# KATAKANA-HIRAGANA PROLONGED SOUND MARK | ||||
| -3-213D	U+2014	# EM DASH	Windows: U+2015
 | ||||
| +3-213D	U+2015	# EM DASH	Windows: U+2015
 | ||||
|  3-213E	U+2010	# HYPHEN | ||||
| -3-213F	U+002F	# SOLIDUS	Fullwidth: U+FF0F
 | ||||
| +3-213F	U+FF0F	# SOLIDUS	Fullwidth: U+FF0F
 | ||||
|  3-2140	U+005C	# REVERSE SOLIDUS	Fullwidth: U+FF3C | ||||
|  3-2141	U+301C	# WAVE DASH	Windows: U+FF5E | ||||
|  3-2142	U+2016	# DOUBLE VERTICAL LINE	Windows: U+2225 | ||||
| -3-2143	U+007C	# VERTICAL LINE	Fullwidth: U+FF5C
 | ||||
| +3-2143	U+FF5C	# VERTICAL LINE	Fullwidth: U+FF5C
 | ||||
|  3-2144	U+2026	# HORIZONTAL ELLIPSIS | ||||
|  3-2145	U+2025	# TWO DOT LEADER | ||||
|  3-2146	U+2018	# LEFT SINGLE QUOTATION MARK | ||||
|  3-2147	U+2019	# RIGHT SINGLE QUOTATION MARK | ||||
|  3-2148	U+201C	# LEFT DOUBLE QUOTATION MARK | ||||
|  3-2149	U+201D	# RIGHT DOUBLE QUOTATION MARK | ||||
| -3-214A	U+0028	# LEFT PARENTHESIS	Fullwidth: U+FF08
 | ||||
| -3-214B	U+0029	# RIGHT PARENTHESIS	Fullwidth: U+FF09
 | ||||
| +3-214A	U+FF08	# LEFT PARENTHESIS	Fullwidth: U+FF08
 | ||||
| +3-214B	U+FF09	# RIGHT PARENTHESIS	Fullwidth: U+FF09
 | ||||
|  3-214C	U+3014	# LEFT TORTOISE SHELL BRACKET | ||||
|  3-214D	U+3015	# RIGHT TORTOISE SHELL BRACKET | ||||
| -3-214E	U+005B	# LEFT SQUARE BRACKET	Fullwidth: U+FF3B
 | ||||
| -3-214F	U+005D	# RIGHT SQUARE BRACKET	Fullwidth: U+FF3D
 | ||||
| -3-2150	U+007B	# LEFT CURLY BRACKET	Fullwidth: U+FF5B
 | ||||
| -3-2151	U+007D	# RIGHT CURLY BRACKET	Fullwidth: U+FF5D
 | ||||
| +3-214E	U+FF3B	# LEFT SQUARE BRACKET	Fullwidth: U+FF3B
 | ||||
| +3-214F	U+FF3D	# RIGHT SQUARE BRACKET	Fullwidth: U+FF3D
 | ||||
| +3-2150	U+FF5B	# LEFT CURLY BRACKET	Fullwidth: U+FF5B
 | ||||
| +3-2151	U+FF5D	# RIGHT CURLY BRACKET	Fullwidth: U+FF5D
 | ||||
|  3-2152	U+3008	# LEFT ANGLE BRACKET | ||||
|  3-2153	U+3009	# RIGHT ANGLE BRACKET | ||||
|  3-2154	U+300A	# LEFT DOUBLE ANGLE BRACKET | ||||
| @@ -79,15 +79,15 @@
 | ||||
|  3-2159	U+300F	# RIGHT WHITE CORNER BRACKET | ||||
|  3-215A	U+3010	# LEFT BLACK LENTICULAR BRACKET | ||||
|  3-215B	U+3011	# RIGHT BLACK LENTICULAR BRACKET | ||||
| -3-215C	U+002B	# PLUS SIGN	Fullwidth: U+FF0B
 | ||||
| +3-215C	U+FF0B	# PLUS SIGN	Fullwidth: U+FF0B
 | ||||
|  3-215D	U+2212	# MINUS SIGN	Windows: U+FF0D | ||||
|  3-215E	U+00B1	# PLUS-MINUS SIGN | ||||
|  3-215F	U+00D7	# MULTIPLICATION SIGN | ||||
|  3-2160	U+00F7	# DIVISION SIGN | ||||
| -3-2161	U+003D	# EQUALS SIGN	Fullwidth: U+FF1D
 | ||||
| +3-2161	U+FF1D	# EQUALS SIGN	Fullwidth: U+FF1D
 | ||||
|  3-2162	U+2260	# NOT EQUAL TO | ||||
| -3-2163	U+003C	# LESS-THAN SIGN	Fullwidth: U+FF1C
 | ||||
| -3-2164	U+003E	# GREATER-THAN SIGN	Fullwidth: U+FF1E
 | ||||
| +3-2163	U+FF1C	# LESS-THAN SIGN	Fullwidth: U+FF1C
 | ||||
| +3-2164	U+FF1E	# GREATER-THAN SIGN	Fullwidth: U+FF1E
 | ||||
|  3-2165	U+2266	# LESS-THAN OVER EQUAL TO | ||||
|  3-2166	U+2267	# GREATER-THAN OVER EQUAL TO | ||||
|  3-2167	U+221E	# INFINITY | ||||
| @@ -98,15 +98,15 @@
 | ||||
|  3-216C	U+2032	# PRIME | ||||
|  3-216D	U+2033	# DOUBLE PRIME | ||||
|  3-216E	U+2103	# DEGREE CELSIUS | ||||
| -3-216F	U+00A5	# YEN SIGN	Windows: U+FFE5
 | ||||
| -3-2170	U+0024	# DOLLAR SIGN	Fullwidth: U+FF04
 | ||||
| +3-216F	U+FFE5	# YEN SIGN	Windows: U+FFE5
 | ||||
| +3-2170	U+FF04	# DOLLAR SIGN	Fullwidth: U+FF04
 | ||||
|  3-2171	U+00A2	# CENT SIGN	Windows: U+FFE0 | ||||
|  3-2172	U+00A3	# POUND SIGN	Windows: U+FFE1 | ||||
| -3-2173	U+0025	# PERCENT SIGN	Fullwidth: U+FF05
 | ||||
| -3-2174	U+0023	# NUMBER SIGN	Fullwidth: U+FF03
 | ||||
| -3-2175	U+0026	# AMPERSAND	Fullwidth: U+FF06
 | ||||
| -3-2176	U+002A	# ASTERISK	Fullwidth: U+FF0A
 | ||||
| -3-2177	U+0040	# COMMERCIAL AT	Fullwidth: U+FF20
 | ||||
| +3-2173	U+FF05	# PERCENT SIGN	Fullwidth: U+FF05
 | ||||
| +3-2174	U+FF03	# NUMBER SIGN	Fullwidth: U+FF03
 | ||||
| +3-2175	U+FF06	# AMPERSAND	Fullwidth: U+FF06
 | ||||
| +3-2176	U+FF0A	# ASTERISK	Fullwidth: U+FF0A
 | ||||
| +3-2177	U+FF20	# COMMERCIAL AT	Fullwidth: U+FF20
 | ||||
|  3-2178	U+00A7	# SECTION SIGN | ||||
|  3-2179	U+2606	# WHITE STAR | ||||
|  3-217A	U+2605	# BLACK STAR | ||||
| @@ -128,9 +128,9 @@
 | ||||
|  3-222C	U+2191	# UPWARDS ARROW | ||||
|  3-222D	U+2193	# DOWNWARDS ARROW | ||||
|  3-222E	U+3013	# GETA MARK | ||||
| -3-222F	U+0027	# APOSTROPHE	Fullwidth: U+FF07
 | ||||
| -3-2230	U+0022	# QUOTATION MARK	[2000]	Fullwidth: U+FF02
 | ||||
| -3-2231	U+002D	# HYPHEN-MINUS	[2000]	Fullwidth: U+FF0D
 | ||||
| +3-222F	U+FF07	# APOSTROPHE	Fullwidth: U+FF07
 | ||||
| +3-2230	U+FF02	# QUOTATION MARK	[2000]	Fullwidth: U+FF02
 | ||||
| +3-2231	U+FF0D	# HYPHEN-MINUS	[2000]	Fullwidth: U+FF0D
 | ||||
|  3-2232	U+007E	# TILDE	[2000]	Fullwidth: U+FF5E | ||||
|  3-2233	U+3033	# VERTICAL KANA REPEAT MARK UPPER HALF	[2000] | ||||
|  3-2234	U+3034	# VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF	[2000] | ||||
| @@ -223,16 +223,16 @@
 | ||||
|  3-232D	U+21E9	# DOWNWARDS WHITE ARROW	[2000] | ||||
|  3-232E	U+2934	# ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS	[2000]	[Unicode3.2] | ||||
|  3-232F	U+2935	# ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS	[2000]	[Unicode3.2] | ||||
| -3-2330	U+0030	# DIGIT ZERO	Fullwidth: U+FF10
 | ||||
| -3-2331	U+0031	# DIGIT ONE	Fullwidth: U+FF11
 | ||||
| -3-2332	U+0032	# DIGIT TWO	Fullwidth: U+FF12
 | ||||
| -3-2333	U+0033	# DIGIT THREE	Fullwidth: U+FF13
 | ||||
| -3-2334	U+0034	# DIGIT FOUR	Fullwidth: U+FF14
 | ||||
| -3-2335	U+0035	# DIGIT FIVE	Fullwidth: U+FF15
 | ||||
| -3-2336	U+0036	# DIGIT SIX	Fullwidth: U+FF16
 | ||||
| -3-2337	U+0037	# DIGIT SEVEN	Fullwidth: U+FF17
 | ||||
| -3-2338	U+0038	# DIGIT EIGHT	Fullwidth: U+FF18
 | ||||
| -3-2339	U+0039	# DIGIT NINE	Fullwidth: U+FF19
 | ||||
| +3-2330	U+FF10	# DIGIT ZERO	Fullwidth: U+FF10
 | ||||
| +3-2331	U+FF11	# DIGIT ONE	Fullwidth: U+FF11
 | ||||
| +3-2332	U+FF12	# DIGIT TWO	Fullwidth: U+FF12
 | ||||
| +3-2333	U+FF13	# DIGIT THREE	Fullwidth: U+FF13
 | ||||
| +3-2334	U+FF14	# DIGIT FOUR	Fullwidth: U+FF14
 | ||||
| +3-2335	U+FF15	# DIGIT FIVE	Fullwidth: U+FF15
 | ||||
| +3-2336	U+FF16	# DIGIT SIX	Fullwidth: U+FF16
 | ||||
| +3-2337	U+FF17	# DIGIT SEVEN	Fullwidth: U+FF17
 | ||||
| +3-2338	U+FF18	# DIGIT EIGHT	Fullwidth: U+FF18
 | ||||
| +3-2339	U+FF19	# DIGIT NINE	Fullwidth: U+FF19
 | ||||
|  3-233A	U+29BF	# CIRCLED BULLET	[2000]	[Unicode3.2] | ||||
|  3-233B	U+25C9	# FISHEYE	[2000] | ||||
|  3-233C	U+303D	# PART ALTERNATION MARK	[2000]	[Unicode3.2] | ||||
| @@ -240,64 +240,64 @@
 | ||||
|  3-233E	U+FE45	# SESAME DOT	[2000]	[Unicode3.2] | ||||
|  3-233F	U+25E6	# WHITE BULLET	[2000] | ||||
|  3-2340	U+2022	# BULLET	[2000] | ||||
| -3-2341	U+0041	# LATIN CAPITAL LETTER A	Fullwidth: U+FF21
 | ||||
| -3-2342	U+0042	# LATIN CAPITAL LETTER B	Fullwidth: U+FF22
 | ||||
| -3-2343	U+0043	# LATIN CAPITAL LETTER C	Fullwidth: U+FF23
 | ||||
| -3-2344	U+0044	# LATIN CAPITAL LETTER D	Fullwidth: U+FF24
 | ||||
| -3-2345	U+0045	# LATIN CAPITAL LETTER E	Fullwidth: U+FF25
 | ||||
| -3-2346	U+0046	# LATIN CAPITAL LETTER F	Fullwidth: U+FF26
 | ||||
| -3-2347	U+0047	# LATIN CAPITAL LETTER G	Fullwidth: U+FF27
 | ||||
| -3-2348	U+0048	# LATIN CAPITAL LETTER H	Fullwidth: U+FF28
 | ||||
| -3-2349	U+0049	# LATIN CAPITAL LETTER I	Fullwidth: U+FF29
 | ||||
| -3-234A	U+004A	# LATIN CAPITAL LETTER J	Fullwidth: U+FF2A
 | ||||
| -3-234B	U+004B	# LATIN CAPITAL LETTER K	Fullwidth: U+FF2B
 | ||||
| -3-234C	U+004C	# LATIN CAPITAL LETTER L	Fullwidth: U+FF2C
 | ||||
| -3-234D	U+004D	# LATIN CAPITAL LETTER M	Fullwidth: U+FF2D
 | ||||
| -3-234E	U+004E	# LATIN CAPITAL LETTER N	Fullwidth: U+FF2E
 | ||||
| -3-234F	U+004F	# LATIN CAPITAL LETTER O	Fullwidth: U+FF2F
 | ||||
| -3-2350	U+0050	# LATIN CAPITAL LETTER P	Fullwidth: U+FF30
 | ||||
| -3-2351	U+0051	# LATIN CAPITAL LETTER Q	Fullwidth: U+FF31
 | ||||
| -3-2352	U+0052	# LATIN CAPITAL LETTER R	Fullwidth: U+FF32
 | ||||
| -3-2353	U+0053	# LATIN CAPITAL LETTER S	Fullwidth: U+FF33
 | ||||
| -3-2354	U+0054	# LATIN CAPITAL LETTER T	Fullwidth: U+FF34
 | ||||
| -3-2355	U+0055	# LATIN CAPITAL LETTER U	Fullwidth: U+FF35
 | ||||
| -3-2356	U+0056	# LATIN CAPITAL LETTER V	Fullwidth: U+FF36
 | ||||
| -3-2357	U+0057	# LATIN CAPITAL LETTER W	Fullwidth: U+FF37
 | ||||
| -3-2358	U+0058	# LATIN CAPITAL LETTER X	Fullwidth: U+FF38
 | ||||
| -3-2359	U+0059	# LATIN CAPITAL LETTER Y	Fullwidth: U+FF39
 | ||||
| -3-235A	U+005A	# LATIN CAPITAL LETTER Z	Fullwidth: U+FF3A
 | ||||
| +3-2341	U+FF21	# LATIN CAPITAL LETTER A	Fullwidth: U+FF21
 | ||||
| +3-2342	U+FF22	# LATIN CAPITAL LETTER B	Fullwidth: U+FF22
 | ||||
| +3-2343	U+FF23	# LATIN CAPITAL LETTER C	Fullwidth: U+FF23
 | ||||
| +3-2344	U+FF24	# LATIN CAPITAL LETTER D	Fullwidth: U+FF24
 | ||||
| +3-2345	U+FF25	# LATIN CAPITAL LETTER E	Fullwidth: U+FF25
 | ||||
| +3-2346	U+FF26	# LATIN CAPITAL LETTER F	Fullwidth: U+FF26
 | ||||
| +3-2347	U+FF27	# LATIN CAPITAL LETTER G	Fullwidth: U+FF27
 | ||||
| +3-2348	U+FF28	# LATIN CAPITAL LETTER H	Fullwidth: U+FF28
 | ||||
| +3-2349	U+FF29	# LATIN CAPITAL LETTER I	Fullwidth: U+FF29
 | ||||
| +3-234A	U+FF2A	# LATIN CAPITAL LETTER J	Fullwidth: U+FF2A
 | ||||
| +3-234B	U+FF2B	# LATIN CAPITAL LETTER K	Fullwidth: U+FF2B
 | ||||
| +3-234C	U+FF2C	# LATIN CAPITAL LETTER L	Fullwidth: U+FF2C
 | ||||
| +3-234D	U+FF2D	# LATIN CAPITAL LETTER M	Fullwidth: U+FF2D
 | ||||
| +3-234E	U+FF2E	# LATIN CAPITAL LETTER N	Fullwidth: U+FF2E
 | ||||
| +3-234F	U+FF2F	# LATIN CAPITAL LETTER O	Fullwidth: U+FF2F
 | ||||
| +3-2350	U+FF30	# LATIN CAPITAL LETTER P	Fullwidth: U+FF30
 | ||||
| +3-2351	U+FF31	# LATIN CAPITAL LETTER Q	Fullwidth: U+FF31
 | ||||
| +3-2352	U+FF32	# LATIN CAPITAL LETTER R	Fullwidth: U+FF32
 | ||||
| +3-2353	U+FF33	# LATIN CAPITAL LETTER S	Fullwidth: U+FF33
 | ||||
| +3-2354	U+FF34	# LATIN CAPITAL LETTER T	Fullwidth: U+FF34
 | ||||
| +3-2355	U+FF35	# LATIN CAPITAL LETTER U	Fullwidth: U+FF35
 | ||||
| +3-2356	U+FF36	# LATIN CAPITAL LETTER V	Fullwidth: U+FF36
 | ||||
| +3-2357	U+FF37	# LATIN CAPITAL LETTER W	Fullwidth: U+FF37
 | ||||
| +3-2358	U+FF38	# LATIN CAPITAL LETTER X	Fullwidth: U+FF38
 | ||||
| +3-2359	U+FF39	# LATIN CAPITAL LETTER Y	Fullwidth: U+FF39
 | ||||
| +3-235A	U+FF3A	# LATIN CAPITAL LETTER Z	Fullwidth: U+FF3A
 | ||||
|  3-235B	U+2213	# MINUS-OR-PLUS SIGN	[2000] | ||||
|  3-235C	U+2135	# ALEF SYMBOL	[2000] | ||||
|  3-235D	U+210F	# PLANCK CONSTANT OVER TWO PI	[2000] | ||||
|  3-235E	U+33CB	# SQUARE HP	[2000] | ||||
|  3-235F	U+2113	# SCRIPT SMALL L	[2000] | ||||
|  3-2360	U+2127	# INVERTED OHM SIGN	[2000] | ||||
| -3-2361	U+0061	# LATIN SMALL LETTER A	Fullwidth: U+FF41
 | ||||
| -3-2362	U+0062	# LATIN SMALL LETTER B	Fullwidth: U+FF42
 | ||||
| -3-2363	U+0063	# LATIN SMALL LETTER C	Fullwidth: U+FF43
 | ||||
| -3-2364	U+0064	# LATIN SMALL LETTER D	Fullwidth: U+FF44
 | ||||
| -3-2365	U+0065	# LATIN SMALL LETTER E	Fullwidth: U+FF45
 | ||||
| -3-2366	U+0066	# LATIN SMALL LETTER F	Fullwidth: U+FF46
 | ||||
| -3-2367	U+0067	# LATIN SMALL LETTER G	Fullwidth: U+FF47
 | ||||
| -3-2368	U+0068	# LATIN SMALL LETTER H	Fullwidth: U+FF48
 | ||||
| -3-2369	U+0069	# LATIN SMALL LETTER I	Fullwidth: U+FF49
 | ||||
| -3-236A	U+006A	# LATIN SMALL LETTER J	Fullwidth: U+FF4A
 | ||||
| -3-236B	U+006B	# LATIN SMALL LETTER K	Fullwidth: U+FF4B
 | ||||
| -3-236C	U+006C	# LATIN SMALL LETTER L	Fullwidth: U+FF4C
 | ||||
| -3-236D	U+006D	# LATIN SMALL LETTER M	Fullwidth: U+FF4D
 | ||||
| -3-236E	U+006E	# LATIN SMALL LETTER N	Fullwidth: U+FF4E
 | ||||
| -3-236F	U+006F	# LATIN SMALL LETTER O	Fullwidth: U+FF4F
 | ||||
| -3-2370	U+0070	# LATIN SMALL LETTER P	Fullwidth: U+FF50
 | ||||
| -3-2371	U+0071	# LATIN SMALL LETTER Q	Fullwidth: U+FF51
 | ||||
| -3-2372	U+0072	# LATIN SMALL LETTER R	Fullwidth: U+FF52
 | ||||
| -3-2373	U+0073	# LATIN SMALL LETTER S	Fullwidth: U+FF53
 | ||||
| -3-2374	U+0074	# LATIN SMALL LETTER T	Fullwidth: U+FF54
 | ||||
| -3-2375	U+0075	# LATIN SMALL LETTER U	Fullwidth: U+FF55
 | ||||
| -3-2376	U+0076	# LATIN SMALL LETTER V	Fullwidth: U+FF56
 | ||||
| -3-2377	U+0077	# LATIN SMALL LETTER W	Fullwidth: U+FF57
 | ||||
| -3-2378	U+0078	# LATIN SMALL LETTER X	Fullwidth: U+FF58
 | ||||
| -3-2379	U+0079	# LATIN SMALL LETTER Y	Fullwidth: U+FF59
 | ||||
| -3-237A	U+007A	# LATIN SMALL LETTER Z	Fullwidth: U+FF5A
 | ||||
| +3-2361	U+FF41	# LATIN SMALL LETTER A	Fullwidth: U+FF41
 | ||||
| +3-2362	U+FF42	# LATIN SMALL LETTER B	Fullwidth: U+FF42
 | ||||
| +3-2363	U+FF43	# LATIN SMALL LETTER C	Fullwidth: U+FF43
 | ||||
| +3-2364	U+FF44	# LATIN SMALL LETTER D	Fullwidth: U+FF44
 | ||||
| +3-2365	U+FF45	# LATIN SMALL LETTER E	Fullwidth: U+FF45
 | ||||
| +3-2366	U+FF46	# LATIN SMALL LETTER F	Fullwidth: U+FF46
 | ||||
| +3-2367	U+FF47	# LATIN SMALL LETTER G	Fullwidth: U+FF47
 | ||||
| +3-2368	U+FF48	# LATIN SMALL LETTER H	Fullwidth: U+FF48
 | ||||
| +3-2369	U+FF49	# LATIN SMALL LETTER I	Fullwidth: U+FF49
 | ||||
| +3-236A	U+FF4A	# LATIN SMALL LETTER J	Fullwidth: U+FF4A
 | ||||
| +3-236B	U+FF4B	# LATIN SMALL LETTER K	Fullwidth: U+FF4B
 | ||||
| +3-236C	U+FF4C	# LATIN SMALL LETTER L	Fullwidth: U+FF4C
 | ||||
| +3-236D	U+FF4D	# LATIN SMALL LETTER M	Fullwidth: U+FF4D
 | ||||
| +3-236E	U+FF4E	# LATIN SMALL LETTER N	Fullwidth: U+FF4E
 | ||||
| +3-236F	U+FF4F	# LATIN SMALL LETTER O	Fullwidth: U+FF4F
 | ||||
| +3-2370	U+FF50	# LATIN SMALL LETTER P	Fullwidth: U+FF50
 | ||||
| +3-2371	U+FF51	# LATIN SMALL LETTER Q	Fullwidth: U+FF51
 | ||||
| +3-2372	U+FF52	# LATIN SMALL LETTER R	Fullwidth: U+FF52
 | ||||
| +3-2373	U+FF53	# LATIN SMALL LETTER S	Fullwidth: U+FF53
 | ||||
| +3-2374	U+FF54	# LATIN SMALL LETTER T	Fullwidth: U+FF54
 | ||||
| +3-2375	U+FF55	# LATIN SMALL LETTER U	Fullwidth: U+FF55
 | ||||
| +3-2376	U+FF56	# LATIN SMALL LETTER V	Fullwidth: U+FF56
 | ||||
| +3-2377	U+FF57	# LATIN SMALL LETTER W	Fullwidth: U+FF57
 | ||||
| +3-2378	U+FF58	# LATIN SMALL LETTER X	Fullwidth: U+FF58
 | ||||
| +3-2379	U+FF59	# LATIN SMALL LETTER Y	Fullwidth: U+FF59
 | ||||
| +3-237A	U+FF5A	# LATIN SMALL LETTER Z	Fullwidth: U+FF5A
 | ||||
|  3-237B	U+30A0	# KATAKANA-HIRAGANA DOUBLE HYPHEN	[2000]	[Unicode3.2] | ||||
|  3-237C	U+2013	# EN DASH	[2000] | ||||
|  3-237D	U+29FA	# DOUBLE PLUS	[2000]	[Unicode3.2] | ||||
| @@ -1242,7 +1242,7 @@
 | ||||
|  3-2D7C		# <reserved>	Windows: U+222A | ||||
|  3-2D7D	U+2756	# BLACK DIAMOND MINUS WHITE X	[2000] | ||||
|  3-2D7E	U+261E	# WHITE RIGHT POINTING INDEX	[2000] | ||||
| -3-2E21		# <reserved>
 | ||||
| +3-2E21	U+4FF1	# <cjk> [2004]
 | ||||
|  3-2E22	U+2000B	# <cjk>	[2000]	[Unicode3.1]	Private: U+F780 | ||||
|  3-2E23	U+3402	# <cjk>	[2000] | ||||
|  3-2E24	U+4E28	# <cjk>	[2000] | ||||
| @@ -1429,7 +1429,7 @@
 | ||||
|  3-2F7B	U+218BD	# <cjk>	[2000]	[Unicode3.1]	Private: U+F78F | ||||
|  3-2F7C	U+5B19	# <cjk>	[2000] | ||||
|  3-2F7D	U+5B25	# <cjk>	[2000] | ||||
| -3-2F7E		# <reserved>
 | ||||
| +3-2F7E	U+525D	# <cjk> [2004]
 | ||||
|  3-3021	U+4E9C	# <cjk> | ||||
|  3-3022	U+5516	# <cjk> | ||||
|  3-3023	U+5A03	# <cjk> | ||||
| @@ -4395,7 +4395,7 @@
 | ||||
|  3-4F51	U+6E7E	# <cjk> | ||||
|  3-4F52	U+7897	# <cjk> | ||||
|  3-4F53	U+8155	# <cjk> | ||||
| -3-4F54		# <reserved>
 | ||||
| +3-4F54	U+20B9F	# <cjk> [2004]
 | ||||
|  3-4F55	U+5B41	# <cjk>	[2000] | ||||
|  3-4F56	U+5B56	# <cjk>	[2000] | ||||
|  3-4F57	U+5B7D	# <cjk>	[2000] | ||||
| @@ -4437,7 +4437,7 @@
 | ||||
|  3-4F7B	U+5DA7	# <cjk>	[2000] | ||||
|  3-4F7C	U+5DB8	# <cjk>	[2000] | ||||
|  3-4F7D	U+5DCB	# <cjk>	[2000] | ||||
| -3-4F7E		# <reserved>
 | ||||
| +3-4F7E	U+541E	# <cjk> [2004]
 | ||||
|  3-5021	U+5F0C	# <cjk> | ||||
|  3-5022	U+4E10	# <cjk> | ||||
|  3-5023	U+4E15	# <cjk> | ||||
| @@ -7828,7 +7828,7 @@
 | ||||
|  3-7424	U+7464	# <cjk>	[1983] | ||||
|  3-7425	U+51DC	# <cjk>	[1990] | ||||
|  3-7426	U+7199	# <cjk>	[1990] | ||||
| -3-7427		# <reserved>
 | ||||
| +3-7427	U+5653	# <cjk> [2004]
 | ||||
|  3-7428	U+5DE2	# <cjk>	[2000] | ||||
|  3-7429	U+5E14	# <cjk>	[2000] | ||||
|  3-742A	U+5E18	# <cjk>	[2000] | ||||
| @@ -8851,11 +8851,11 @@
 | ||||
|  3-7E77	U+9F94	# <cjk>	[2000] | ||||
|  3-7E78	U+9F97	# <cjk>	[2000] | ||||
|  3-7E79	U+9FA2	# <cjk>	[2000] | ||||
| -3-7E7A		# <reserved>
 | ||||
| -3-7E7B		# <reserved>
 | ||||
| -3-7E7C		# <reserved>
 | ||||
| -3-7E7D		# <reserved>
 | ||||
| -3-7E7E		# <reserved>
 | ||||
| +3-7E7A	U+59F8	# <cjk> [2004]
 | ||||
| +3-7E7B	U+5C5B	# <cjk> [2004]
 | ||||
| +3-7E7C	U+5E77	# <cjk> [2004]
 | ||||
| +3-7E7D	U+7626	# <cjk> [2004]
 | ||||
| +3-7E7E	U+7E6B	# <cjk> [2004]
 | ||||
|  4-2121	U+20089	# <cjk>	[2000]	[Unicode3.1]	Private: U+F7D1 | ||||
|  4-2122	U+4E02	# <cjk>	[2000] | ||||
|  4-2123	U+4E0F	# <cjk>	[2000] | ||||
| @@ -11138,7 +11138,7 @@
 | ||||
|  4-7D38	U+9B10	# <cjk>	[2000] | ||||
|  4-7D39	U+9B12	# <cjk>	[2000] | ||||
|  4-7D3A	U+9B16	# <cjk>	[2000] | ||||
| -4-7D3B	U+9B1D	# <cjk>	[2000]
 | ||||
| +4-7D3B	U+9B1C	# <cjk>	[2000]
 | ||||
|  4-7D3C	U+9B2B	# <cjk>	[2000] | ||||
|  4-7D3D	U+9B33	# <cjk>	[2000] | ||||
|  4-7D3E	U+9B3D	# <cjk>	[2000] | ||||
							
								
								
									
										30917
									
								
								Tools/unicode/python-mappings/gb-18030-2000.xml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										30917
									
								
								Tools/unicode/python-mappings/gb-18030-2000.xml
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										11294
									
								
								Tools/unicode/python-mappings/jisx0213-2004-std.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										11294
									
								
								Tools/unicode/python-mappings/jisx0213-2004-std.txt
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Dong-hee Na
						Dong-hee Na