mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	bpo-40328: Add tool for generating cjk mapping headers (GH-19602)
This commit is contained in:
		
							parent
							
								
									2d8757758d
								
							
						
					
					
						commit
						113feb3ec2
					
				
					 15 changed files with 51015 additions and 3 deletions
				
			
		|  | @ -0,0 +1 @@ | ||||||
|  | Add tools for generating mapping headers for CJKCodecs. | ||||||
|  | @ -1,8 +1,6 @@ | ||||||
| To generate or modify mapping headers | To generate or modify mapping headers | ||||||
| ------------------------------------- | ------------------------------------- | ||||||
| Mapping headers are imported from CJKCodecs as pre-generated form. | Mapping headers are generated from Tools/unicode/genmap_*.py | ||||||
| If you need to tweak or add something on it, please look at tools/ |  | ||||||
| subdirectory of CJKCodecs' distribution. |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | // AUTO-GENERATED FILE FROM genmap_schinese.py: DO NOT EDIT
 | ||||||
| static const ucs2_t __gb2312_decmap[7482] = { | static const ucs2_t __gb2312_decmap[7482] = { | ||||||
| 12288,12289,12290,12539,713,711,168,12291,12293,8213,65374,8214,8230,8216, | 12288,12289,12290,12539,713,711,168,12291,12293,8213,65374,8214,8230,8216, | ||||||
| 8217,8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303, | 8217,8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303, | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | // AUTO-GENERATED FILE FROM genmap_japanese.py: DO NOT EDIT
 | ||||||
| #define JISX0213_ENCPAIRS 46 | #define JISX0213_ENCPAIRS 46 | ||||||
| #ifdef EXTERN_JISX0213_PAIR | #ifdef EXTERN_JISX0213_PAIR | ||||||
| static const struct widedbcs_index *jisx0213_pair_decmap; | static const struct widedbcs_index *jisx0213_pair_decmap; | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | // AUTO-GENERATED FILE FROM genmap_japanese.py: DO NOT EDIT
 | ||||||
| static const ucs2_t __jisx0208_decmap[6956] = { | static const ucs2_t __jisx0208_decmap[6956] = { | ||||||
| 12288,12289,12290,65292,65294,12539,65306,65307,65311,65281,12443,12444,180, | 12288,12289,12290,65292,65294,12539,65306,65307,65311,65281,12443,12444,180, | ||||||
| 65344,168,65342,65507,65343,12541,12542,12445,12446,12291,20189,12293,12294, | 65344,168,65342,65507,65343,12541,12542,12445,12446,12291,20189,12293,12294, | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | // AUTO-GENERATED FILE FROM genmap_korean.py: DO NOT EDIT
 | ||||||
| static const ucs2_t __ksx1001_decmap[8264] = { | static const ucs2_t __ksx1001_decmap[8264] = { | ||||||
| 12288,12289,12290,183,8229,8230,168,12291,173,8213,8741,65340,8764,8216,8217, | 12288,12289,12290,183,8229,8230,168,12291,173,8213,8741,65340,8764,8216,8217, | ||||||
| 8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303,12304, | 8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303,12304, | ||||||
|  | @ -3249,3 +3250,4 @@ __cp949_encmap+31959,0,255},{__cp949_encmap+32215,0,255},{__cp949_encmap+32471 | ||||||
| __cp949_encmap+32891,0,11},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__cp949_encmap+ | __cp949_encmap+32891,0,11},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__cp949_encmap+ | ||||||
| 32903,1,230}, | 32903,1,230}, | ||||||
| }; | }; | ||||||
|  | 
 | ||||||
|  |  | ||||||
							
								
								
									
										251
									
								
								Tools/unicode/genmap_japanese.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										251
									
								
								Tools/unicode/genmap_japanese.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,251 @@ | ||||||
|  | # | ||||||
|  | # genmap_japanese.py: Japanese Codecs Map Generator | ||||||
|  | # | ||||||
|  | # Original Author:  Hye-Shik Chang <perky@FreeBSD.org> | ||||||
|  | # Modified Author:  Dong-hee Na <donghee.na92@gmail.com> | ||||||
|  | # | ||||||
|  | import os | ||||||
|  | 
 | ||||||
|  | from genmap_support import * | ||||||
|  | 
 | ||||||
# Each *_C1 / *_C2 constant is a (first, last) pair.  main() hands them to
# DecodeMapWriter.update_decode_map() as its c1range / c2range arguments,
# which iterate both ends inclusively.
JISX0208_C1 = (0x21, 0x74)
JISX0208_C2 = (0x21, 0x7e)
JISX0212_C1 = (0x22, 0x6d)
JISX0212_C2 = (0x21, 0x7e)
JISX0213_C1 = (0x21, 0x7e)
JISX0213_C2 = (0x21, 0x7e)
CP932P0_C1  = (0x81, 0x81) # patches between shift-jis and cp932
CP932P0_C2  = (0x5f, 0xca)
CP932P1_C1  = (0x87, 0x87) # CP932 P1
CP932P1_C2  = (0x40, 0x9c)
CP932P2_C1  = (0xed, 0xfc) # CP932 P2
CP932P2_C2  = (0x40, 0xfc)

# Remote locations of the mapping tables; main() passes each URL to
# open_mapping_file() together with a local 'python-mappings/...' path.
MAPPINGS_JIS0208 = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT'
MAPPINGS_JIS0212 = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT'
MAPPINGS_CP932 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT'
MAPPINGS_JISX0213_2004 = 'http://wakaba-web.hp.infoseek.co.jp/table/jisx0213-2004-std.txt'
|  | 
 | ||||||
|  | 
 | ||||||
def loadmap_jisx0213(fo):
    """Parse a JIS X 0213 mapping table into nested decode maps.

    *fo* is any iterable of text lines.  Everything after a ``#`` is ignored,
    and lines with fewer than two whitespace-separated columns are skipped.
    The first column looks like ``<level>-<hex code>`` (e.g. ``3-2121``); the
    second is either ``U+XXXX`` (one code point) or ``U+XXXX+YYYY`` (a
    body + modifier pair).

    Returns the tuple
    ``(decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair)``.  The first
    four are ``{c1: {c2: code}}`` dicts, where ``c1``/``c2`` are the high and
    low byte of the hex code; for the ``*_2`` maps the stored code has
    0x20000 subtracted.  ``decmap3_pair`` is ``{body: {c1: {c2: modifier}}}``.

    Raises ValueError("invalid map") when a pair appears on a level other
    than 3, or when a single code point is on an unsupported level or outside
    both the BMP and the U+2xxxx range.  (Previously the latter case crashed
    with AttributeError because the map was used before it was validated.)
    """
    decmap3, decmap4 = {}, {}  # maps to BMP for level 3 and 4
    decmap3_2, decmap4_2 = {}, {}  # maps to U+2xxxx for level 3 and 4
    decmap3_pair = {}  # maps to BMP-pair for level 3
    bmp_maps = {3: decmap3, 4: decmap4}
    plane2_maps = {3: decmap3_2, 4: decmap4_2}

    for line in fo:
        row = line.split('#', 1)[0].split()
        if len(row) < 2:
            continue

        # int() instead of eval(): the text comes from a data file and only
        # ever needs to be read as a number.
        loc = int(row[0][2:], 16)
        level = int(row[0][0])
        m = None
        if len(row[1].split('+')) == 2:  # single unicode
            uni = int(row[1][2:], 16)
            if uni < 0x10000:
                m = bmp_maps.get(level)
            elif 0x20000 <= uni < 0x30000:
                m = plane2_maps.get(level)
                uni -= 0x20000
        else:  # pair
            uniprefix = int(row[1][2:6], 16)  # body
            uni = int(row[1][7:11], 16)  # modifier
            if level != 3:
                raise ValueError("invalid map")
            m = decmap3_pair.setdefault(uniprefix, {})

        # Validate before touching the map so bad rows fail with ValueError.
        if m is None:
            raise ValueError("invalid map")
        m.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair
|  | 
 | ||||||
|  | 
 | ||||||
def main():
    """Generate mappings_jp.h and mappings_jisx0213_pair.h.

    Loads the JIS X 0208, JIS X 0212, CP932 and JIS X 0213 mapping tables,
    derives the encode maps from the decode maps, and writes both headers
    into the current working directory.

    Raises SystemExit when the JIS X 0213 table does not map 0x2124 to
    U+FF0C, and ValueError when JIS X 0208 and JIS X 0213 plane 1 disagree.
    """
    jisx0208file = open_mapping_file('python-mappings/JIS0208.TXT', MAPPINGS_JIS0208)
    jisx0212file = open_mapping_file('python-mappings/JIS0212.TXT', MAPPINGS_JIS0212)
    cp932file = open_mapping_file('python-mappings/CP932.TXT', MAPPINGS_CP932)
    jisx0213file = open_mapping_file('python-mappings/jisx0213-2004-std.txt', MAPPINGS_JISX0213_2004)

    print("Loading Mapping File...")

    # JIS0208.TXT is read twice with different native columns: column 0 for
    # the Shift-JIS view and column 1 for the JIS X 0208 view.
    sjisdecmap = loadmap(jisx0208file, natcol=0, unicol=2)
    jisx0208decmap = loadmap(jisx0208file, natcol=1, unicol=2)
    jisx0212decmap = loadmap(jisx0212file)
    cp932decmap = loadmap(cp932file)
    jis3decmap, jis4decmap, jis3_2_decmap, jis4_2_decmap, jis3_pairdecmap = loadmap_jisx0213(jisx0213file)

    if jis3decmap[0x21][0x24] != 0xff0c:
        raise SystemExit('Please adjust your JIS X 0213 map using jisx0213-2000-std.txt.diff')

    sjisencmap, cp932encmap = {}, {}
    jisx0208_0212encmap = {}
    for c1, m in sjisdecmap.items():
        for c2, code in m.items():
            sjisencmap.setdefault(code >> 8, {})
            sjisencmap[code >> 8][code & 0xff] = c1 << 8 | c2
    for c1, m in cp932decmap.items():
        for c2, code in m.items():
            cp932encmap.setdefault(code >> 8, {})
            # First mapping wins when several native codes share a code point.
            if (code & 0xff) not in cp932encmap[code >> 8]:
                cp932encmap[code >> 8][code & 0xff] = c1 << 8 | c2
    # Keep only the CP932 entries that differ from the Shift-JIS encode map.
    # Iterate over copies because entries are deleted along the way.
    for c1, m in cp932encmap.copy().items():
        for c2, code in m.copy().items():
            if c1 in sjisencmap and c2 in sjisencmap[c1] and sjisencmap[c1][c2] == code:
                del cp932encmap[c1][c2]
                if not cp932encmap[c1]:
                    del cp932encmap[c1]

    jisx0213pairdecmap = {}
    jisx0213pairencmap = []
    for unibody, m1 in jis3_pairdecmap.items():
        for c1, m2 in m1.items():
            for c2, modifier in m2.items():
                jisx0213pairencmap.append((unibody, modifier, c1 << 8 | c2))
                jisx0213pairdecmap.setdefault(c1, {})
                jisx0213pairdecmap[c1][c2] = unibody << 16 | modifier

    # Twinmap for both of JIS X 0208 (MSB unset) and JIS X 0212 (MSB set)
    for c1, m in jisx0208decmap.items():
        for c2, code in m.items():
            jisx0208_0212encmap.setdefault(code >> 8, {})
            jisx0208_0212encmap[code >> 8][code & 0xff] = c1 << 8 | c2

    for c1, m in jisx0212decmap.items():
        for c2, code in m.items():
            jisx0208_0212encmap.setdefault(code >> 8, {})
            if (code & 0xff) in jisx0208_0212encmap[code >> 8]:
                print("OOPS!!!", (code))
            jisx0208_0212encmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2

    jisx0213bmpencmap = {}
    # Copies again: plane-1 entries identical to JIS X 0208 are removed from
    # jis3decmap while walking it.
    for c1, m in jis3decmap.copy().items():
        for c2, code in m.copy().items():
            if c1 in jisx0208decmap and c2 in jisx0208decmap[c1]:
                if code in jis3_pairdecmap:
                    # Create the row first; it may not exist yet, and the
                    # plain subscript used to raise KeyError in that case.
                    jisx0213bmpencmap.setdefault(code >> 8, {})
                    jisx0213bmpencmap[code >> 8][code & 0xff] = (0,) # pair
                    jisx0213pairencmap.append((code, 0, c1 << 8 | c2))
                elif jisx0208decmap[c1][c2] == code:
                    del jis3decmap[c1][c2]
                    if not jis3decmap[c1]:
                        del jis3decmap[c1]
                else:
                    raise ValueError("Difference between JIS X 0208 and JIS X 0213 Plane 1 is found.")
            else:
                jisx0213bmpencmap.setdefault(code >> 8, {})
                if code not in jis3_pairdecmap:
                    jisx0213bmpencmap[code >> 8][code & 0xff] = c1 << 8 | c2
                else:
                    jisx0213bmpencmap[code >> 8][code & 0xff] = (0,) # pair
                    jisx0213pairencmap.append((code, 0, c1 << 8 | c2))

    for c1, m in jis4decmap.items():
        for c2, code in m.items():
            jisx0213bmpencmap.setdefault(code >> 8, {})
            jisx0213bmpencmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2

    jisx0213empencmap = {}
    for c1, m in jis3_2_decmap.items():
        for c2, code in m.items():
            jisx0213empencmap.setdefault(code >> 8, {})
            jisx0213empencmap[code >> 8][code & 0xff] = c1 << 8 | c2
    for c1, m in jis4_2_decmap.items():
        for c2, code in m.items():
            jisx0213empencmap.setdefault(code >> 8, {})
            jisx0213empencmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2

    with open("mappings_jp.h", "w") as fp:
        print_autogen(fp, os.path.basename(__file__))
        print("Generating JIS X 0208 decode map...")
        writer = DecodeMapWriter(fp, "jisx0208", jisx0208decmap)
        writer.update_decode_map(JISX0208_C1, JISX0208_C2)
        writer.generate()

        print("Generating JIS X 0212 decode map...")
        writer = DecodeMapWriter(fp, "jisx0212", jisx0212decmap)
        writer.update_decode_map(JISX0212_C1, JISX0212_C2)
        writer.generate()

        print("Generating JIS X 0208 && JIS X 0212 encode map...")
        writer = EncodeMapWriter(fp, "jisxcommon", jisx0208_0212encmap)
        writer.generate()

        print("Generating CP932 Extension decode map...")
        writer = DecodeMapWriter(fp, "cp932ext", cp932decmap)
        writer.update_decode_map(CP932P0_C1, CP932P0_C2)
        writer.update_decode_map(CP932P1_C1, CP932P1_C2)
        writer.update_decode_map(CP932P2_C1, CP932P2_C2)
        writer.generate()

        print("Generating CP932 Extension encode map...")
        writer = EncodeMapWriter(fp, "cp932ext", cp932encmap)
        writer.generate()

        print("Generating JIS X 0213 Plane 1 BMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_1_bmp", jis3decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 Plane 2 BMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_2_bmp", jis4decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 BMP encode map...")
        writer = EncodeMapWriter(fp, "jisx0213_bmp", jisx0213bmpencmap)
        writer.generate()

        print("Generating JIS X 0213 Plane 1 EMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_1_emp", jis3_2_decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 Plane 2 EMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_2_emp", jis4_2_decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 EMP encode map...")
        writer = EncodeMapWriter(fp, "jisx0213_emp", jisx0213empencmap)
        writer.generate()

    with open('mappings_jisx0213_pair.h', 'w') as fp:
        print_autogen(fp, os.path.basename(__file__))
        fp.write(f"#define JISX0213_ENCPAIRS {len(jisx0213pairencmap)}\n")
        fp.write("""\
#ifdef EXTERN_JISX0213_PAIR
static const struct widedbcs_index *jisx0213_pair_decmap;
static const struct pair_encodemap *jisx0213_pair_encmap;
#else
""")

        print("Generating JIS X 0213 unicode-pair decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_pair", jisx0213pairdecmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate(wide=True)

        print("Generating JIS X 0213 unicode-pair encode map...")
        # Emit the pair table sorted by (body, modifier, jis).
        jisx0213pairencmap.sort()
        fp.write("static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {\n")
        filler = BufferedFiller()
        for body, modifier, jis in jisx0213pairencmap:
            filler.write('{', '0x%04x%04x,' % (body, modifier), '0x%04x' % jis, '},')
        filler.printout(fp)
        fp.write("};\n")
        fp.write("#endif\n")

    print("Done!")


if __name__ == '__main__':
    main()
							
								
								
									
										62
									
								
								Tools/unicode/genmap_korean.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								Tools/unicode/genmap_korean.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,62 @@ | ||||||
|  | # | ||||||
|  | # genmap_korean.py: Korean Codecs Map Generator | ||||||
|  | # | ||||||
|  | # Original Author:  Hye-Shik Chang <perky@FreeBSD.org> | ||||||
|  | # Modified Author:  Dong-hee Na <donghee.na92@gmail.com> | ||||||
|  | # | ||||||
|  | import os | ||||||
|  | 
 | ||||||
|  | from genmap_support import * | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
# (first, last) pairs handed to DecodeMapWriter.update_decode_map() as its
# c1range / c2range arguments; both ends are iterated inclusively.
KSX1001_C1 = (0x21, 0x7e)
KSX1001_C2 = (0x21, 0x7e)
UHCL1_C1 = (0x81, 0xa0)
UHCL1_C2 = (0x41, 0xfe)
UHCL2_C1 = (0xa1, 0xfe)
UHCL2_C2 = (0x41, 0xa0)
# Remote location of the CP949 table; main() passes it to open_mapping_file()
# together with the local 'python-mappings/CP949.TXT' path.
MAPPINGS_CP949 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT'
|  | 
 | ||||||
|  | 
 | ||||||
def main():
    """Generate mappings_kr.h from the CP949 mapping table.

    Entries whose two bytes are both >= 0xa1 go to the KS X 1001 decode map
    with the high bit of each byte cleared; every other entry goes to the
    UHC decode map unchanged.  One shared encode map covers both groups.
    """
    mapfile = open_mapping_file('python-mappings/CP949.TXT', MAPPINGS_CP949)
    print("Loading Mapping File...")
    decmap = loadmap(mapfile)

    uhcdecmap, ksx1001decmap, cp949encmap = {}, {}, {}
    for lead, trail_map in decmap.items():
        for trail, code in trail_map.items():
            native = lead << 8 | trail
            if lead >= 0xa1 and trail >= 0xa1:
                ksx1001decmap.setdefault(lead & 0x7f, {})[trail & 0x7f] = code
                native &= 0x7f7f
            else:
                # uhc: the native code is stored as-is (MSB set)
                uhcdecmap.setdefault(lead, {})[trail] = code
            cp949encmap.setdefault(code >> 8, {})[code & 0xFF] = native

    with open('mappings_kr.h', 'w') as fp:
        print_autogen(fp, os.path.basename(__file__))

        print("Generating KS X 1001 decode map...")
        ksx1001_writer = DecodeMapWriter(fp, "ksx1001", ksx1001decmap)
        ksx1001_writer.update_decode_map(KSX1001_C1, KSX1001_C2)
        ksx1001_writer.generate()

        print("Generating UHC decode map...")
        uhc_writer = DecodeMapWriter(fp, "cp949ext", uhcdecmap)
        for c1range, c2range in ((UHCL1_C1, UHCL1_C2), (UHCL2_C1, UHCL2_C2)):
            uhc_writer.update_decode_map(c1range, c2range)
        uhc_writer.generate()

        print("Generating CP949 (includes KS X 1001) encode map...")
        EncodeMapWriter(fp, "cp949", cp949encmap).generate()

    print("Done!")


if __name__ == '__main__':
    main()
							
								
								
									
										149
									
								
								Tools/unicode/genmap_schinese.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										149
									
								
								Tools/unicode/genmap_schinese.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,149 @@ | ||||||
|  | # | ||||||
|  | # genmap_schinese.py: Simplified Chinese Codecs Map Generator | ||||||
|  | # | ||||||
|  | # Original Author:  Hye-Shik Chang <perky@FreeBSD.org> | ||||||
|  | # Modified Author:  Dong-hee Na <donghee.na92@gmail.com> | ||||||
|  | # | ||||||
|  | import os | ||||||
|  | import re | ||||||
|  | 
 | ||||||
|  | from genmap_support import * | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
# (first, last) pairs handed to DecodeMapWriter.update_decode_map() as its
# c1range / c2range arguments; both ends are iterated inclusively.
GB2312_C1   = (0x21, 0x7e)
GB2312_C2   = (0x21, 0x7e)
GBKL1_C1    = (0x81, 0xa8)
GBKL1_C2    = (0x40, 0xfe)
GBKL2_C1    = (0xa9, 0xfe)
GBKL2_C2    = (0x40, 0xa0)
# GB18030EXTP1..GB18030EXTP5: main() looks these up by number (1-5) when
# building the "gb18030ext" decode map, so the names must stay sequential.
GB18030EXTP1_C1 = (0xa1, 0xa9)
GB18030EXTP1_C2 = (0x40, 0xfe)
GB18030EXTP2_C1 = (0xaa, 0xaf)
GB18030EXTP2_C2 = (0xa1, 0xfe)
GB18030EXTP3_C1 = (0xd7, 0xd7)
GB18030EXTP3_C2 = (0xfa, 0xfe)
GB18030EXTP4_C1 = (0xf8, 0xfd)
GB18030EXTP4_C2 = (0xa1, 0xfe)
GB18030EXTP5_C1 = (0xfe, 0xfe)
GB18030EXTP5_C2 = (0x50, 0xfe)

# Remote locations of the mapping tables; main() passes each URL to
# open_mapping_file() together with a local 'python-mappings/...' path.
MAPPINGS_GB2312 = 'http://people.freebsd.org/~perky/i18n/GB2312.TXT'
MAPPINGS_CP936 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT'
MAPPINGS_GB18030 = 'http://oss.software.ibm.com/cvs/icu/~checkout~/charset/data/xml/gb-18030-2000.xml'
|  | 
 | ||||||
# Matches one assignment element: a 4-hex-digit code point ("u") and its
# space-separated native byte sequence ("b").
re_gb18030ass = re.compile('<a u="([A-F0-9]{4})" b="([0-9A-F ]+)"/>')


def parse_gb18030map(fo):
    """Extract the two-byte GB18030 mappings from an XML mapping file.

    *fo* is a readable text object; its whole content is scanned for
    ``<a u="XXXX" b="..."/>`` elements.

    Returns ``(m, gbuni)``:

    * ``m`` is ``{byte1: {byte2: code_point}}`` for every element whose
      native sequence is exactly two bytes long.
    * ``gbuni`` is the sorted list of BMP code points, surrogates
      (U+D800-U+DFFF) excluded, that were *not* assigned a one- or two-byte
      sequence.

    Raises KeyError if the same code point is assigned a one- or two-byte
    sequence more than once, or if such an assignment names a surrogate.
    """
    m = {}
    # Every non-surrogate BMP code point starts out as a candidate.
    unassigned = set(range(0xd800)) | set(range(0xe000, 0x10000))
    for uni_text, native_text in re_gb18030ass.findall(fo.read()):
        # int() instead of eval(): the text comes from a data file.
        uni = int(uni_text, 16)
        native = [int(byte, 16) for byte in native_text.split()]
        if len(native) <= 2:
            unassigned.remove(uni)
        if len(native) == 2:  # we can decode algorithmically for 1 or 4 bytes
            m.setdefault(native[0], {})[native[1]] = uni
    return m, sorted(unassigned)
|  | 
 | ||||||
def main():
    """Generate mappings_cn.h from the GB2312, CP936 and GB18030 tables.

    The GBK decode map keeps only the entries that differ from GB2312, and
    the GB18030 extension map keeps only the two-byte entries that are not in
    CP936.  The header is written into the current working directory.

    Raises KeyError when a CP936 entry is missing from the GB18030 table or a
    GB2312 entry is missing from CP936.
    """
    print("Loading Mapping File...")
    gb2312map = open_mapping_file('python-mappings/GB2312.TXT', MAPPINGS_GB2312)
    cp936map = open_mapping_file('python-mappings/CP936.TXT', MAPPINGS_CP936)
    gb18030map = open_mapping_file('python-mappings/gb-18030-2000.xml', MAPPINGS_GB18030)

    gb18030decmap, gb18030unilinear = parse_gb18030map(gb18030map)
    gbkdecmap = loadmap(cp936map)
    gb2312decmap = loadmap(gb2312map)
    # Drop from the GB18030 map everything CP936 already provides.
    for c1, m in gbkdecmap.items():
        for c2, code in m.items():
            del gb18030decmap[c1][c2]
            if not gb18030decmap[c1]:
                del gb18030decmap[c1]
    # Drop from the GBK map everything GB2312 maps to the same code point.
    for c1, m in gb2312decmap.items():
        for c2, code in m.items():
            gbkc1, gbkc2 = c1 | 0x80, c2 | 0x80
            if gbkdecmap[gbkc1][gbkc2] == code:
                del gbkdecmap[gbkc1][gbkc2]
                if not gbkdecmap[gbkc1]:
                    del gbkdecmap[gbkc1]

    gb2312_gbkencmap, gb18030encmap = {}, {}
    for c1, m in gbkdecmap.items():
        for c2, code in m.items():
            gb2312_gbkencmap.setdefault(code >> 8, {})
            gb2312_gbkencmap[code >> 8][code & 0xff] = c1 << 8 | c2 # MSB set
    for c1, m in gb2312decmap.items():
        for c2, code in m.items():
            gb2312_gbkencmap.setdefault(code >> 8, {})
            gb2312_gbkencmap[code >> 8][code & 0xff] = c1 << 8 | c2 # MSB unset
    for c1, m in gb18030decmap.items():
        for c2, code in m.items():
            gb18030encmap.setdefault(code >> 8, {})
            gb18030encmap[code >> 8][code & 0xff] = c1 << 8 | c2

    with open('mappings_cn.h', 'w') as fp:
        print_autogen(fp, os.path.basename(__file__))

        print("Generating GB2312 decode map...")
        writer = DecodeMapWriter(fp, "gb2312", gb2312decmap)
        writer.update_decode_map(GB2312_C1, GB2312_C2)
        writer.generate()

        print("Generating GBK decode map...")
        writer = DecodeMapWriter(fp, "gbkext", gbkdecmap)
        writer.update_decode_map(GBKL1_C1, GBKL1_C2)
        writer.update_decode_map(GBKL2_C1, GBKL2_C2)
        writer.generate()

        print("Generating GB2312 && GBK encode map...")
        writer = EncodeMapWriter(fp, "gbcommon", gb2312_gbkencmap)
        writer.generate()

        print("Generating GB18030 extension decode map...")
        writer = DecodeMapWriter(fp, "gb18030ext", gb18030decmap)
        # Explicit list instead of eval()-built constant names; same order.
        gb18030ext_ranges = (
            (GB18030EXTP1_C1, GB18030EXTP1_C2),
            (GB18030EXTP2_C1, GB18030EXTP2_C2),
            (GB18030EXTP3_C1, GB18030EXTP3_C2),
            (GB18030EXTP4_C1, GB18030EXTP4_C2),
            (GB18030EXTP5_C1, GB18030EXTP5_C2),
        )
        for c1range, c2range in gb18030ext_ranges:
            writer.update_decode_map(c1range, c2range)

        writer.generate()

        print("Generating GB18030 extension encode map...")
        writer = EncodeMapWriter(fp, "gb18030ext", gb18030encmap)
        writer.generate()

        print("Generating GB18030 Unicode BMP Mapping Ranges...")
        # Sentinel first entry so ranges[-1] always exists; skipped on output.
        ranges = [[-1, -1, -1]]
        gblinnum = 0
        fp.write("""
static const struct _gb18030_to_unibmp_ranges {
    Py_UCS4   first, last;
    DBCHAR       base;
} gb18030_to_unibmp_ranges[] = {
""")

        # Collapse consecutive code points into [first, last, base] runs,
        # where base is the running index of the run's first code point.
        for uni in gb18030unilinear:
            if uni == ranges[-1][1] + 1:
                ranges[-1][1] = uni
            else:
                ranges.append([uni, uni, gblinnum])
            gblinnum += 1

        filler = BufferedFiller()
        for first, last, base in ranges[1:]:
            filler.write('{', str(first), ',', str(last), ',', str(base), '},')

        # Terminating entry: first/last are 0, base is one past the last run.
        filler.write('{', '0,', '0,', str(
            ranges[-1][2] + ranges[-1][1] - ranges[-1][0] + 1), '}', '};')
        filler.printout(fp)

    print("Done!")


if __name__ == '__main__':
    main()
							
								
								
									
										198
									
								
								Tools/unicode/genmap_support.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										198
									
								
								Tools/unicode/genmap_support.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,198 @@ | ||||||
|  | # | ||||||
|  | # genmap_support.py: Multibyte Codec Map Generator | ||||||
|  | # | ||||||
|  | # Original Author:  Hye-Shik Chang <perky@FreeBSD.org> | ||||||
|  | # Modified Author:  Dong-hee Na <donghee.na92@gmail.com> | ||||||
|  | # | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class BufferedFiller:
    """Pack string tokens into lines that are at most ``column`` wide.

    Tokens are joined without separators.  A token that would push the
    current line past ``column`` starts a new line instead.  ``len()``
    reports how many tokens have been written in total; it is not reset by
    ``flush()`` or ``printout()``.
    """

    def __init__(self, column=78):
        self.column = column
        self.buffered = []  # completed lines awaiting printout()
        self.cline = []     # tokens of the line being assembled
        self.clen = 0       # combined length of the tokens in cline
        self.count = 0      # total number of tokens written

    def write(self, *data):
        """Append each token in *data*; raise ValueError if one exceeds ``column``."""
        for token in data:
            width = len(token)
            if width > self.column:
                raise ValueError("token is too long")
            if self.clen + width > self.column:
                self.flush()
            self.cline.append(token)
            self.clen += width
            self.count += 1

    def flush(self):
        """Move the line being assembled, if any, to the completed lines."""
        if self.cline:
            self.buffered.append(''.join(self.cline))
            self.clen = 0
            self.cline.clear()

    def printout(self, fp):
        """Write every line to *fp*, newline-terminated, and empty the buffer."""
        self.flush()
        for text in self.buffered:
            fp.write(f'{text}\n')
        self.buffered.clear()

    def __len__(self):
        return self.count
|  | 
 | ||||||
|  | 
 | ||||||
class DecodeMapWriter:
    """Emit a C decode table plus its 256-entry index for one codec prefix.

    ``decode_map`` is a ``{c1: {c2: value}}`` dict.  update_decode_map()
    stores bookkeeping entries (the prefix itself, 'min', 'max', 'midx')
    inside the inner dicts, next to the integer keys, and generate() reads
    them back when writing the index.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        self.fp = fp
        self.prefix = prefix
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Queue the values for every c1 in *c1range* that has data.

        Both ranges are (first, last) pairs, inclusive.  When *onlymask* is
        non-empty, only the c1 values it contains are processed.  *wide* is
        accepted but not used here.
        """
        candidates = range(c2range[0], c2range[1] + 1)

        for c1 in range(c1range[0], c1range[1] + 1):
            if c1 not in self.decode_map:
                continue
            if onlymask and c1 not in onlymask:
                continue
            row = self.decode_map[c1]
            present = [c2 for c2 in candidates if c2 in row]
            if not present:
                continue

            low, high = present[0], present[-1]
            row[self.prefix] = True
            row['min'] = low
            row['max'] = high
            # Offset of this row's first value within the flat array.
            row['midx'] = len(self.filler)

            # Gaps between low and high are filled with the C token 'U'.
            for c2 in range(low, high + 1):
                token = '%d,' % row[c2] if c2 in row else 'U,'
                self.filler.write(token)

    def generate(self, wide=False):
        """Write the value array and the index array to ``self.fp``.

        With *wide* true the array element type is Py_UCS4 and the index
        struct is widedbcs_index; otherwise ucs2_t and dbcs_index.
        """
        if wide:
            array_header = f"static const Py_UCS4 __{self.prefix}_decmap[{len(self.filler)}] = {{\n"
            index_header = f"static const struct widedbcs_index {self.prefix}_decmap[256] = {{\n"
        else:
            array_header = f"static const ucs2_t __{self.prefix}_decmap[{len(self.filler)}] = {{\n"
            index_header = f"static const struct dbcs_index {self.prefix}_decmap[256] = {{\n"

        self.fp.write(array_header)
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        self.fp.write(index_header)
        for i in range(256):
            if i not in self.decode_map or self.prefix not in self.decode_map[i]:
                # No data queued for this c1 under this prefix.
                self.filler.write("{", "0,", "0,", "0", "},")
                continue

            info = self.decode_map[i]
            self.filler.write("{", "__%s_decmap" % self.prefix, "+", "%d" % info['midx'],
                              ",", "%d," % info['min'], "%d" % info['max'], "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
|  | 
 | ||||||
|  | 
 | ||||||
class EncodeMapWriter:
    """Write an encode table and its 256-entry index as C source.

    ``encode_map`` is a two-level mapping: first-level integer key (0..255)
    -> dict of second-level integer key -> either an ``int`` or a ``tuple``.
    :meth:`generate` builds the table and writes it to ``fp``.

    ``filler_class``, ``elemtype`` and ``indextype`` are class attributes, and
    the ``write_*`` methods are separate hooks, so a subclass can presumably
    customise the emitted types and tokens.
    """

    # Class used to accumulate and line-wrap the emitted tokens.
    filler_class = BufferedFiller
    # C element type of the data array.
    elemtype = 'DBCHAR'
    # C type of each entry in the 256-entry index array.
    indextype = 'struct unim_index'

    def __init__(self, fp, prefix, encode_map):
        """Remember the output file, the C identifier prefix and the map."""
        self.fp = fp
        self.prefix = prefix
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Build the table data and then write both C arrays to ``fp``."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Queue the table data for every first-level key present in the map.

        Each processed second-level dict is annotated in place with
        ``self.prefix`` (as a marker), ``'min'``, ``'max'`` and ``'midx'``
        (the filler's length before its run is written).

        Raises ValueError if a mapped value is neither an int nor a tuple.
        """
        for c1 in range(256):
            if c1 not in self.encode_map:
                continue
            c2map = self.encode_map[c1]
            # Only integer keys are table slots.  The bookkeeping entries
            # stored below use string keys; if this dict was already
            # annotated (by an earlier build or another writer) sorting all
            # keys together would raise TypeError, so leave them out.
            rc2values = sorted(k for k in c2map if isinstance(k, int))
            if not rc2values:
                continue

            c2map[self.prefix] = True
            c2map['min'] = rc2values[0]
            c2map['max'] = rc2values[-1]
            c2map['midx'] = len(self.filler)

            # One token per slot between the extremes, including the gaps.
            for v in range(rc2values[0], rc2values[-1] + 1):
                if v not in c2map:
                    self.write_nochar()
                elif isinstance(c2map[v], int):
                    self.write_char(c2map[v])
                elif isinstance(c2map[v], tuple):
                    self.write_multic(c2map[v])
                else:
                    raise ValueError(
                        f"unsupported mapping value {c2map[v]!r} at [{c1}][{v}]")

    def write_nochar(self):
        """Emit the token for a slot that has no mapping."""
        self.filler.write('N,')

    def write_multic(self, point):
        """Emit the token for a tuple-valued slot; *point* itself is not written."""
        self.filler.write('M,')

    def write_char(self, point):
        """Emit *point* followed by a comma."""
        self.filler.write(str(point) + ',')

    def printmap(self):
        """Write the data array followed by the 256-entry index array."""
        self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
        self.fp.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")

        for i in range(256):
            if i in self.encode_map and self.prefix in self.encode_map[i]:
                self.filler.write("{", "__%s_encmap" % self.prefix, "+",
                                  "%d" % self.encode_map[i]['midx'], ",",
                                  "%d," % self.encode_map[i]['min'],
                                  "%d" % self.encode_map[i]['max'], "},")
            else:
                # Keys that buildmap() did not annotate get an all-zero entry.
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
|  | 
 | ||||||
|  | 
 | ||||||
def open_mapping_file(path, source):
    """Open the mapping file at *path* for reading and return the file object.

    If the file cannot be opened, exit with the message
    ``"<source> is needed"``, where *source* tells the user what is missing.
    """
    try:
        return open(path)
    except IOError:
        raise SystemExit(f'{source} is needed')
|  | 
 | ||||||
|  | 
 | ||||||
def print_autogen(fo, source):
    """Write the one-line "auto-generated, do not edit" C comment to *fo*.

    *source* is inserted into the banner as the name of the generator.
    """
    banner = f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n'
    fo.write(banner)
|  | 
 | ||||||
|  | 
 | ||||||
def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Read a whitespace-separated mapping file into a two-level dict.

    fo -- seekable text file object; it is rewound before reading.
    natcol -- index of the column holding the source code value.
    unicol -- index of the column holding the value it maps to.
    sbcs -- when true, source codes below 0x100 are kept as well
            (stored under first-level key 0); otherwise they are skipped.

    Text after ``#`` on a line is ignored.  Rows with fewer than two fields,
    or without both requested columns, are skipped.  The result is
    ``{code >> 8: {code & 0xff: value}}``.

    Raises ValueError if a used column is not an integer literal.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    # Never accept fewer than two fields, and make sure both requested
    # columns exist so short rows are skipped instead of raising IndexError.
    needed = max(natcol, unicol, 1) + 1
    decmap = {}
    for line in fo:
        fields = line.split('#', 1)[0].split()
        if len(fields) < needed:
            continue

        # The tokens come from an external data file: parse them strictly as
        # integer literals (base 0 honours the 0x prefix) instead of eval(),
        # which would execute arbitrary expressions and admit non-int values.
        loc = int(fields[natcol], 0)
        uni = int(fields[unicol], 0)
        if loc >= 0x100 or sbcs:
            decmap.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap
							
								
								
									
										7515
									
								
								Tools/unicode/python-mappings/GB2312.TXT
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										7515
									
								
								Tools/unicode/python-mappings/GB2312.TXT
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										271
									
								
								Tools/unicode/python-mappings/diff/jisx0213-2000-std.txt.diff
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										271
									
								
								Tools/unicode/python-mappings/diff/jisx0213-2000-std.txt.diff
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,271 @@ | ||||||
|  | --- jisx0213-2000-std.txt.orig	Tue Apr 16 23:32:38 2002
 | ||||||
|  | +++ jisx0213-2000-std.txt	Wed Jun 16 14:49:05 2004
 | ||||||
|  | @@ -23,21 +23,21 @@
 | ||||||
|  |  3-2121	U+3000	# IDEOGRAPHIC SPACE | ||||||
|  |  3-2122	U+3001	# IDEOGRAPHIC COMMA | ||||||
|  |  3-2123	U+3002	# IDEOGRAPHIC FULL STOP | ||||||
|  | -3-2124	U+002C	# COMMA	Fullwidth: U+FF0C
 | ||||||
|  | -3-2125	U+002E	# FULL STOP	Fullwidth: U+FF0E
 | ||||||
|  | +3-2124	U+FF0C	# COMMA	Fullwidth: U+FF0C
 | ||||||
|  | +3-2125	U+FF0E	# FULL STOP	Fullwidth: U+FF0E
 | ||||||
|  |  3-2126	U+30FB	# KATAKANA MIDDLE DOT | ||||||
|  | -3-2127	U+003A	# COLON	Fullwidth: U+FF1A
 | ||||||
|  | -3-2128	U+003B	# SEMICOLON	Fullwidth: U+FF1B
 | ||||||
|  | -3-2129	U+003F	# QUESTION MARK	Fullwidth: U+FF1F
 | ||||||
|  | -3-212A	U+0021	# EXCLAMATION MARK	Fullwidth: U+FF01
 | ||||||
|  | +3-2127	U+FF1A	# COLON	Fullwidth: U+FF1A
 | ||||||
|  | +3-2128	U+FF1B	# SEMICOLON	Fullwidth: U+FF1B
 | ||||||
|  | +3-2129	U+FF1F	# QUESTION MARK	Fullwidth: U+FF1F
 | ||||||
|  | +3-212A	U+FF01	# EXCLAMATION MARK	Fullwidth: U+FF01
 | ||||||
|  |  3-212B	U+309B	# KATAKANA-HIRAGANA VOICED SOUND MARK | ||||||
|  |  3-212C	U+309C	# KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK | ||||||
|  |  3-212D	U+00B4	# ACUTE ACCENT | ||||||
|  | -3-212E	U+0060	# GRAVE ACCENT	Fullwidth: U+FF40
 | ||||||
|  | +3-212E	U+FF40	# GRAVE ACCENT	Fullwidth: U+FF40
 | ||||||
|  |  3-212F	U+00A8	# DIAERESIS | ||||||
|  | -3-2130	U+005E	# CIRCUMFLEX ACCENT	Fullwidth: U+FF3E
 | ||||||
|  | -3-2131	U+203E	# OVERLINE	Windows: U+FFE3
 | ||||||
|  | -3-2132	U+005F	# LOW LINE	Fullwidth: U+FF3F
 | ||||||
|  | +3-2130	U+FF3E	# CIRCUMFLEX ACCENT	Fullwidth: U+FF3E
 | ||||||
|  | +3-2131	U+FFE3	# OVERLINE	Windows: U+FFE3
 | ||||||
|  | +3-2132	U+FF3F	# LOW LINE	Fullwidth: U+FF3F
 | ||||||
|  |  3-2133	U+30FD	# KATAKANA ITERATION MARK | ||||||
|  |  3-2134	U+30FE	# KATAKANA VOICED ITERATION MARK | ||||||
|  |  3-2135	U+309D	# HIRAGANA ITERATION MARK | ||||||
|  | @@ -48,27 +48,27 @@
 | ||||||
|  |  3-213A	U+3006	# IDEOGRAPHIC CLOSING MARK | ||||||
|  |  3-213B	U+3007	# IDEOGRAPHIC NUMBER ZERO | ||||||
|  |  3-213C	U+30FC	# KATAKANA-HIRAGANA PROLONGED SOUND MARK | ||||||
|  | -3-213D	U+2014	# EM DASH	Windows: U+2015
 | ||||||
|  | +3-213D	U+2015	# EM DASH	Windows: U+2015
 | ||||||
|  |  3-213E	U+2010	# HYPHEN | ||||||
|  | -3-213F	U+002F	# SOLIDUS	Fullwidth: U+FF0F
 | ||||||
|  | +3-213F	U+FF0F	# SOLIDUS	Fullwidth: U+FF0F
 | ||||||
|  |  3-2140	U+005C	# REVERSE SOLIDUS	Fullwidth: U+FF3C | ||||||
|  |  3-2141	U+301C	# WAVE DASH	Windows: U+FF5E | ||||||
|  |  3-2142	U+2016	# DOUBLE VERTICAL LINE	Windows: U+2225 | ||||||
|  | -3-2143	U+007C	# VERTICAL LINE	Fullwidth: U+FF5C
 | ||||||
|  | +3-2143	U+FF5C	# VERTICAL LINE	Fullwidth: U+FF5C
 | ||||||
|  |  3-2144	U+2026	# HORIZONTAL ELLIPSIS | ||||||
|  |  3-2145	U+2025	# TWO DOT LEADER | ||||||
|  |  3-2146	U+2018	# LEFT SINGLE QUOTATION MARK | ||||||
|  |  3-2147	U+2019	# RIGHT SINGLE QUOTATION MARK | ||||||
|  |  3-2148	U+201C	# LEFT DOUBLE QUOTATION MARK | ||||||
|  |  3-2149	U+201D	# RIGHT DOUBLE QUOTATION MARK | ||||||
|  | -3-214A	U+0028	# LEFT PARENTHESIS	Fullwidth: U+FF08
 | ||||||
|  | -3-214B	U+0029	# RIGHT PARENTHESIS	Fullwidth: U+FF09
 | ||||||
|  | +3-214A	U+FF08	# LEFT PARENTHESIS	Fullwidth: U+FF08
 | ||||||
|  | +3-214B	U+FF09	# RIGHT PARENTHESIS	Fullwidth: U+FF09
 | ||||||
|  |  3-214C	U+3014	# LEFT TORTOISE SHELL BRACKET | ||||||
|  |  3-214D	U+3015	# RIGHT TORTOISE SHELL BRACKET | ||||||
|  | -3-214E	U+005B	# LEFT SQUARE BRACKET	Fullwidth: U+FF3B
 | ||||||
|  | -3-214F	U+005D	# RIGHT SQUARE BRACKET	Fullwidth: U+FF3D
 | ||||||
|  | -3-2150	U+007B	# LEFT CURLY BRACKET	Fullwidth: U+FF5B
 | ||||||
|  | -3-2151	U+007D	# RIGHT CURLY BRACKET	Fullwidth: U+FF5D
 | ||||||
|  | +3-214E	U+FF3B	# LEFT SQUARE BRACKET	Fullwidth: U+FF3B
 | ||||||
|  | +3-214F	U+FF3D	# RIGHT SQUARE BRACKET	Fullwidth: U+FF3D
 | ||||||
|  | +3-2150	U+FF5B	# LEFT CURLY BRACKET	Fullwidth: U+FF5B
 | ||||||
|  | +3-2151	U+FF5D	# RIGHT CURLY BRACKET	Fullwidth: U+FF5D
 | ||||||
|  |  3-2152	U+3008	# LEFT ANGLE BRACKET | ||||||
|  |  3-2153	U+3009	# RIGHT ANGLE BRACKET | ||||||
|  |  3-2154	U+300A	# LEFT DOUBLE ANGLE BRACKET | ||||||
|  | @@ -79,15 +79,15 @@
 | ||||||
|  |  3-2159	U+300F	# RIGHT WHITE CORNER BRACKET | ||||||
|  |  3-215A	U+3010	# LEFT BLACK LENTICULAR BRACKET | ||||||
|  |  3-215B	U+3011	# RIGHT BLACK LENTICULAR BRACKET | ||||||
|  | -3-215C	U+002B	# PLUS SIGN	Fullwidth: U+FF0B
 | ||||||
|  | +3-215C	U+FF0B	# PLUS SIGN	Fullwidth: U+FF0B
 | ||||||
|  |  3-215D	U+2212	# MINUS SIGN	Windows: U+FF0D | ||||||
|  |  3-215E	U+00B1	# PLUS-MINUS SIGN | ||||||
|  |  3-215F	U+00D7	# MULTIPLICATION SIGN | ||||||
|  |  3-2160	U+00F7	# DIVISION SIGN | ||||||
|  | -3-2161	U+003D	# EQUALS SIGN	Fullwidth: U+FF1D
 | ||||||
|  | +3-2161	U+FF1D	# EQUALS SIGN	Fullwidth: U+FF1D
 | ||||||
|  |  3-2162	U+2260	# NOT EQUAL TO | ||||||
|  | -3-2163	U+003C	# LESS-THAN SIGN	Fullwidth: U+FF1C
 | ||||||
|  | -3-2164	U+003E	# GREATER-THAN SIGN	Fullwidth: U+FF1E
 | ||||||
|  | +3-2163	U+FF1C	# LESS-THAN SIGN	Fullwidth: U+FF1C
 | ||||||
|  | +3-2164	U+FF1E	# GREATER-THAN SIGN	Fullwidth: U+FF1E
 | ||||||
|  |  3-2165	U+2266	# LESS-THAN OVER EQUAL TO | ||||||
|  |  3-2166	U+2267	# GREATER-THAN OVER EQUAL TO | ||||||
|  |  3-2167	U+221E	# INFINITY | ||||||
|  | @@ -98,15 +98,15 @@
 | ||||||
|  |  3-216C	U+2032	# PRIME | ||||||
|  |  3-216D	U+2033	# DOUBLE PRIME | ||||||
|  |  3-216E	U+2103	# DEGREE CELSIUS | ||||||
|  | -3-216F	U+00A5	# YEN SIGN	Windows: U+FFE5
 | ||||||
|  | -3-2170	U+0024	# DOLLAR SIGN	Fullwidth: U+FF04
 | ||||||
|  | +3-216F	U+FFE5	# YEN SIGN	Windows: U+FFE5
 | ||||||
|  | +3-2170	U+FF04	# DOLLAR SIGN	Fullwidth: U+FF04
 | ||||||
|  |  3-2171	U+00A2	# CENT SIGN	Windows: U+FFE0 | ||||||
|  |  3-2172	U+00A3	# POUND SIGN	Windows: U+FFE1 | ||||||
|  | -3-2173	U+0025	# PERCENT SIGN	Fullwidth: U+FF05
 | ||||||
|  | -3-2174	U+0023	# NUMBER SIGN	Fullwidth: U+FF03
 | ||||||
|  | -3-2175	U+0026	# AMPERSAND	Fullwidth: U+FF06
 | ||||||
|  | -3-2176	U+002A	# ASTERISK	Fullwidth: U+FF0A
 | ||||||
|  | -3-2177	U+0040	# COMMERCIAL AT	Fullwidth: U+FF20
 | ||||||
|  | +3-2173	U+FF05	# PERCENT SIGN	Fullwidth: U+FF05
 | ||||||
|  | +3-2174	U+FF03	# NUMBER SIGN	Fullwidth: U+FF03
 | ||||||
|  | +3-2175	U+FF06	# AMPERSAND	Fullwidth: U+FF06
 | ||||||
|  | +3-2176	U+FF0A	# ASTERISK	Fullwidth: U+FF0A
 | ||||||
|  | +3-2177	U+FF20	# COMMERCIAL AT	Fullwidth: U+FF20
 | ||||||
|  |  3-2178	U+00A7	# SECTION SIGN | ||||||
|  |  3-2179	U+2606	# WHITE STAR | ||||||
|  |  3-217A	U+2605	# BLACK STAR | ||||||
|  | @@ -128,9 +128,9 @@
 | ||||||
|  |  3-222C	U+2191	# UPWARDS ARROW | ||||||
|  |  3-222D	U+2193	# DOWNWARDS ARROW | ||||||
|  |  3-222E	U+3013	# GETA MARK | ||||||
|  | -3-222F	U+0027	# APOSTROPHE	Fullwidth: U+FF07
 | ||||||
|  | -3-2230	U+0022	# QUOTATION MARK	[2000]	Fullwidth: U+FF02
 | ||||||
|  | -3-2231	U+002D	# HYPHEN-MINUS	[2000]	Fullwidth: U+FF0D
 | ||||||
|  | +3-222F	U+FF07	# APOSTROPHE	Fullwidth: U+FF07
 | ||||||
|  | +3-2230	U+FF02	# QUOTATION MARK	[2000]	Fullwidth: U+FF02
 | ||||||
|  | +3-2231	U+FF0D	# HYPHEN-MINUS	[2000]	Fullwidth: U+FF0D
 | ||||||
|  |  3-2232	U+007E	# TILDE	[2000]	Fullwidth: U+FF5E | ||||||
|  |  3-2233	U+3033	# VERTICAL KANA REPEAT MARK UPPER HALF	[2000] | ||||||
|  |  3-2234	U+3034	# VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF	[2000] | ||||||
|  | @@ -223,16 +223,16 @@
 | ||||||
|  |  3-232D	U+21E9	# DOWNWARDS WHITE ARROW	[2000] | ||||||
|  |  3-232E	U+2934	# ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS	[2000]	[Unicode3.2] | ||||||
|  |  3-232F	U+2935	# ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS	[2000]	[Unicode3.2] | ||||||
|  | -3-2330	U+0030	# DIGIT ZERO	Fullwidth: U+FF10
 | ||||||
|  | -3-2331	U+0031	# DIGIT ONE	Fullwidth: U+FF11
 | ||||||
|  | -3-2332	U+0032	# DIGIT TWO	Fullwidth: U+FF12
 | ||||||
|  | -3-2333	U+0033	# DIGIT THREE	Fullwidth: U+FF13
 | ||||||
|  | -3-2334	U+0034	# DIGIT FOUR	Fullwidth: U+FF14
 | ||||||
|  | -3-2335	U+0035	# DIGIT FIVE	Fullwidth: U+FF15
 | ||||||
|  | -3-2336	U+0036	# DIGIT SIX	Fullwidth: U+FF16
 | ||||||
|  | -3-2337	U+0037	# DIGIT SEVEN	Fullwidth: U+FF17
 | ||||||
|  | -3-2338	U+0038	# DIGIT EIGHT	Fullwidth: U+FF18
 | ||||||
|  | -3-2339	U+0039	# DIGIT NINE	Fullwidth: U+FF19
 | ||||||
|  | +3-2330	U+FF10	# DIGIT ZERO	Fullwidth: U+FF10
 | ||||||
|  | +3-2331	U+FF11	# DIGIT ONE	Fullwidth: U+FF11
 | ||||||
|  | +3-2332	U+FF12	# DIGIT TWO	Fullwidth: U+FF12
 | ||||||
|  | +3-2333	U+FF13	# DIGIT THREE	Fullwidth: U+FF13
 | ||||||
|  | +3-2334	U+FF14	# DIGIT FOUR	Fullwidth: U+FF14
 | ||||||
|  | +3-2335	U+FF15	# DIGIT FIVE	Fullwidth: U+FF15
 | ||||||
|  | +3-2336	U+FF16	# DIGIT SIX	Fullwidth: U+FF16
 | ||||||
|  | +3-2337	U+FF17	# DIGIT SEVEN	Fullwidth: U+FF17
 | ||||||
|  | +3-2338	U+FF18	# DIGIT EIGHT	Fullwidth: U+FF18
 | ||||||
|  | +3-2339	U+FF19	# DIGIT NINE	Fullwidth: U+FF19
 | ||||||
|  |  3-233A	U+29BF	# CIRCLED BULLET	[2000]	[Unicode3.2] | ||||||
|  |  3-233B	U+25C9	# FISHEYE	[2000] | ||||||
|  |  3-233C	U+303D	# PART ALTERNATION MARK	[2000]	[Unicode3.2] | ||||||
|  | @@ -240,64 +240,64 @@
 | ||||||
|  |  3-233E	U+FE45	# SESAME DOT	[2000]	[Unicode3.2] | ||||||
|  |  3-233F	U+25E6	# WHITE BULLET	[2000] | ||||||
|  |  3-2340	U+2022	# BULLET	[2000] | ||||||
|  | -3-2341	U+0041	# LATIN CAPITAL LETTER A	Fullwidth: U+FF21
 | ||||||
|  | -3-2342	U+0042	# LATIN CAPITAL LETTER B	Fullwidth: U+FF22
 | ||||||
|  | -3-2343	U+0043	# LATIN CAPITAL LETTER C	Fullwidth: U+FF23
 | ||||||
|  | -3-2344	U+0044	# LATIN CAPITAL LETTER D	Fullwidth: U+FF24
 | ||||||
|  | -3-2345	U+0045	# LATIN CAPITAL LETTER E	Fullwidth: U+FF25
 | ||||||
|  | -3-2346	U+0046	# LATIN CAPITAL LETTER F	Fullwidth: U+FF26
 | ||||||
|  | -3-2347	U+0047	# LATIN CAPITAL LETTER G	Fullwidth: U+FF27
 | ||||||
|  | -3-2348	U+0048	# LATIN CAPITAL LETTER H	Fullwidth: U+FF28
 | ||||||
|  | -3-2349	U+0049	# LATIN CAPITAL LETTER I	Fullwidth: U+FF29
 | ||||||
|  | -3-234A	U+004A	# LATIN CAPITAL LETTER J	Fullwidth: U+FF2A
 | ||||||
|  | -3-234B	U+004B	# LATIN CAPITAL LETTER K	Fullwidth: U+FF2B
 | ||||||
|  | -3-234C	U+004C	# LATIN CAPITAL LETTER L	Fullwidth: U+FF2C
 | ||||||
|  | -3-234D	U+004D	# LATIN CAPITAL LETTER M	Fullwidth: U+FF2D
 | ||||||
|  | -3-234E	U+004E	# LATIN CAPITAL LETTER N	Fullwidth: U+FF2E
 | ||||||
|  | -3-234F	U+004F	# LATIN CAPITAL LETTER O	Fullwidth: U+FF2F
 | ||||||
|  | -3-2350	U+0050	# LATIN CAPITAL LETTER P	Fullwidth: U+FF30
 | ||||||
|  | -3-2351	U+0051	# LATIN CAPITAL LETTER Q	Fullwidth: U+FF31
 | ||||||
|  | -3-2352	U+0052	# LATIN CAPITAL LETTER R	Fullwidth: U+FF32
 | ||||||
|  | -3-2353	U+0053	# LATIN CAPITAL LETTER S	Fullwidth: U+FF33
 | ||||||
|  | -3-2354	U+0054	# LATIN CAPITAL LETTER T	Fullwidth: U+FF34
 | ||||||
|  | -3-2355	U+0055	# LATIN CAPITAL LETTER U	Fullwidth: U+FF35
 | ||||||
|  | -3-2356	U+0056	# LATIN CAPITAL LETTER V	Fullwidth: U+FF36
 | ||||||
|  | -3-2357	U+0057	# LATIN CAPITAL LETTER W	Fullwidth: U+FF37
 | ||||||
|  | -3-2358	U+0058	# LATIN CAPITAL LETTER X	Fullwidth: U+FF38
 | ||||||
|  | -3-2359	U+0059	# LATIN CAPITAL LETTER Y	Fullwidth: U+FF39
 | ||||||
|  | -3-235A	U+005A	# LATIN CAPITAL LETTER Z	Fullwidth: U+FF3A
 | ||||||
|  | +3-2341	U+FF21	# LATIN CAPITAL LETTER A	Fullwidth: U+FF21
 | ||||||
|  | +3-2342	U+FF22	# LATIN CAPITAL LETTER B	Fullwidth: U+FF22
 | ||||||
|  | +3-2343	U+FF23	# LATIN CAPITAL LETTER C	Fullwidth: U+FF23
 | ||||||
|  | +3-2344	U+FF24	# LATIN CAPITAL LETTER D	Fullwidth: U+FF24
 | ||||||
|  | +3-2345	U+FF25	# LATIN CAPITAL LETTER E	Fullwidth: U+FF25
 | ||||||
|  | +3-2346	U+FF26	# LATIN CAPITAL LETTER F	Fullwidth: U+FF26
 | ||||||
|  | +3-2347	U+FF27	# LATIN CAPITAL LETTER G	Fullwidth: U+FF27
 | ||||||
|  | +3-2348	U+FF28	# LATIN CAPITAL LETTER H	Fullwidth: U+FF28
 | ||||||
|  | +3-2349	U+FF29	# LATIN CAPITAL LETTER I	Fullwidth: U+FF29
 | ||||||
|  | +3-234A	U+FF2A	# LATIN CAPITAL LETTER J	Fullwidth: U+FF2A
 | ||||||
|  | +3-234B	U+FF2B	# LATIN CAPITAL LETTER K	Fullwidth: U+FF2B
 | ||||||
|  | +3-234C	U+FF2C	# LATIN CAPITAL LETTER L	Fullwidth: U+FF2C
 | ||||||
|  | +3-234D	U+FF2D	# LATIN CAPITAL LETTER M	Fullwidth: U+FF2D
 | ||||||
|  | +3-234E	U+FF2E	# LATIN CAPITAL LETTER N	Fullwidth: U+FF2E
 | ||||||
|  | +3-234F	U+FF2F	# LATIN CAPITAL LETTER O	Fullwidth: U+FF2F
 | ||||||
|  | +3-2350	U+FF30	# LATIN CAPITAL LETTER P	Fullwidth: U+FF30
 | ||||||
|  | +3-2351	U+FF31	# LATIN CAPITAL LETTER Q	Fullwidth: U+FF31
 | ||||||
|  | +3-2352	U+FF32	# LATIN CAPITAL LETTER R	Fullwidth: U+FF32
 | ||||||
|  | +3-2353	U+FF33	# LATIN CAPITAL LETTER S	Fullwidth: U+FF33
 | ||||||
|  | +3-2354	U+FF34	# LATIN CAPITAL LETTER T	Fullwidth: U+FF34
 | ||||||
|  | +3-2355	U+FF35	# LATIN CAPITAL LETTER U	Fullwidth: U+FF35
 | ||||||
|  | +3-2356	U+FF36	# LATIN CAPITAL LETTER V	Fullwidth: U+FF36
 | ||||||
|  | +3-2357	U+FF37	# LATIN CAPITAL LETTER W	Fullwidth: U+FF37
 | ||||||
|  | +3-2358	U+FF38	# LATIN CAPITAL LETTER X	Fullwidth: U+FF38
 | ||||||
|  | +3-2359	U+FF39	# LATIN CAPITAL LETTER Y	Fullwidth: U+FF39
 | ||||||
|  | +3-235A	U+FF3A	# LATIN CAPITAL LETTER Z	Fullwidth: U+FF3A
 | ||||||
|  |  3-235B	U+2213	# MINUS-OR-PLUS SIGN	[2000] | ||||||
|  |  3-235C	U+2135	# ALEF SYMBOL	[2000] | ||||||
|  |  3-235D	U+210F	# PLANCK CONSTANT OVER TWO PI	[2000] | ||||||
|  |  3-235E	U+33CB	# SQUARE HP	[2000] | ||||||
|  |  3-235F	U+2113	# SCRIPT SMALL L	[2000] | ||||||
|  |  3-2360	U+2127	# INVERTED OHM SIGN	[2000] | ||||||
|  | -3-2361	U+0061	# LATIN SMALL LETTER A	Fullwidth: U+FF41
 | ||||||
|  | -3-2362	U+0062	# LATIN SMALL LETTER B	Fullwidth: U+FF42
 | ||||||
|  | -3-2363	U+0063	# LATIN SMALL LETTER C	Fullwidth: U+FF43
 | ||||||
|  | -3-2364	U+0064	# LATIN SMALL LETTER D	Fullwidth: U+FF44
 | ||||||
|  | -3-2365	U+0065	# LATIN SMALL LETTER E	Fullwidth: U+FF45
 | ||||||
|  | -3-2366	U+0066	# LATIN SMALL LETTER F	Fullwidth: U+FF46
 | ||||||
|  | -3-2367	U+0067	# LATIN SMALL LETTER G	Fullwidth: U+FF47
 | ||||||
|  | -3-2368	U+0068	# LATIN SMALL LETTER H	Fullwidth: U+FF48
 | ||||||
|  | -3-2369	U+0069	# LATIN SMALL LETTER I	Fullwidth: U+FF49
 | ||||||
|  | -3-236A	U+006A	# LATIN SMALL LETTER J	Fullwidth: U+FF4A
 | ||||||
|  | -3-236B	U+006B	# LATIN SMALL LETTER K	Fullwidth: U+FF4B
 | ||||||
|  | -3-236C	U+006C	# LATIN SMALL LETTER L	Fullwidth: U+FF4C
 | ||||||
|  | -3-236D	U+006D	# LATIN SMALL LETTER M	Fullwidth: U+FF4D
 | ||||||
|  | -3-236E	U+006E	# LATIN SMALL LETTER N	Fullwidth: U+FF4E
 | ||||||
|  | -3-236F	U+006F	# LATIN SMALL LETTER O	Fullwidth: U+FF4F
 | ||||||
|  | -3-2370	U+0070	# LATIN SMALL LETTER P	Fullwidth: U+FF50
 | ||||||
|  | -3-2371	U+0071	# LATIN SMALL LETTER Q	Fullwidth: U+FF51
 | ||||||
|  | -3-2372	U+0072	# LATIN SMALL LETTER R	Fullwidth: U+FF52
 | ||||||
|  | -3-2373	U+0073	# LATIN SMALL LETTER S	Fullwidth: U+FF53
 | ||||||
|  | -3-2374	U+0074	# LATIN SMALL LETTER T	Fullwidth: U+FF54
 | ||||||
|  | -3-2375	U+0075	# LATIN SMALL LETTER U	Fullwidth: U+FF55
 | ||||||
|  | -3-2376	U+0076	# LATIN SMALL LETTER V	Fullwidth: U+FF56
 | ||||||
|  | -3-2377	U+0077	# LATIN SMALL LETTER W	Fullwidth: U+FF57
 | ||||||
|  | -3-2378	U+0078	# LATIN SMALL LETTER X	Fullwidth: U+FF58
 | ||||||
|  | -3-2379	U+0079	# LATIN SMALL LETTER Y	Fullwidth: U+FF59
 | ||||||
|  | -3-237A	U+007A	# LATIN SMALL LETTER Z	Fullwidth: U+FF5A
 | ||||||
|  | +3-2361	U+FF41	# LATIN SMALL LETTER A	Fullwidth: U+FF41
 | ||||||
|  | +3-2362	U+FF42	# LATIN SMALL LETTER B	Fullwidth: U+FF42
 | ||||||
|  | +3-2363	U+FF43	# LATIN SMALL LETTER C	Fullwidth: U+FF43
 | ||||||
|  | +3-2364	U+FF44	# LATIN SMALL LETTER D	Fullwidth: U+FF44
 | ||||||
|  | +3-2365	U+FF45	# LATIN SMALL LETTER E	Fullwidth: U+FF45
 | ||||||
|  | +3-2366	U+FF46	# LATIN SMALL LETTER F	Fullwidth: U+FF46
 | ||||||
|  | +3-2367	U+FF47	# LATIN SMALL LETTER G	Fullwidth: U+FF47
 | ||||||
|  | +3-2368	U+FF48	# LATIN SMALL LETTER H	Fullwidth: U+FF48
 | ||||||
|  | +3-2369	U+FF49	# LATIN SMALL LETTER I	Fullwidth: U+FF49
 | ||||||
|  | +3-236A	U+FF4A	# LATIN SMALL LETTER J	Fullwidth: U+FF4A
 | ||||||
|  | +3-236B	U+FF4B	# LATIN SMALL LETTER K	Fullwidth: U+FF4B
 | ||||||
|  | +3-236C	U+FF4C	# LATIN SMALL LETTER L	Fullwidth: U+FF4C
 | ||||||
|  | +3-236D	U+FF4D	# LATIN SMALL LETTER M	Fullwidth: U+FF4D
 | ||||||
|  | +3-236E	U+FF4E	# LATIN SMALL LETTER N	Fullwidth: U+FF4E
 | ||||||
|  | +3-236F	U+FF4F	# LATIN SMALL LETTER O	Fullwidth: U+FF4F
 | ||||||
|  | +3-2370	U+FF50	# LATIN SMALL LETTER P	Fullwidth: U+FF50
 | ||||||
|  | +3-2371	U+FF51	# LATIN SMALL LETTER Q	Fullwidth: U+FF51
 | ||||||
|  | +3-2372	U+FF52	# LATIN SMALL LETTER R	Fullwidth: U+FF52
 | ||||||
|  | +3-2373	U+FF53	# LATIN SMALL LETTER S	Fullwidth: U+FF53
 | ||||||
|  | +3-2374	U+FF54	# LATIN SMALL LETTER T	Fullwidth: U+FF54
 | ||||||
|  | +3-2375	U+FF55	# LATIN SMALL LETTER U	Fullwidth: U+FF55
 | ||||||
|  | +3-2376	U+FF56	# LATIN SMALL LETTER V	Fullwidth: U+FF56
 | ||||||
|  | +3-2377	U+FF57	# LATIN SMALL LETTER W	Fullwidth: U+FF57
 | ||||||
|  | +3-2378	U+FF58	# LATIN SMALL LETTER X	Fullwidth: U+FF58
 | ||||||
|  | +3-2379	U+FF59	# LATIN SMALL LETTER Y	Fullwidth: U+FF59
 | ||||||
|  | +3-237A	U+FF5A	# LATIN SMALL LETTER Z	Fullwidth: U+FF5A
 | ||||||
|  |  3-237B	U+30A0	# KATAKANA-HIRAGANA DOUBLE HYPHEN	[2000]	[Unicode3.2] | ||||||
|  |  3-237C	U+2013	# EN DASH	[2000] | ||||||
|  |  3-237D	U+29FA	# DOUBLE PLUS	[2000]	[Unicode3.2] | ||||||
							
								
								
									
										351
									
								
								Tools/unicode/python-mappings/diff/jisx0213-2004-std.txt.diff
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										351
									
								
								Tools/unicode/python-mappings/diff/jisx0213-2004-std.txt.diff
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,351 @@ | ||||||
|  | --- jisx0213-2000-std.txt.orig	Tue Apr 16 23:32:38 2002
 | ||||||
|  | +++ jisx0213-2004-std.txt	Thu Jul  8 11:51:54 2004
 | ||||||
|  | @@ -1,6 +1,6 @@
 | ||||||
|  | -## JIS X 0213:2000 vs Unicode mapping table
 | ||||||
|  | +## JIS X 0213:2004 vs Unicode mapping table
 | ||||||
|  |  ##  | ||||||
|  | -## Date: 16 Apr 2002 13:09:49 GMT
 | ||||||
|  | +## Date: 7 Jul 2004 13:09:49 GMT
 | ||||||
|  |  ## License: | ||||||
|  |  ## 	Copyright (C) 2001 earthian@tama.or.jp, All Rights Reserved. | ||||||
|  |  ## 	Copyright (C) 2001 I'O, All Rights Reserved. | ||||||
|  | @@ -23,21 +23,21 @@
 | ||||||
|  |  3-2121	U+3000	# IDEOGRAPHIC SPACE | ||||||
|  |  3-2122	U+3001	# IDEOGRAPHIC COMMA | ||||||
|  |  3-2123	U+3002	# IDEOGRAPHIC FULL STOP | ||||||
|  | -3-2124	U+002C	# COMMA	Fullwidth: U+FF0C
 | ||||||
|  | -3-2125	U+002E	# FULL STOP	Fullwidth: U+FF0E
 | ||||||
|  | +3-2124	U+FF0C	# COMMA	Fullwidth: U+FF0C
 | ||||||
|  | +3-2125	U+FF0E	# FULL STOP	Fullwidth: U+FF0E
 | ||||||
|  |  3-2126	U+30FB	# KATAKANA MIDDLE DOT | ||||||
|  | -3-2127	U+003A	# COLON	Fullwidth: U+FF1A
 | ||||||
|  | -3-2128	U+003B	# SEMICOLON	Fullwidth: U+FF1B
 | ||||||
|  | -3-2129	U+003F	# QUESTION MARK	Fullwidth: U+FF1F
 | ||||||
|  | -3-212A	U+0021	# EXCLAMATION MARK	Fullwidth: U+FF01
 | ||||||
|  | +3-2127	U+FF1A	# COLON	Fullwidth: U+FF1A
 | ||||||
|  | +3-2128	U+FF1B	# SEMICOLON	Fullwidth: U+FF1B
 | ||||||
|  | +3-2129	U+FF1F	# QUESTION MARK	Fullwidth: U+FF1F
 | ||||||
|  | +3-212A	U+FF01	# EXCLAMATION MARK	Fullwidth: U+FF01
 | ||||||
|  |  3-212B	U+309B	# KATAKANA-HIRAGANA VOICED SOUND MARK | ||||||
|  |  3-212C	U+309C	# KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK | ||||||
|  |  3-212D	U+00B4	# ACUTE ACCENT | ||||||
|  | -3-212E	U+0060	# GRAVE ACCENT	Fullwidth: U+FF40
 | ||||||
|  | +3-212E	U+FF40	# GRAVE ACCENT	Fullwidth: U+FF40
 | ||||||
|  |  3-212F	U+00A8	# DIAERESIS | ||||||
|  | -3-2130	U+005E	# CIRCUMFLEX ACCENT	Fullwidth: U+FF3E
 | ||||||
|  | -3-2131	U+203E	# OVERLINE	Windows: U+FFE3
 | ||||||
|  | -3-2132	U+005F	# LOW LINE	Fullwidth: U+FF3F
 | ||||||
|  | +3-2130	U+FF3E	# CIRCUMFLEX ACCENT	Fullwidth: U+FF3E
 | ||||||
|  | +3-2131	U+FFE3	# OVERLINE	Windows: U+FFE3
 | ||||||
|  | +3-2132	U+FF3F	# LOW LINE	Fullwidth: U+FF3F
 | ||||||
|  |  3-2133	U+30FD	# KATAKANA ITERATION MARK | ||||||
|  |  3-2134	U+30FE	# KATAKANA VOICED ITERATION MARK | ||||||
|  |  3-2135	U+309D	# HIRAGANA ITERATION MARK | ||||||
|  | @@ -48,27 +48,27 @@
 | ||||||
|  |  3-213A	U+3006	# IDEOGRAPHIC CLOSING MARK | ||||||
|  |  3-213B	U+3007	# IDEOGRAPHIC NUMBER ZERO | ||||||
|  |  3-213C	U+30FC	# KATAKANA-HIRAGANA PROLONGED SOUND MARK | ||||||
|  | -3-213D	U+2014	# EM DASH	Windows: U+2015
 | ||||||
|  | +3-213D	U+2015	# EM DASH	Windows: U+2015
 | ||||||
|  |  3-213E	U+2010	# HYPHEN | ||||||
|  | -3-213F	U+002F	# SOLIDUS	Fullwidth: U+FF0F
 | ||||||
|  | +3-213F	U+FF0F	# SOLIDUS	Fullwidth: U+FF0F
 | ||||||
|  |  3-2140	U+005C	# REVERSE SOLIDUS	Fullwidth: U+FF3C | ||||||
|  |  3-2141	U+301C	# WAVE DASH	Windows: U+FF5E | ||||||
|  |  3-2142	U+2016	# DOUBLE VERTICAL LINE	Windows: U+2225 | ||||||
|  | -3-2143	U+007C	# VERTICAL LINE	Fullwidth: U+FF5C
 | ||||||
|  | +3-2143	U+FF5C	# VERTICAL LINE	Fullwidth: U+FF5C
 | ||||||
|  |  3-2144	U+2026	# HORIZONTAL ELLIPSIS | ||||||
|  |  3-2145	U+2025	# TWO DOT LEADER | ||||||
|  |  3-2146	U+2018	# LEFT SINGLE QUOTATION MARK | ||||||
|  |  3-2147	U+2019	# RIGHT SINGLE QUOTATION MARK | ||||||
|  |  3-2148	U+201C	# LEFT DOUBLE QUOTATION MARK | ||||||
|  |  3-2149	U+201D	# RIGHT DOUBLE QUOTATION MARK | ||||||
|  | -3-214A	U+0028	# LEFT PARENTHESIS	Fullwidth: U+FF08
 | ||||||
|  | -3-214B	U+0029	# RIGHT PARENTHESIS	Fullwidth: U+FF09
 | ||||||
|  | +3-214A	U+FF08	# LEFT PARENTHESIS	Fullwidth: U+FF08
 | ||||||
|  | +3-214B	U+FF09	# RIGHT PARENTHESIS	Fullwidth: U+FF09
 | ||||||
|  |  3-214C	U+3014	# LEFT TORTOISE SHELL BRACKET | ||||||
|  |  3-214D	U+3015	# RIGHT TORTOISE SHELL BRACKET | ||||||
|  | -3-214E	U+005B	# LEFT SQUARE BRACKET	Fullwidth: U+FF3B
 | ||||||
|  | -3-214F	U+005D	# RIGHT SQUARE BRACKET	Fullwidth: U+FF3D
 | ||||||
|  | -3-2150	U+007B	# LEFT CURLY BRACKET	Fullwidth: U+FF5B
 | ||||||
|  | -3-2151	U+007D	# RIGHT CURLY BRACKET	Fullwidth: U+FF5D
 | ||||||
|  | +3-214E	U+FF3B	# LEFT SQUARE BRACKET	Fullwidth: U+FF3B
 | ||||||
|  | +3-214F	U+FF3D	# RIGHT SQUARE BRACKET	Fullwidth: U+FF3D
 | ||||||
|  | +3-2150	U+FF5B	# LEFT CURLY BRACKET	Fullwidth: U+FF5B
 | ||||||
|  | +3-2151	U+FF5D	# RIGHT CURLY BRACKET	Fullwidth: U+FF5D
 | ||||||
|  |  3-2152	U+3008	# LEFT ANGLE BRACKET | ||||||
|  |  3-2153	U+3009	# RIGHT ANGLE BRACKET | ||||||
|  |  3-2154	U+300A	# LEFT DOUBLE ANGLE BRACKET | ||||||
|  | @@ -79,15 +79,15 @@
 | ||||||
|  |  3-2159	U+300F	# RIGHT WHITE CORNER BRACKET | ||||||
|  |  3-215A	U+3010	# LEFT BLACK LENTICULAR BRACKET | ||||||
|  |  3-215B	U+3011	# RIGHT BLACK LENTICULAR BRACKET | ||||||
|  | -3-215C	U+002B	# PLUS SIGN	Fullwidth: U+FF0B
 | ||||||
|  | +3-215C	U+FF0B	# PLUS SIGN	Fullwidth: U+FF0B
 | ||||||
|  |  3-215D	U+2212	# MINUS SIGN	Windows: U+FF0D | ||||||
|  |  3-215E	U+00B1	# PLUS-MINUS SIGN | ||||||
|  |  3-215F	U+00D7	# MULTIPLICATION SIGN | ||||||
|  |  3-2160	U+00F7	# DIVISION SIGN | ||||||
|  | -3-2161	U+003D	# EQUALS SIGN	Fullwidth: U+FF1D
 | ||||||
|  | +3-2161	U+FF1D	# EQUALS SIGN	Fullwidth: U+FF1D
 | ||||||
|  |  3-2162	U+2260	# NOT EQUAL TO | ||||||
|  | -3-2163	U+003C	# LESS-THAN SIGN	Fullwidth: U+FF1C
 | ||||||
|  | -3-2164	U+003E	# GREATER-THAN SIGN	Fullwidth: U+FF1E
 | ||||||
|  | +3-2163	U+FF1C	# LESS-THAN SIGN	Fullwidth: U+FF1C
 | ||||||
|  | +3-2164	U+FF1E	# GREATER-THAN SIGN	Fullwidth: U+FF1E
 | ||||||
|  |  3-2165	U+2266	# LESS-THAN OVER EQUAL TO | ||||||
|  |  3-2166	U+2267	# GREATER-THAN OVER EQUAL TO | ||||||
|  |  3-2167	U+221E	# INFINITY | ||||||
|  | @@ -98,15 +98,15 @@
 | ||||||
|  |  3-216C	U+2032	# PRIME | ||||||
|  |  3-216D	U+2033	# DOUBLE PRIME | ||||||
|  |  3-216E	U+2103	# DEGREE CELSIUS | ||||||
|  | -3-216F	U+00A5	# YEN SIGN	Windows: U+FFE5
 | ||||||
|  | -3-2170	U+0024	# DOLLAR SIGN	Fullwidth: U+FF04
 | ||||||
|  | +3-216F	U+FFE5	# YEN SIGN	Windows: U+FFE5
 | ||||||
|  | +3-2170	U+FF04	# DOLLAR SIGN	Fullwidth: U+FF04
 | ||||||
|  |  3-2171	U+00A2	# CENT SIGN	Windows: U+FFE0 | ||||||
|  |  3-2172	U+00A3	# POUND SIGN	Windows: U+FFE1 | ||||||
|  | -3-2173	U+0025	# PERCENT SIGN	Fullwidth: U+FF05
 | ||||||
|  | -3-2174	U+0023	# NUMBER SIGN	Fullwidth: U+FF03
 | ||||||
|  | -3-2175	U+0026	# AMPERSAND	Fullwidth: U+FF06
 | ||||||
|  | -3-2176	U+002A	# ASTERISK	Fullwidth: U+FF0A
 | ||||||
|  | -3-2177	U+0040	# COMMERCIAL AT	Fullwidth: U+FF20
 | ||||||
|  | +3-2173	U+FF05	# PERCENT SIGN	Fullwidth: U+FF05
 | ||||||
|  | +3-2174	U+FF03	# NUMBER SIGN	Fullwidth: U+FF03
 | ||||||
|  | +3-2175	U+FF06	# AMPERSAND	Fullwidth: U+FF06
 | ||||||
|  | +3-2176	U+FF0A	# ASTERISK	Fullwidth: U+FF0A
 | ||||||
|  | +3-2177	U+FF20	# COMMERCIAL AT	Fullwidth: U+FF20
 | ||||||
|  |  3-2178	U+00A7	# SECTION SIGN | ||||||
|  |  3-2179	U+2606	# WHITE STAR | ||||||
|  |  3-217A	U+2605	# BLACK STAR | ||||||
|  | @@ -128,9 +128,9 @@
 | ||||||
|  |  3-222C	U+2191	# UPWARDS ARROW | ||||||
|  |  3-222D	U+2193	# DOWNWARDS ARROW | ||||||
|  |  3-222E	U+3013	# GETA MARK | ||||||
|  | -3-222F	U+0027	# APOSTROPHE	Fullwidth: U+FF07
 | ||||||
|  | -3-2230	U+0022	# QUOTATION MARK	[2000]	Fullwidth: U+FF02
 | ||||||
|  | -3-2231	U+002D	# HYPHEN-MINUS	[2000]	Fullwidth: U+FF0D
 | ||||||
|  | +3-222F	U+FF07	# APOSTROPHE	Fullwidth: U+FF07
 | ||||||
|  | +3-2230	U+FF02	# QUOTATION MARK	[2000]	Fullwidth: U+FF02
 | ||||||
|  | +3-2231	U+FF0D	# HYPHEN-MINUS	[2000]	Fullwidth: U+FF0D
 | ||||||
|  |  3-2232	U+007E	# TILDE	[2000]	Fullwidth: U+FF5E | ||||||
|  |  3-2233	U+3033	# VERTICAL KANA REPEAT MARK UPPER HALF	[2000] | ||||||
|  |  3-2234	U+3034	# VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF	[2000] | ||||||
|  | @@ -223,16 +223,16 @@
 | ||||||
|  |  3-232D	U+21E9	# DOWNWARDS WHITE ARROW	[2000] | ||||||
|  |  3-232E	U+2934	# ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS	[2000]	[Unicode3.2] | ||||||
|  |  3-232F	U+2935	# ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS	[2000]	[Unicode3.2] | ||||||
|  | -3-2330	U+0030	# DIGIT ZERO	Fullwidth: U+FF10
 | ||||||
|  | -3-2331	U+0031	# DIGIT ONE	Fullwidth: U+FF11
 | ||||||
|  | -3-2332	U+0032	# DIGIT TWO	Fullwidth: U+FF12
 | ||||||
|  | -3-2333	U+0033	# DIGIT THREE	Fullwidth: U+FF13
 | ||||||
|  | -3-2334	U+0034	# DIGIT FOUR	Fullwidth: U+FF14
 | ||||||
|  | -3-2335	U+0035	# DIGIT FIVE	Fullwidth: U+FF15
 | ||||||
|  | -3-2336	U+0036	# DIGIT SIX	Fullwidth: U+FF16
 | ||||||
|  | -3-2337	U+0037	# DIGIT SEVEN	Fullwidth: U+FF17
 | ||||||
|  | -3-2338	U+0038	# DIGIT EIGHT	Fullwidth: U+FF18
 | ||||||
|  | -3-2339	U+0039	# DIGIT NINE	Fullwidth: U+FF19
 | ||||||
|  | +3-2330	U+FF10	# DIGIT ZERO	Fullwidth: U+FF10
 | ||||||
|  | +3-2331	U+FF11	# DIGIT ONE	Fullwidth: U+FF11
 | ||||||
|  | +3-2332	U+FF12	# DIGIT TWO	Fullwidth: U+FF12
 | ||||||
|  | +3-2333	U+FF13	# DIGIT THREE	Fullwidth: U+FF13
 | ||||||
|  | +3-2334	U+FF14	# DIGIT FOUR	Fullwidth: U+FF14
 | ||||||
|  | +3-2335	U+FF15	# DIGIT FIVE	Fullwidth: U+FF15
 | ||||||
|  | +3-2336	U+FF16	# DIGIT SIX	Fullwidth: U+FF16
 | ||||||
|  | +3-2337	U+FF17	# DIGIT SEVEN	Fullwidth: U+FF17
 | ||||||
|  | +3-2338	U+FF18	# DIGIT EIGHT	Fullwidth: U+FF18
 | ||||||
|  | +3-2339	U+FF19	# DIGIT NINE	Fullwidth: U+FF19
 | ||||||
|  |  3-233A	U+29BF	# CIRCLED BULLET	[2000]	[Unicode3.2] | ||||||
|  |  3-233B	U+25C9	# FISHEYE	[2000] | ||||||
|  |  3-233C	U+303D	# PART ALTERNATION MARK	[2000]	[Unicode3.2] | ||||||
|  | @@ -240,64 +240,64 @@
 | ||||||
|  |  3-233E	U+FE45	# SESAME DOT	[2000]	[Unicode3.2] | ||||||
|  |  3-233F	U+25E6	# WHITE BULLET	[2000] | ||||||
|  |  3-2340	U+2022	# BULLET	[2000] | ||||||
|  | -3-2341	U+0041	# LATIN CAPITAL LETTER A	Fullwidth: U+FF21
 | ||||||
|  | -3-2342	U+0042	# LATIN CAPITAL LETTER B	Fullwidth: U+FF22
 | ||||||
|  | -3-2343	U+0043	# LATIN CAPITAL LETTER C	Fullwidth: U+FF23
 | ||||||
|  | -3-2344	U+0044	# LATIN CAPITAL LETTER D	Fullwidth: U+FF24
 | ||||||
|  | -3-2345	U+0045	# LATIN CAPITAL LETTER E	Fullwidth: U+FF25
 | ||||||
|  | -3-2346	U+0046	# LATIN CAPITAL LETTER F	Fullwidth: U+FF26
 | ||||||
|  | -3-2347	U+0047	# LATIN CAPITAL LETTER G	Fullwidth: U+FF27
 | ||||||
|  | -3-2348	U+0048	# LATIN CAPITAL LETTER H	Fullwidth: U+FF28
 | ||||||
|  | -3-2349	U+0049	# LATIN CAPITAL LETTER I	Fullwidth: U+FF29
 | ||||||
|  | -3-234A	U+004A	# LATIN CAPITAL LETTER J	Fullwidth: U+FF2A
 | ||||||
|  | -3-234B	U+004B	# LATIN CAPITAL LETTER K	Fullwidth: U+FF2B
 | ||||||
|  | -3-234C	U+004C	# LATIN CAPITAL LETTER L	Fullwidth: U+FF2C
 | ||||||
|  | -3-234D	U+004D	# LATIN CAPITAL LETTER M	Fullwidth: U+FF2D
 | ||||||
|  | -3-234E	U+004E	# LATIN CAPITAL LETTER N	Fullwidth: U+FF2E
 | ||||||
|  | -3-234F	U+004F	# LATIN CAPITAL LETTER O	Fullwidth: U+FF2F
 | ||||||
|  | -3-2350	U+0050	# LATIN CAPITAL LETTER P	Fullwidth: U+FF30
 | ||||||
|  | -3-2351	U+0051	# LATIN CAPITAL LETTER Q	Fullwidth: U+FF31
 | ||||||
|  | -3-2352	U+0052	# LATIN CAPITAL LETTER R	Fullwidth: U+FF32
 | ||||||
|  | -3-2353	U+0053	# LATIN CAPITAL LETTER S	Fullwidth: U+FF33
 | ||||||
|  | -3-2354	U+0054	# LATIN CAPITAL LETTER T	Fullwidth: U+FF34
 | ||||||
|  | -3-2355	U+0055	# LATIN CAPITAL LETTER U	Fullwidth: U+FF35
 | ||||||
|  | -3-2356	U+0056	# LATIN CAPITAL LETTER V	Fullwidth: U+FF36
 | ||||||
|  | -3-2357	U+0057	# LATIN CAPITAL LETTER W	Fullwidth: U+FF37
 | ||||||
|  | -3-2358	U+0058	# LATIN CAPITAL LETTER X	Fullwidth: U+FF38
 | ||||||
|  | -3-2359	U+0059	# LATIN CAPITAL LETTER Y	Fullwidth: U+FF39
 | ||||||
|  | -3-235A	U+005A	# LATIN CAPITAL LETTER Z	Fullwidth: U+FF3A
 | ||||||
|  | +3-2341	U+FF21	# LATIN CAPITAL LETTER A	Fullwidth: U+FF21
 | ||||||
|  | +3-2342	U+FF22	# LATIN CAPITAL LETTER B	Fullwidth: U+FF22
 | ||||||
|  | +3-2343	U+FF23	# LATIN CAPITAL LETTER C	Fullwidth: U+FF23
 | ||||||
|  | +3-2344	U+FF24	# LATIN CAPITAL LETTER D	Fullwidth: U+FF24
 | ||||||
|  | +3-2345	U+FF25	# LATIN CAPITAL LETTER E	Fullwidth: U+FF25
 | ||||||
|  | +3-2346	U+FF26	# LATIN CAPITAL LETTER F	Fullwidth: U+FF26
 | ||||||
|  | +3-2347	U+FF27	# LATIN CAPITAL LETTER G	Fullwidth: U+FF27
 | ||||||
|  | +3-2348	U+FF28	# LATIN CAPITAL LETTER H	Fullwidth: U+FF28
 | ||||||
|  | +3-2349	U+FF29	# LATIN CAPITAL LETTER I	Fullwidth: U+FF29
 | ||||||
|  | +3-234A	U+FF2A	# LATIN CAPITAL LETTER J	Fullwidth: U+FF2A
 | ||||||
|  | +3-234B	U+FF2B	# LATIN CAPITAL LETTER K	Fullwidth: U+FF2B
 | ||||||
|  | +3-234C	U+FF2C	# LATIN CAPITAL LETTER L	Fullwidth: U+FF2C
 | ||||||
|  | +3-234D	U+FF2D	# LATIN CAPITAL LETTER M	Fullwidth: U+FF2D
 | ||||||
|  | +3-234E	U+FF2E	# LATIN CAPITAL LETTER N	Fullwidth: U+FF2E
 | ||||||
|  | +3-234F	U+FF2F	# LATIN CAPITAL LETTER O	Fullwidth: U+FF2F
 | ||||||
|  | +3-2350	U+FF30	# LATIN CAPITAL LETTER P	Fullwidth: U+FF30
 | ||||||
|  | +3-2351	U+FF31	# LATIN CAPITAL LETTER Q	Fullwidth: U+FF31
 | ||||||
|  | +3-2352	U+FF32	# LATIN CAPITAL LETTER R	Fullwidth: U+FF32
 | ||||||
|  | +3-2353	U+FF33	# LATIN CAPITAL LETTER S	Fullwidth: U+FF33
 | ||||||
|  | +3-2354	U+FF34	# LATIN CAPITAL LETTER T	Fullwidth: U+FF34
 | ||||||
|  | +3-2355	U+FF35	# LATIN CAPITAL LETTER U	Fullwidth: U+FF35
 | ||||||
|  | +3-2356	U+FF36	# LATIN CAPITAL LETTER V	Fullwidth: U+FF36
 | ||||||
|  | +3-2357	U+FF37	# LATIN CAPITAL LETTER W	Fullwidth: U+FF37
 | ||||||
|  | +3-2358	U+FF38	# LATIN CAPITAL LETTER X	Fullwidth: U+FF38
 | ||||||
|  | +3-2359	U+FF39	# LATIN CAPITAL LETTER Y	Fullwidth: U+FF39
 | ||||||
|  | +3-235A	U+FF3A	# LATIN CAPITAL LETTER Z	Fullwidth: U+FF3A
 | ||||||
|  |  3-235B	U+2213	# MINUS-OR-PLUS SIGN	[2000] | ||||||
|  |  3-235C	U+2135	# ALEF SYMBOL	[2000] | ||||||
|  |  3-235D	U+210F	# PLANCK CONSTANT OVER TWO PI	[2000] | ||||||
|  |  3-235E	U+33CB	# SQUARE HP	[2000] | ||||||
|  |  3-235F	U+2113	# SCRIPT SMALL L	[2000] | ||||||
|  |  3-2360	U+2127	# INVERTED OHM SIGN	[2000] | ||||||
|  | -3-2361	U+0061	# LATIN SMALL LETTER A	Fullwidth: U+FF41
 | ||||||
|  | -3-2362	U+0062	# LATIN SMALL LETTER B	Fullwidth: U+FF42
 | ||||||
|  | -3-2363	U+0063	# LATIN SMALL LETTER C	Fullwidth: U+FF43
 | ||||||
|  | -3-2364	U+0064	# LATIN SMALL LETTER D	Fullwidth: U+FF44
 | ||||||
|  | -3-2365	U+0065	# LATIN SMALL LETTER E	Fullwidth: U+FF45
 | ||||||
|  | -3-2366	U+0066	# LATIN SMALL LETTER F	Fullwidth: U+FF46
 | ||||||
|  | -3-2367	U+0067	# LATIN SMALL LETTER G	Fullwidth: U+FF47
 | ||||||
|  | -3-2368	U+0068	# LATIN SMALL LETTER H	Fullwidth: U+FF48
 | ||||||
|  | -3-2369	U+0069	# LATIN SMALL LETTER I	Fullwidth: U+FF49
 | ||||||
|  | -3-236A	U+006A	# LATIN SMALL LETTER J	Fullwidth: U+FF4A
 | ||||||
|  | -3-236B	U+006B	# LATIN SMALL LETTER K	Fullwidth: U+FF4B
 | ||||||
|  | -3-236C	U+006C	# LATIN SMALL LETTER L	Fullwidth: U+FF4C
 | ||||||
|  | -3-236D	U+006D	# LATIN SMALL LETTER M	Fullwidth: U+FF4D
 | ||||||
|  | -3-236E	U+006E	# LATIN SMALL LETTER N	Fullwidth: U+FF4E
 | ||||||
|  | -3-236F	U+006F	# LATIN SMALL LETTER O	Fullwidth: U+FF4F
 | ||||||
|  | -3-2370	U+0070	# LATIN SMALL LETTER P	Fullwidth: U+FF50
 | ||||||
|  | -3-2371	U+0071	# LATIN SMALL LETTER Q	Fullwidth: U+FF51
 | ||||||
|  | -3-2372	U+0072	# LATIN SMALL LETTER R	Fullwidth: U+FF52
 | ||||||
|  | -3-2373	U+0073	# LATIN SMALL LETTER S	Fullwidth: U+FF53
 | ||||||
|  | -3-2374	U+0074	# LATIN SMALL LETTER T	Fullwidth: U+FF54
 | ||||||
|  | -3-2375	U+0075	# LATIN SMALL LETTER U	Fullwidth: U+FF55
 | ||||||
|  | -3-2376	U+0076	# LATIN SMALL LETTER V	Fullwidth: U+FF56
 | ||||||
|  | -3-2377	U+0077	# LATIN SMALL LETTER W	Fullwidth: U+FF57
 | ||||||
|  | -3-2378	U+0078	# LATIN SMALL LETTER X	Fullwidth: U+FF58
 | ||||||
|  | -3-2379	U+0079	# LATIN SMALL LETTER Y	Fullwidth: U+FF59
 | ||||||
|  | -3-237A	U+007A	# LATIN SMALL LETTER Z	Fullwidth: U+FF5A
 | ||||||
|  | +3-2361	U+FF41	# LATIN SMALL LETTER A	Fullwidth: U+FF41
 | ||||||
|  | +3-2362	U+FF42	# LATIN SMALL LETTER B	Fullwidth: U+FF42
 | ||||||
|  | +3-2363	U+FF43	# LATIN SMALL LETTER C	Fullwidth: U+FF43
 | ||||||
|  | +3-2364	U+FF44	# LATIN SMALL LETTER D	Fullwidth: U+FF44
 | ||||||
|  | +3-2365	U+FF45	# LATIN SMALL LETTER E	Fullwidth: U+FF45
 | ||||||
|  | +3-2366	U+FF46	# LATIN SMALL LETTER F	Fullwidth: U+FF46
 | ||||||
|  | +3-2367	U+FF47	# LATIN SMALL LETTER G	Fullwidth: U+FF47
 | ||||||
|  | +3-2368	U+FF48	# LATIN SMALL LETTER H	Fullwidth: U+FF48
 | ||||||
|  | +3-2369	U+FF49	# LATIN SMALL LETTER I	Fullwidth: U+FF49
 | ||||||
|  | +3-236A	U+FF4A	# LATIN SMALL LETTER J	Fullwidth: U+FF4A
 | ||||||
|  | +3-236B	U+FF4B	# LATIN SMALL LETTER K	Fullwidth: U+FF4B
 | ||||||
|  | +3-236C	U+FF4C	# LATIN SMALL LETTER L	Fullwidth: U+FF4C
 | ||||||
|  | +3-236D	U+FF4D	# LATIN SMALL LETTER M	Fullwidth: U+FF4D
 | ||||||
|  | +3-236E	U+FF4E	# LATIN SMALL LETTER N	Fullwidth: U+FF4E
 | ||||||
|  | +3-236F	U+FF4F	# LATIN SMALL LETTER O	Fullwidth: U+FF4F
 | ||||||
|  | +3-2370	U+FF50	# LATIN SMALL LETTER P	Fullwidth: U+FF50
 | ||||||
|  | +3-2371	U+FF51	# LATIN SMALL LETTER Q	Fullwidth: U+FF51
 | ||||||
|  | +3-2372	U+FF52	# LATIN SMALL LETTER R	Fullwidth: U+FF52
 | ||||||
|  | +3-2373	U+FF53	# LATIN SMALL LETTER S	Fullwidth: U+FF53
 | ||||||
|  | +3-2374	U+FF54	# LATIN SMALL LETTER T	Fullwidth: U+FF54
 | ||||||
|  | +3-2375	U+FF55	# LATIN SMALL LETTER U	Fullwidth: U+FF55
 | ||||||
|  | +3-2376	U+FF56	# LATIN SMALL LETTER V	Fullwidth: U+FF56
 | ||||||
|  | +3-2377	U+FF57	# LATIN SMALL LETTER W	Fullwidth: U+FF57
 | ||||||
|  | +3-2378	U+FF58	# LATIN SMALL LETTER X	Fullwidth: U+FF58
 | ||||||
|  | +3-2379	U+FF59	# LATIN SMALL LETTER Y	Fullwidth: U+FF59
 | ||||||
|  | +3-237A	U+FF5A	# LATIN SMALL LETTER Z	Fullwidth: U+FF5A
 | ||||||
|  |  3-237B	U+30A0	# KATAKANA-HIRAGANA DOUBLE HYPHEN	[2000]	[Unicode3.2] | ||||||
|  |  3-237C	U+2013	# EN DASH	[2000] | ||||||
|  |  3-237D	U+29FA	# DOUBLE PLUS	[2000]	[Unicode3.2] | ||||||
|  | @@ -1242,7 +1242,7 @@
 | ||||||
|  |  3-2D7C		# <reserved>	Windows: U+222A | ||||||
|  |  3-2D7D	U+2756	# BLACK DIAMOND MINUS WHITE X	[2000] | ||||||
|  |  3-2D7E	U+261E	# WHITE RIGHT POINTING INDEX	[2000] | ||||||
|  | -3-2E21		# <reserved>
 | ||||||
|  | +3-2E21	U+4FF1	# <cjk> [2004]
 | ||||||
|  |  3-2E22	U+2000B	# <cjk>	[2000]	[Unicode3.1]	Private: U+F780 | ||||||
|  |  3-2E23	U+3402	# <cjk>	[2000] | ||||||
|  |  3-2E24	U+4E28	# <cjk>	[2000] | ||||||
|  | @@ -1429,7 +1429,7 @@
 | ||||||
|  |  3-2F7B	U+218BD	# <cjk>	[2000]	[Unicode3.1]	Private: U+F78F | ||||||
|  |  3-2F7C	U+5B19	# <cjk>	[2000] | ||||||
|  |  3-2F7D	U+5B25	# <cjk>	[2000] | ||||||
|  | -3-2F7E		# <reserved>
 | ||||||
|  | +3-2F7E	U+525D	# <cjk> [2004]
 | ||||||
|  |  3-3021	U+4E9C	# <cjk> | ||||||
|  |  3-3022	U+5516	# <cjk> | ||||||
|  |  3-3023	U+5A03	# <cjk> | ||||||
|  | @@ -4395,7 +4395,7 @@
 | ||||||
|  |  3-4F51	U+6E7E	# <cjk> | ||||||
|  |  3-4F52	U+7897	# <cjk> | ||||||
|  |  3-4F53	U+8155	# <cjk> | ||||||
|  | -3-4F54		# <reserved>
 | ||||||
|  | +3-4F54	U+20B9F	# <cjk> [2004]
 | ||||||
|  |  3-4F55	U+5B41	# <cjk>	[2000] | ||||||
|  |  3-4F56	U+5B56	# <cjk>	[2000] | ||||||
|  |  3-4F57	U+5B7D	# <cjk>	[2000] | ||||||
|  | @@ -4437,7 +4437,7 @@
 | ||||||
|  |  3-4F7B	U+5DA7	# <cjk>	[2000] | ||||||
|  |  3-4F7C	U+5DB8	# <cjk>	[2000] | ||||||
|  |  3-4F7D	U+5DCB	# <cjk>	[2000] | ||||||
|  | -3-4F7E		# <reserved>
 | ||||||
|  | +3-4F7E	U+541E	# <cjk> [2004]
 | ||||||
|  |  3-5021	U+5F0C	# <cjk> | ||||||
|  |  3-5022	U+4E10	# <cjk> | ||||||
|  |  3-5023	U+4E15	# <cjk> | ||||||
|  | @@ -7828,7 +7828,7 @@
 | ||||||
|  |  3-7424	U+7464	# <cjk>	[1983] | ||||||
|  |  3-7425	U+51DC	# <cjk>	[1990] | ||||||
|  |  3-7426	U+7199	# <cjk>	[1990] | ||||||
|  | -3-7427		# <reserved>
 | ||||||
|  | +3-7427	U+5653	# <cjk> [2004]
 | ||||||
|  |  3-7428	U+5DE2	# <cjk>	[2000] | ||||||
|  |  3-7429	U+5E14	# <cjk>	[2000] | ||||||
|  |  3-742A	U+5E18	# <cjk>	[2000] | ||||||
|  | @@ -8851,11 +8851,11 @@
 | ||||||
|  |  3-7E77	U+9F94	# <cjk>	[2000] | ||||||
|  |  3-7E78	U+9F97	# <cjk>	[2000] | ||||||
|  |  3-7E79	U+9FA2	# <cjk>	[2000] | ||||||
|  | -3-7E7A		# <reserved>
 | ||||||
|  | -3-7E7B		# <reserved>
 | ||||||
|  | -3-7E7C		# <reserved>
 | ||||||
|  | -3-7E7D		# <reserved>
 | ||||||
|  | -3-7E7E		# <reserved>
 | ||||||
|  | +3-7E7A	U+59F8	# <cjk> [2004]
 | ||||||
|  | +3-7E7B	U+5C5B	# <cjk> [2004]
 | ||||||
|  | +3-7E7C	U+5E77	# <cjk> [2004]
 | ||||||
|  | +3-7E7D	U+7626	# <cjk> [2004]
 | ||||||
|  | +3-7E7E	U+7E6B	# <cjk> [2004]
 | ||||||
|  |  4-2121	U+20089	# <cjk>	[2000]	[Unicode3.1]	Private: U+F7D1 | ||||||
|  |  4-2122	U+4E02	# <cjk>	[2000] | ||||||
|  |  4-2123	U+4E0F	# <cjk>	[2000] | ||||||
|  | @@ -11138,7 +11138,7 @@
 | ||||||
|  |  4-7D38	U+9B10	# <cjk>	[2000] | ||||||
|  |  4-7D39	U+9B12	# <cjk>	[2000] | ||||||
|  |  4-7D3A	U+9B16	# <cjk>	[2000] | ||||||
|  | -4-7D3B	U+9B1D	# <cjk>	[2000]
 | ||||||
|  | +4-7D3B	U+9B1C	# <cjk>	[2000]
 | ||||||
|  |  4-7D3C	U+9B2B	# <cjk>	[2000] | ||||||
|  |  4-7D3D	U+9B33	# <cjk>	[2000] | ||||||
|  |  4-7D3E	U+9B3D	# <cjk>	[2000] | ||||||
							
								
								
									
										30917
									
								
								Tools/unicode/python-mappings/gb-18030-2000.xml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										30917
									
								
								Tools/unicode/python-mappings/gb-18030-2000.xml
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										11294
									
								
								Tools/unicode/python-mappings/jisx0213-2004-std.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										11294
									
								
								Tools/unicode/python-mappings/jisx0213-2004-std.txt
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Dong-hee Na
						Dong-hee Na