mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	
		
			
	
	
		
			199 lines
		
	
	
	
		
			6.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			199 lines
		
	
	
	
		
			6.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | # | ||
|  | # genmap_support.py: Multibyte Codec Map Generator | ||
|  | # | ||
|  | # Original Author:  Hye-Shik Chang <perky@FreeBSD.org> | ||
|  | # Modified Author:  Dong-hee Na <donghee.na92@gmail.com> | ||
|  | # | ||
|  | 
 | ||
|  | 
 | ||
class BufferedFiller:
    """Pack short text tokens into output lines of bounded width.

    Tokens are appended to the current line until adding the next one would
    push the line past ``column`` characters; the line is then moved to
    ``buffered`` and a new one is started.  ``len(filler)`` is the total
    number of tokens written so far; it is never reset by ``flush()`` or
    ``printout()``.
    """

    def __init__(self, column=78):
        """Create an empty filler whose lines hold at most *column* characters."""
        self.column = column
        self.buffered = []   # completed lines awaiting printout()
        self.cline = []      # tokens of the line currently being filled
        self.clen = 0        # character length of the current line
        self.count = 0       # number of tokens accepted by write()

    def write(self, *data):
        """Append each token in *data*, wrapping to a new line when needed.

        Raises:
            ValueError: if a single token is longer than ``column``.
        """
        for token in data:
            width = len(token)
            if width > self.column:
                raise ValueError("token is too long")
            # Start a new line first if this token would overflow the current one.
            if self.clen + width > self.column:
                self.flush()
            self.cline.append(token)
            self.clen += width
            self.count += 1

    def flush(self):
        """Move the current line (if it has any tokens) to ``buffered``."""
        if self.cline:
            self.buffered.append(''.join(self.cline))
            self.clen = 0
            self.cline.clear()

    def printout(self, fp):
        """Write every pending line to *fp*, newline-terminated, then forget them."""
        self.flush()
        for text in self.buffered:
            fp.write(text + '\n')
        self.buffered.clear()

    def __len__(self):
        """Return the number of tokens written since construction."""
        return self.count
|  | 
 | ||
|  | 
 | ||
class DecodeMapWriter:
    """Emit C source for a decode map: a flat value array plus a 256-entry index.

    ``decode_map`` is a two-level mapping: an integer first-level key selects
    an inner dict whose integer keys map to the values to be written.  The
    writer annotates the inner dicts in place with the string keys
    ``prefix``, ``'min'``, ``'max'`` and ``'midx'``.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        """Remember the output file, the C identifier prefix and the map."""
        self.fp = fp
        self.prefix = prefix
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Queue the values for every first-level key in *c1range*.

        Args:
            c1range: ``(low, high)`` inclusive bounds of first-level keys.
            c2range: ``(low, high)`` inclusive bounds of second-level keys.
            onlymask: if non-empty, only first-level keys contained in it
                are processed.
            wide: accepted but not used by this method.

        For each selected inner dict that has at least one key inside
        *c2range*, the dict is tagged with this writer's prefix, its
        ``'min'``/``'max'`` are set to the lowest/highest key found, and
        ``'midx'`` records the filler token count at which its run starts.
        Gaps inside the run are written as ``U,``.
        """
        trail_candidates = range(c2range[0], c2range[1] + 1)

        for lead in range(c1range[0], c1range[1] + 1):
            if lead not in self.decode_map:
                continue
            if onlymask and lead not in onlymask:
                continue

            row = self.decode_map[lead]
            present = [t for t in trail_candidates if t in row]
            if not present:
                continue

            lowest, highest = present[0], present[-1]
            row[self.prefix] = True
            row['min'] = lowest
            row['max'] = highest
            row['midx'] = len(self.filler)

            for trail in range(lowest, highest + 1):
                if trail in row:
                    token = '%d,' % row[trail]
                else:
                    token = 'U,'
                self.filler.write(token)

    def generate(self, wide=False):
        """Write the value array and the 256-entry index table to ``fp``.

        With *wide* false the element type is ``ucs2_t`` and the index type
        ``struct dbcs_index``; otherwise ``Py_UCS4`` and
        ``struct widedbcs_index``.  The array size is the number of tokens
        queued so far.  First-level keys whose inner dict was not tagged
        with this prefix get an all-zero index entry.
        """
        size = len(self.filler)
        if wide:
            array_header = f"static const Py_UCS4 __{self.prefix}_decmap[{size}] = {{\n"
            index_header = f"static const struct widedbcs_index {self.prefix}_decmap[256] = {{\n"
        else:
            array_header = f"static const ucs2_t __{self.prefix}_decmap[{size}] = {{\n"
            index_header = f"static const struct dbcs_index {self.prefix}_decmap[256] = {{\n"

        self.fp.write(array_header)
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        self.fp.write(index_header)
        for lead in range(256):
            tagged = lead in self.decode_map and self.prefix in self.decode_map[lead]
            if not tagged:
                self.filler.write("{", "0,", "0,", "0", "},")
                continue

            row = self.decode_map[lead]
            self.filler.write("{", "__%s_decmap" % self.prefix, "+", "%d" % row['midx'],
                              ",", "%d," % row['min'], "%d" % row['max'], "},")
        # The same filler is reused for the index entries; printout() emptied
        # its line buffer above, so only the index lines are written here.
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
|  | 
 | ||
|  | 
 | ||
class EncodeMapWriter:
    """Emit C source for an encode map: a flat value array plus a 256-entry index.

    ``encode_map`` is a two-level mapping: an integer first-level key in
    ``range(256)`` selects an inner dict whose integer keys map either to an
    ``int`` or to a ``tuple``.  ``buildmap()`` annotates the inner dicts in
    place with the string keys ``prefix``, ``'min'``, ``'max'`` and
    ``'midx'``.

    Subclasses may override ``elemtype``/``indextype`` (the C types used in
    the generated declarations) and the ``write_*`` hooks.
    """

    filler_class = BufferedFiller
    elemtype = 'DBCHAR'
    indextype = 'struct unim_index'

    def __init__(self, fp, prefix, encode_map):
        """Remember the output file, the C identifier prefix and the map."""
        self.fp = fp
        self.prefix = prefix
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Queue all values, then write the array and index table to ``fp``."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Queue the values of every inner dict and record its index metadata.

        For each first-level key present in ``encode_map`` the run from the
        lowest to the highest integer second-level key is written through
        the ``write_*`` hooks, with gaps going to ``write_nochar()``.

        Raises:
            ValueError: if a mapped value is neither an ``int`` nor a
                ``tuple``.
        """
        for c1 in range(256):
            if c1 not in self.encode_map:
                continue
            c2map = self.encode_map[c1]
            # Only integer keys are table positions.  The metadata written
            # below uses string keys in the same dict, so an inner dict that
            # was already annotated (by another writer or an earlier pass)
            # would otherwise make the sort fail comparing int with str.
            rc2values = sorted(k for k in c2map if isinstance(k, int))
            if not rc2values:
                continue

            c2map[self.prefix] = True
            c2map['min'] = rc2values[0]
            c2map['max'] = rc2values[-1]
            # Offset (in tokens) of this run inside the flat value array.
            c2map['midx'] = len(self.filler)

            for v in range(rc2values[0], rc2values[-1] + 1):
                if v not in c2map:
                    self.write_nochar()
                elif isinstance(c2map[v], int):
                    self.write_char(c2map[v])
                elif isinstance(c2map[v], tuple):
                    self.write_multic(c2map[v])
                else:
                    raise ValueError(
                        f"unsupported encode map value {c2map[v]!r} "
                        f"at ({c1!r}, {v!r})")

    def write_nochar(self):
        """Queue the placeholder token for an unmapped position."""
        self.filler.write('N,')

    def write_multic(self, point):
        """Queue the placeholder token for a tuple value (*point* is unused here)."""
        self.filler.write('M,')

    def write_char(self, point):
        """Queue the integer value *point* as a decimal token."""
        self.filler.write(str(point) + ',')

    def printmap(self):
        """Write the queued value array followed by the 256-entry index table.

        First-level keys whose inner dict was not tagged with this prefix
        get an all-zero index entry.
        """
        self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
        self.fp.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")

        for i in range(256):
            if i in self.encode_map and self.prefix in self.encode_map[i]:
                self.filler.write("{", "__%s_encmap" % self.prefix, "+",
                                  "%d" % self.encode_map[i]['midx'], ",",
                                  "%d," % self.encode_map[i]['min'],
                                  "%d" % self.encode_map[i]['max'], "},")
            else:
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
|  | 
 | ||
|  | 
 | ||
def open_mapping_file(path, source):
    """Open the mapping file at *path* for reading and return the file object.

    If the file cannot be opened, exit with the message
    ``"<source> is needed"``, where *source* tells the user where the
    mapping file comes from.

    Raises:
        SystemExit: when ``open()`` fails with an ``OSError``.
    """
    try:
        return open(path)
    except OSError:
        raise SystemExit(f'{source} is needed')
|  | 
 | ||
|  | 
 | ||
def print_autogen(fo, source):
    """Write a one-line C++-style "auto-generated, do not edit" banner to *fo*.

    *source* is interpolated into the banner as the name of the generator.
    """
    banner = f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n'
    fo.write(banner)
|  | 
 | ||
|  | 
 | ||
def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Parse a whitespace-separated mapping file into a two-level dict.

    Args:
        fo: seekable text file object; it is rewound before reading.
        natcol: column index of the native code on each line.
        unicol: column index of the value it maps to.
        sbcs: if true, codes below 0x100 are kept as well (stored under
            first-level key 0); otherwise they are skipped.

    Returns:
        ``{code >> 8: {code & 0xff: value}}`` built from every line that,
        after removing a trailing ``#`` comment, has at least two columns.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    decmap = {}
    for raw in fo:
        # Drop the comment part, then tokenise what is left.
        fields = raw.split('#', 1)[0].strip().split()
        if len(fields) < 2:
            continue

        # NOTE(review): eval() executes each token as a Python expression.
        # That is only acceptable while the mapping files are trusted
        # inputs; confirm before pointing this at anything else.
        values = [eval(tok) for tok in fields]
        loc = values[natcol]
        uni = values[unicol]
        if loc >= 0x100 or sbcs:
            decmap.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap