"""Helper module to tranlate 3.5 type comments to 3.6 variable annotations.""" import re import os import ast import argparse import tokenize from collections import defaultdict from textwrap import dedent from io import BytesIO __all__ = ['com2ann', 'TYPE_COM'] TYPE_COM = re.compile('\s*#\s*type\s*:.*$', flags=re.DOTALL) TRAIL_OR_COM = re.compile('\s*$|\s*#.*$', flags=re.DOTALL) class _Data: """Internal class describing global data on file.""" def __init__(self, lines, tokens): self.lines = lines self.tokens = tokens ttab = defaultdict(list) # maps line number to token numbers for i, tok in enumerate(tokens): ttab[tok.start[0]].append(i) self.ttab = ttab self.success = [] # list of lines where type comments where processed self.fail = [] # list of lines where type comments where rejected def skip_blank(d, lno): while d.lines[lno].strip() == '': lno += 1 return lno def find_start(d, lcom): """Find first char of the assignment target.""" i = d.ttab[lcom + 1][-2] # index of type comment token in tokens list while ((d.tokens[i].exact_type != tokenize.NEWLINE) and (d.tokens[i].exact_type != tokenize.ENCODING)): i -= 1 lno = d.tokens[i].start[0] return skip_blank(d, lno) def check_target(stmt): if len(stmt.body): assign = stmt.body[0] else: return False if isinstance(assign, ast.Assign) and len(assign.targets) == 1: targ = assign.targets[0] else: return False if (isinstance(targ, ast.Name) or isinstance(targ, ast.Attribute) or isinstance(targ, ast.Subscript)): return True return False def find_eq(d, lstart): """Find equal sign starting from lstart taking care about d[f(x=1)] = 5.""" col = pars = 0 lno = lstart while d.lines[lno][col] != '=' or pars != 0: ch = d.lines[lno][col] if ch in '([{': pars += 1 elif ch in ')]}': pars -= 1 if ch == '#' or col == len(d.lines[lno])-1: lno = skip_blank(d, lno+1) col = 0 else: col += 1 return lno, col def find_val(d, poseq): """Find position of first char of assignment value starting from poseq.""" lno, col = poseq while (d.lines[lno][col].isspace() or d.lines[lno][col] in '=\\'): if col == len(d.lines[lno])-1: lno += 1 col = 0 else: col += 1 return lno, col def find_targ(d, poseq): """Find position of last char of target (annotation goes here).""" lno, col = poseq while (d.lines[lno][col].isspace() or d.lines[lno][col] in '=\\'): if col == 0: lno -= 1 col = len(d.lines[lno])-1 else: col -= 1 return lno, col+1 def trim(new_lines, string, ltarg, poseq, lcom, ccom): """Remove None or Ellipsis from assignment value. Also remove parens if one has (None), (...) etc. string -- 'None' or '...' ltarg -- line where last char of target is located poseq -- position of equal sign lcom, ccom -- position of type comment """ nopars = lambda s: s.replace('(', '').replace(')', '') leq, ceq = poseq end = ccom if leq == lcom else len(new_lines[leq]) subline = new_lines[leq][:ceq] if leq == ltarg: subline = subline.rstrip() new_lines[leq] = subline + (new_lines[leq][end:] if leq == lcom else new_lines[leq][ceq+1:end]) for lno in range(leq+1,lcom): new_lines[lno] = nopars(new_lines[lno]) if lcom != leq: subline = nopars(new_lines[lcom][:ccom]).replace(string, '') if (not subline.isspace()): subline = subline.rstrip() new_lines[lcom] = subline + new_lines[lcom][ccom:] def _com2ann(d, drop_None, drop_Ellipsis): new_lines = d.lines[:] for lcom, line in enumerate(d.lines): match = re.search(TYPE_COM, line) if match: # strip " # type : annotation \n" -> "annotation \n" tp = match.group().lstrip()[1:].lstrip()[4:].lstrip()[1:].lstrip() submatch = re.search(TRAIL_OR_COM, tp) subcom = '' if submatch and submatch.group(): subcom = submatch.group() tp = tp[:submatch.start()] if tp == 'ignore': continue ccom = match.start() if not any(d.tokens[i].exact_type == tokenize.COMMENT for i in d.ttab[lcom + 1]): d.fail.append(lcom) continue # type comment inside string lstart = find_start(d, lcom) stmt_str = dedent(''.join(d.lines[lstart:lcom+1])) try: stmt = ast.parse(stmt_str) except SyntaxError: d.fail.append(lcom) continue # for or with statements if not check_target(stmt): d.fail.append(lcom) continue d.success.append(lcom) val = stmt.body[0].value # writing output now poseq = find_eq(d, lstart) lval, cval = find_val(d, poseq) ltarg, ctarg = find_targ(d, poseq) op_par = '' cl_par = '' if isinstance(val, ast.Tuple): if d.lines[lval][cval] != '(': op_par = '(' cl_par = ')' # write the comment first new_lines[lcom] = d.lines[lcom][:ccom].rstrip() + cl_par + subcom ccom = len(d.lines[lcom][:ccom].rstrip()) string = False if isinstance(val, ast.Tuple): # t = 1, 2 -> t = (1, 2); only latter is allowed with annotation free_place = int(new_lines[lval][cval-2:cval] == ' ') new_lines[lval] = (new_lines[lval][:cval-free_place] + op_par + new_lines[lval][cval:]) elif isinstance(val, ast.Ellipsis) and drop_Ellipsis: string = '...' elif (isinstance(val, ast.NameConstant) and val.value is None and drop_None): string = 'None' if string: trim(new_lines, string, ltarg, poseq, lcom, ccom) # finally write an annotation new_lines[ltarg] = (new_lines[ltarg][:ctarg] + ': ' + tp + new_lines[ltarg][ctarg:]) return ''.join(new_lines) def com2ann(code, *, drop_None=False, drop_Ellipsis=False, silent=False): """Translate type comments to type annotations in code. Take code as string and return this string where:: variable = value # type: annotation # real comment is translated to:: variable: annotation = value # real comment For unsupported syntax cases, the type comments are left intact. If drop_None is True or if drop_Ellipsis is True translate correcpondingly:: variable = None # type: annotation variable = ... # type: annotation into:: variable: annotation The tool tries to preserve code formatting as much as possible, but an exact translation is not guarateed. A summary of translated comments id printed by default. """ try: ast.parse(code) # we want to work only with file without syntax errors except SyntaxError: return None lines = code.splitlines(keepends=True) rl = BytesIO(code.encode('utf-8')).readline tokens = list(tokenize.tokenize(rl)) data = _Data(lines, tokens) new_code = _com2ann(data, drop_None, drop_Ellipsis) if not silent: if data.success: print('Comments translated on lines:', ', '.join(str(lno+1) for lno in data.success)) if data.fail: print('Comments rejected on lines:', ', '.join(str(lno+1) for lno in data.fail)) if not data.success and not data.fail: print('No type comments found') return new_code def translate_file(infile, outfile, dnone, dell, silent): try: descr = tokenize.open(infile) except SyntaxError: print("Cannot open", infile) return with descr as f: code = f.read() enc = f.encoding if not silent: print('File:', infile) new_code = com2ann(code, drop_None=dnone, drop_Ellipsis=dell, silent=silent) if new_code is None: print("SyntaxError in", infile) return with open(outfile, 'wb') as f: f.write((new_code).encode(enc)) if __name__ == '__main__': parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("-o", "--outfile", help="output file, will be overwritten if exists,\n" "defaults to input file") parser.add_argument("infile", help="input file or directory for translation, must\n" "contain no syntax errors, for directory\n" "the outfile is ignored and translation is\n" "made in place") parser.add_argument("-s", "--silent", help="Do not print summary for line numbers of\n" "translated and rejected comments", action="store_true") parser.add_argument("-n", "--drop-none", help="drop any None as assignment value during\n" "translation if it is annotated by a type coment", action="store_true") parser.add_argument("-e", "--drop-ellipsis", help="drop any Ellipsis (...) as assignment value during\n" "translation if it is annotated by a type coment", action="store_true") args = parser.parse_args() if args.outfile is None: args.outfile = args.infile if os.path.isfile(args.infile): translate_file(args.infile, args.outfile, args.drop_none, args.drop_ellipsis, args.silent) else: for root, dirs, files in os.walk(args.infile): for afile in files: _, ext = os.path.splitext(afile) if ext == '.py' or ext == '.pyi': fname = os.path.join(root, afile) translate_file(fname, fname, args.drop_none, args.drop_ellipsis, args.silent)