| 
									
										
										
										
											2011-07-26 18:01:08 +02:00
										 |  |  | #!/usr/bin/env python3 | 
					
						
							| 
									
										
										
										
											2010-12-03 20:14:31 +00:00
										 |  |  | # This script converts a C file to use the PEP 384 type definition API | 
					
						
							|  |  |  | # Usage: abitype.py < old_code > new_code | 
					
						
							|  |  |  | import re, sys | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | ###### Replacement of PyTypeObject static instances ############## | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # classify each token, giving it a one-letter code: | 
					
						
							|  |  |  | # S: static | 
					
						
							|  |  |  | # T: PyTypeObject | 
					
						
							|  |  |  | # I: ident | 
					
						
							|  |  |  | # W: whitespace | 
					
						
							|  |  |  | # =, {, }, ; : themselves | 
					
						
							|  |  |  | def classify(): | 
					
						
							|  |  |  |     res = [] | 
					
						
							|  |  |  |     for t,v in tokens: | 
					
						
							|  |  |  |         if t == 'other' and v in "={};": | 
					
						
							|  |  |  |             res.append(v) | 
					
						
							|  |  |  |         elif t == 'ident': | 
					
						
							|  |  |  |             if v == 'PyTypeObject': | 
					
						
							|  |  |  |                 res.append('T') | 
					
						
							|  |  |  |             elif v == 'static': | 
					
						
							|  |  |  |                 res.append('S') | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 res.append('I') | 
					
						
							|  |  |  |         elif t == 'ws': | 
					
						
							|  |  |  |             res.append('W') | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             res.append('.') | 
					
						
							|  |  |  |     return ''.join(res) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Obtain a list of fields of a PyTypeObject, in declaration order, | 
					
						
							|  |  |  | # skipping ob_base | 
					
						
# All comments inside the initializer are dropped (they are typically
# just the slot names, anyway), and whether the original type was
# declared static is not recorded.
					
						
							|  |  |  | def get_fields(start, real_end): | 
					
						
							|  |  |  |     pos = start | 
					
						
							|  |  |  |     # static? | 
					
						
							|  |  |  |     if tokens[pos][1] == 'static': | 
					
						
							|  |  |  |         pos += 2 | 
					
						
							|  |  |  |     # PyTypeObject | 
					
						
							|  |  |  |     pos += 2 | 
					
						
							|  |  |  |     # name | 
					
						
							|  |  |  |     name = tokens[pos][1] | 
					
						
							|  |  |  |     pos += 1 | 
					
						
							|  |  |  |     while tokens[pos][1] != '{': | 
					
						
							|  |  |  |         pos += 1 | 
					
						
							|  |  |  |     pos += 1 | 
					
						
							|  |  |  |     # PyVarObject_HEAD_INIT | 
					
						
							|  |  |  |     while tokens[pos][0] in ('ws', 'comment'): | 
					
						
							|  |  |  |         pos += 1 | 
					
						
							|  |  |  |     if tokens[pos][1] != 'PyVarObject_HEAD_INIT': | 
					
						
							| 
									
										
										
										
											2012-04-04 21:28:14 -04:00
										 |  |  |         raise Exception('%s has no PyVarObject_HEAD_INIT' % name) | 
					
						
							| 
									
										
										
										
											2010-12-03 20:14:31 +00:00
										 |  |  |     while tokens[pos][1] != ')': | 
					
						
							|  |  |  |         pos += 1 | 
					
						
							|  |  |  |     pos += 1 | 
					
						
							|  |  |  |     # field definitions: various tokens, comma-separated | 
					
						
							|  |  |  |     fields = [] | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  |         while tokens[pos][0] in ('ws', 'comment'): | 
					
						
							|  |  |  |             pos += 1 | 
					
						
							|  |  |  |         end = pos | 
					
						
							|  |  |  |         while tokens[end][1] not in ',}': | 
					
						
							|  |  |  |             if tokens[end][1] == '(': | 
					
						
							|  |  |  |                 nesting = 1 | 
					
						
							|  |  |  |                 while nesting: | 
					
						
							|  |  |  |                     end += 1 | 
					
						
							|  |  |  |                     if tokens[end][1] == '(': nesting+=1 | 
					
						
							|  |  |  |                     if tokens[end][1] == ')': nesting-=1 | 
					
						
							|  |  |  |             end += 1 | 
					
						
							|  |  |  |         assert end < real_end | 
					
						
							|  |  |  |         # join field, excluding separator and trailing ws | 
					
						
							|  |  |  |         end1 = end-1 | 
					
						
							|  |  |  |         while tokens[end1][0] in ('ws', 'comment'): | 
					
						
							|  |  |  |             end1 -= 1 | 
					
						
							|  |  |  |         fields.append(''.join(t[1] for t in tokens[pos:end1+1])) | 
					
						
							|  |  |  |         if tokens[end][1] == '}': | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         pos = end+1 | 
					
						
							|  |  |  |     return name, fields | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # List of type slots as of Python 3.2, omitting ob_base | 
					
						
							|  |  |  | typeslots = [ | 
					
						
							|  |  |  |     'tp_name', | 
					
						
							|  |  |  |     'tp_basicsize', | 
					
						
							|  |  |  |     'tp_itemsize', | 
					
						
							|  |  |  |     'tp_dealloc', | 
					
						
							|  |  |  |     'tp_print', | 
					
						
							|  |  |  |     'tp_getattr', | 
					
						
							|  |  |  |     'tp_setattr', | 
					
						
							|  |  |  |     'tp_reserved', | 
					
						
							|  |  |  |     'tp_repr', | 
					
						
							|  |  |  |     'tp_as_number', | 
					
						
							|  |  |  |     'tp_as_sequence', | 
					
						
							|  |  |  |     'tp_as_mapping', | 
					
						
							|  |  |  |     'tp_hash', | 
					
						
							|  |  |  |     'tp_call', | 
					
						
							|  |  |  |     'tp_str', | 
					
						
							|  |  |  |     'tp_getattro', | 
					
						
							|  |  |  |     'tp_setattro', | 
					
						
							|  |  |  |     'tp_as_buffer', | 
					
						
							|  |  |  |     'tp_flags', | 
					
						
							|  |  |  |     'tp_doc', | 
					
						
							|  |  |  |     'tp_traverse', | 
					
						
							|  |  |  |     'tp_clear', | 
					
						
							|  |  |  |     'tp_richcompare', | 
					
						
							|  |  |  |     'tp_weaklistoffset', | 
					
						
							|  |  |  |     'tp_iter', | 
					
						
							|  |  |  |     'iternextfunc', | 
					
						
							|  |  |  |     'tp_methods', | 
					
						
							|  |  |  |     'tp_members', | 
					
						
							|  |  |  |     'tp_getset', | 
					
						
							|  |  |  |     'tp_base', | 
					
						
							|  |  |  |     'tp_dict', | 
					
						
							|  |  |  |     'tp_descr_get', | 
					
						
							|  |  |  |     'tp_descr_set', | 
					
						
							|  |  |  |     'tp_dictoffset', | 
					
						
							|  |  |  |     'tp_init', | 
					
						
							|  |  |  |     'tp_alloc', | 
					
						
							|  |  |  |     'tp_new', | 
					
						
							|  |  |  |     'tp_free', | 
					
						
							|  |  |  |     'tp_is_gc', | 
					
						
							|  |  |  |     'tp_bases', | 
					
						
							|  |  |  |     'tp_mro', | 
					
						
							|  |  |  |     'tp_cache', | 
					
						
							|  |  |  |     'tp_subclasses', | 
					
						
							|  |  |  |     'tp_weaklist', | 
					
						
							| 
									
										
										
										
											2013-05-17 10:17:43 +03:00
										 |  |  |     'tp_del', | 
					
						
							|  |  |  |     'tp_version_tag', | 
					
						
							| 
									
										
										
										
											2010-12-03 20:14:31 +00:00
										 |  |  | ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Generate a PyType_Spec definition | 
					
						
							|  |  |  | def make_slots(name, fields): | 
					
						
							|  |  |  |     res = [] | 
					
						
							|  |  |  |     res.append('static PyType_Slot %s_slots[] = {' % name) | 
					
						
							|  |  |  |     # defaults for spec | 
					
						
							| 
									
										
										
										
											2011-02-11 20:47:49 +00:00
										 |  |  |     spec = { 'tp_itemsize':'0' } | 
					
						
							| 
									
										
										
										
											2010-12-03 20:14:31 +00:00
										 |  |  |     for i, val in enumerate(fields): | 
					
						
							|  |  |  |         if val.endswith('0'): | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         if typeslots[i] in ('tp_name', 'tp_doc', 'tp_basicsize', | 
					
						
							|  |  |  |                          'tp_itemsize', 'tp_flags'): | 
					
						
							|  |  |  |             spec[typeslots[i]] = val | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  |         res.append('    {Py_%s, %s},' % (typeslots[i], val)) | 
					
						
							|  |  |  |     res.append('};') | 
					
						
							|  |  |  |     res.append('static PyType_Spec %s_spec = {' % name) | 
					
						
							|  |  |  |     res.append('    %s,' % spec['tp_name']) | 
					
						
							|  |  |  |     res.append('    %s,' % spec['tp_basicsize']) | 
					
						
							|  |  |  |     res.append('    %s,' % spec['tp_itemsize']) | 
					
						
							|  |  |  |     res.append('    %s,' % spec['tp_flags']) | 
					
						
							|  |  |  |     res.append('    %s_slots,' % name) | 
					
						
							|  |  |  |     res.append('};\n') | 
					
						
							|  |  |  |     return '\n'.join(res) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-04-04 21:28:14 -04:00
										 |  |  | if __name__ == '__main__': | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ############ Simplistic C scanner ################################## | 
					
						
							|  |  |  |     tokenizer = re.compile( | 
					
						
							|  |  |  |         r"(?P<preproc>#.*\n)" | 
					
						
							|  |  |  |         r"|(?P<comment>/\*.*?\*/)" | 
					
						
							|  |  |  |         r"|(?P<ident>[a-zA-Z_][a-zA-Z0-9_]*)" | 
					
						
							|  |  |  |         r"|(?P<ws>[ \t\n]+)" | 
					
						
							|  |  |  |         r"|(?P<other>.)", | 
					
						
							|  |  |  |         re.MULTILINE) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     tokens = [] | 
					
						
							|  |  |  |     source = sys.stdin.read() | 
					
						
							|  |  |  |     pos = 0 | 
					
						
							|  |  |  |     while pos != len(source): | 
					
						
							|  |  |  |         m = tokenizer.match(source, pos) | 
					
						
							|  |  |  |         tokens.append([m.lastgroup, m.group()]) | 
					
						
							|  |  |  |         pos += len(tokens[-1][1]) | 
					
						
							|  |  |  |         if tokens[-1][0] == 'preproc': | 
					
						
							|  |  |  |             # continuation lines are considered | 
					
						
							|  |  |  |             # only in preprocess statements | 
					
						
							|  |  |  |             while tokens[-1][1].endswith('\\\n'): | 
					
						
							|  |  |  |                 nl = source.find('\n', pos) | 
					
						
							|  |  |  |                 if nl == -1: | 
					
						
							|  |  |  |                     line = source[pos:] | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     line = source[pos:nl+1] | 
					
						
							|  |  |  |                 tokens[-1][1] += line | 
					
						
							|  |  |  |                 pos += len(line) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Main loop: replace all static PyTypeObjects until | 
					
						
							|  |  |  |     # there are none left. | 
					
						
							|  |  |  |     while 1: | 
					
						
							|  |  |  |         c = classify() | 
					
						
							|  |  |  |         m = re.search('(SW)?TWIW?=W?{.*?};', c) | 
					
						
							|  |  |  |         if not m: | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         start = m.start() | 
					
						
							|  |  |  |         end = m.end() | 
					
						
							| 
									
										
										
										
											2013-05-17 10:17:43 +03:00
										 |  |  |         name, fields = get_fields(start, end) | 
					
						
							| 
									
										
										
										
											2012-04-04 21:28:14 -04:00
										 |  |  |         tokens[start:end] = [('',make_slots(name, fields))] | 
					
						
							| 
									
										
										
										
											2010-12-03 20:14:31 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-04-04 21:28:14 -04:00
										 |  |  |     # Output result to stdout | 
					
						
							|  |  |  |     for t, v in tokens: | 
					
						
							|  |  |  |         sys.stdout.write(v) |