diff options
Diffstat (limited to 'tools/gen_struct_info.py')
-rw-r--r-- | tools/gen_struct_info.py | 335 |
1 files changed, 238 insertions, 97 deletions
diff --git a/tools/gen_struct_info.py b/tools/gen_struct_info.py index 563b4fa1..203a18ec 100644 --- a/tools/gen_struct_info.py +++ b/tools/gen_struct_info.py @@ -75,29 +75,38 @@ The JSON output format is based on the return value of Runtime.generateStructInf ''' -import sys -import os -import json -import argparse -import tempfile -import subprocess +import sys, os, re, json, argparse, tempfile, subprocess import shared +QUIET = (__name__ != '__main__') + +def show(msg): + global QUIET + if not QUIET: + sys.stderr.write(msg + '\n') # Try to load pycparser. try: import pycparser except ImportError: # The import failed, warn the user. - sys.stderr.write('WARN: pycparser isn\'t available. I won\'t be able to parse C files, only .json files.\n') + show('WARN: pycparser isn\'t available. I won\'t be able to parse C files, only .json files.') def parse_header(path, cpp_opts): # Tell the user how to get pycparser, if he tries to parse a C file. sys.stderr.write('ERR: I need pycparser to process C files. \n') - sys.stderr.write(' Use "pip install pycparser" or go to "https://github.com/eliben/pycparser" to install it.\n') + sys.stderr.write(' Use "pip install pycparser" to install or download it from "https://github.com/eliben/pycparser".\n') sys.exit(1) else: # We successfully imported pycparser, the script will be completely functional. + class DelayedRef(object): + def __init__(self, dest): + self.dest = dest + + def __str__(self): + return self.dest + + # For a list of node types and their fields, look here: https://github.com/eliben/pycparser/blob/master/pycparser/_c_ast.cfg class FieldVisitor(pycparser.c_ast.NodeVisitor): def __init__(self): self._name = None @@ -106,21 +115,30 @@ else: def visit_Struct(self, node): if node.decls == None: - # Skip empty struct declarations. + self.named_structs[self._name] = DelayedRef(node.name) return + fields = [] for decl in node.decls: - # Look for nested structs. - subwalk = FieldVisitor() - subwalk.visit(decl) + if decl.name == None: + # Well, this field doesn't have a name. + continue - if subwalk.named_structs: - # Store the nested fields. - fields.append(subwalk.named_structs) - else: - # Just store the field name. + if decl.type != None and isinstance(decl.type, pycparser.c_ast.PtrDecl): + # This field is a pointer, there's no point in looking for nested structs. fields.append(decl.name) + else: + # Look for nested structs. + subwalk = FieldVisitor() + subwalk.visit(decl) + + if subwalk.named_structs: + # Store the nested fields. + fields.append(subwalk.named_structs) + else: + # Just store the field name. + fields.append(decl.name) if node.name != None: self.structs[node.name] = fields @@ -136,9 +154,68 @@ else: self._name = node.declname self.generic_visit(node) self._name = old_name - + + # The first parameter is a structure, the second is a path (a list containing all the keys, needed to reach the destination). + # The last parameter is an item to look for. This function will try to follow the path into the given object and then look there for this key. + # As long as the nested object doesn't have the given key, it will descent into the next higher object till it finds the given key. + # + # Example: + # + # res = look_through({ + # 'la1': { + # 'lb1': { + # 'lc1': 99, + # 'lc2': { 'ld1': 11 } + # 'lc2': 200 + # }, + # 'nice': 100 + # }, + # 'nice': 300 + # }, ['la1', 'lb1', 'lc2'], 'nice') + # + # print(res) # Prints 100 . + # + # In this case the function looked inside obj['la1']['lb1']['lc2']['nice'], then obj['la1']['lb1']['nice'] and found the value + # in obj['la1']['nice']. As soon as it finds a value it returns it and stops looking. + def look_through(obj, path, name): + cur_level = obj + path = path[:] + for i, p in enumerate(path): + cur_level = cur_level[p] + path[i] = cur_level + + path = [ obj ] + path + + while len(path) > 0: + if name in path[-1]: + return path[-1][name] + else: + path.pop() + + return None + + # Use the above function to resolve all DelayedRef() inside a list or dict recursively. + def resolve_delayed(item, root=None, path=[]): + if root == None: + root = item + + if isinstance(item, DelayedRef): + if item.dest in path: + show('WARN: Circular reference found! Field "' + path[-1] + '" references "' + item.dest + '"! (Path = ' + '/'.join([str(part) for part in path]) + ')') + return { '__ref__': item.dest } + else: + return look_through(root, path[:-1], item.dest) + elif isinstance(item, dict): + for name, val in item.items(): + item[name] = resolve_delayed(val, root, path + [ name ]) + elif isinstance(item, list): + for i, val in enumerate(item): + item[i] = resolve_delayed(val, root, path + [ i ]) + + return item + def parse_header(path, cpp_opts): - sys.stderr.write('Parsing header "' + path + '"...\n') + show('Parsing header "' + path + '"...') # Use clang -E as the preprocessor for pycparser. ast = pycparser.parse_file(path, True, cpp_path=shared.CLANG_CC, cpp_args=['-E'] + cpp_opts) @@ -146,10 +223,37 @@ else: # Walk the parsed AST and filter out all the declared structs and their fields. walker = FieldVisitor() walker.visit(ast) - return walker.structs + + walker.structs = resolve_delayed(walker.structs) + with open(path, 'r') as stream: + defines = re.findall(r'(?:^|\n)#define\s+([A-Z|_]+)\s.*', stream.read()) + + return { + 'file': path, + 'defines': defines, + 'structs': walker.structs + } # The following three functions generate C code. The output of the compiled code will be # parsed later on and then put back together into a dict structure by parse_c_output(). +# +# Example: +# c_descent('test1', code) +# c_set('item', 'i%i', '111', code) +# c_set('item2', 'i%i', '9', code) +# c_set('item3', 's%s', '"Hello"', code) +# c_ascent(code) +# c_set('outer', 'f%f', '0.999', code) +# +# Will result in: +# { +# 'test1': { +# 'item': 111, +# 'item2': 9, +# 'item3': 'Hello', +# }, +# 'outer': 0.999 +# } def c_set(name, type_, value, code): code.append('printf("K' + name + '\\n");') code.append('printf("V' + type_ + '\\n", ' + value + ');') @@ -173,7 +277,14 @@ def parse_c_output(lines): key = arg elif line[0] == 'V': # A value - cur_level[key] = int(arg) + if arg[0] == 'i': + arg = int(arg[1:]) + elif arg[0] == 'f': + arg = float(arg[1:]) + elif arg[0] == 's': + arg = arg[1:] + + cur_level[key] = arg elif line[0] == 'D': # Remember the current level as the last parent. parent.append(cur_level) @@ -188,11 +299,19 @@ def parse_c_output(lines): return result def gen_inspect_code(path, struct, code): + if path[0][-1] == '#': + path[0] = path[0][:-1] + prefix = '' + else: + prefix = 'struct ' + c_descent(path[-1], code) + if len(path) == 1: - c_set('__size__', '%lu', 'sizeof (struct ' + path[0] + ')', code) + c_set('__size__', 'i%u', 'sizeof (' + prefix + path[0] + ')', code) else: - c_set('__size__', '%lu', 'sizeof ((struct ' + path[0] + ' *)0)->' + '.'.join(path[1:]), code) + c_set('__size__', 'i%u', 'sizeof ((' + prefix + path[0] + ' *)0)->' + '.'.join(path[1:]), code) + #c_set('__offset__', 'i%u', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:]) + ')', code) for field in struct: if isinstance(field, dict): @@ -200,12 +319,12 @@ def gen_inspect_code(path, struct, code): fname = field.keys()[0] gen_inspect_code(path + [fname], field[fname], code) else: - c_set(field, '%u', 'offsetof(struct ' + path[0] + ', ' + '.'.join(path[1:] + [field]) + ')', code) + c_set(field, 'i%u', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:] + [field]) + ')', code) c_ascent(code) def inspect_code(headers, cpp_opts, structs, defines): - sys.stderr.write('Generating C code...\n') + show('Generating C code...') code = ['#include <stdio.h>', '#include <stddef.h>'] # Include all the needed headers. @@ -219,19 +338,27 @@ def inspect_code(headers, cpp_opts, structs, defines): c_ascent(code) c_descent('defines', code) - for name in defines: - if isinstance(name, list): - type_, name = name - else: - type_ = 'i' - c_set(name, '%' + type_, name, code) + for name, type_ in defines.items(): + # Add the necessary python type, if missing. + if '%' not in type_: + if type_[-1] in ('d', 'i', 'u'): + # integer + type_ = 'i%' + type_ + elif type_[-1] in ('f', 'F', 'e', 'E', 'g', 'G'): + # float + type_ = 'f%' + type_ + elif type_[-1] in ('x', 'X', 'a', 'A', 'c', 's'): + # hexadecimal or string + type_ = 's%' + type_ + + c_set(name, type_, name, code) code.append('return 0;') code.append('}') # Write the source code to a temporary file. src_file = tempfile.mkstemp('.c') - bin_file = tempfile.mkstemp() + bin_file = tempfile.mkstemp('.ll') os.write(src_file[0], '\n'.join(code)) @@ -239,15 +366,18 @@ def inspect_code(headers, cpp_opts, structs, defines): os.close(src_file[0]) os.close(bin_file[0]) + # NOTE: We can't generate an executable in the next step because it won't run on the current system without changing the target. + # If we change the target, some type sizes will change resulting in wrong data. As a workaround, we will be generating bitcode and + # run that with the LLVM interpreter. That way we can use the default target and still run the code. info = [] try: # Compile the program. - sys.stderr.write('Compiling generated code...\n') - subprocess.check_call([shared.CLANG_CC] + cpp_opts + ['-o', bin_file[1], src_file[1]]) + show('Compiling generated code...') + subprocess.check_call([shared.CLANG_CC, '-emit-llvm', '-S'] + cpp_opts + ['-o', bin_file[1], src_file[1]]) # Run the compiled program. - sys.stderr.write('Calling generated program...\n') - info = subprocess.check_output([bin_file[1]]).splitlines() + show('Calling generated program...') + info = subprocess.check_output([shared.LLVM_INTERPRETER, bin_file[1]]).splitlines() except subprocess.CalledProcessError: if os.path.isfile(bin_file[1]): sys.stderr.write('FAIL: Running the generated program failed!\n') @@ -263,47 +393,75 @@ def inspect_code(headers, cpp_opts, structs, defines): os.unlink(bin_file[1]) # Parse the output of the program into a dict. - data = parse_c_output(info) + return parse_c_output(info) + +def parse_json(path, header_files, structs, defines): + with open(path, 'r') as stream: + # Remove comments before loading the JSON. + data = json.loads(re.sub(r'//.*\n', '', stream.read())) - # Convert all the define's values into the appropriate python types (based on the type passed to printf). - for name in defines: - if isinstance(name, list): - type_, name = name - else: - type_ = 'i' + if not isinstance(data, list): + data = [ data ] + + for item in data: + header_files.append(item['file']) + for name, data in item['structs'].items(): + if name in structs: + show('WARN: Description of struct "' + name + '" in file "' + item['file'] + '" replaces an existing description!') + + structs[name] = data - if type_[-1] in ('d', 'i', 'u'): - # Integer - data['defines'][name] = int(data['defines'][name]) - elif type_[-1] in ('x', 'X', 'a', 'A'): - # Hexadecimal - data['defines'][name] = float.fromhex(data['defines'][name]) - elif type_[-1] in ('f', 'F', 'e', 'E', 'g', 'G'): - # Float - data['defines'][name] = float(data['defines'][name]) - # Leave everything else untouched. - - return data + for part in item['defines']: + if not isinstance(part, list): + # If no type is specified, assume integer. + part = ['i', part] + + if part[1] in defines: + show('WARN: Description of define "' + part[1] + '" in file "' + item['file'] + '" replaces an existing description!') + + defines[part[1]] = part[0] + +def output_json(obj, compressed=True, stream=None): + if stream == None: + stream = sys.stdout + elif isinstance(stream, str): + stream = open(stream, 'w') + + if compressed: + json.dump(obj, stream, separators=(',', ':')) + else: + json.dump(obj, stream, indent=4, sort_keys=True) + + stream.close() + +def filter_opts(opts): + # Only apply compiler options regarding syntax, includes and defines. + # We have to compile for the current system, we aren't compiling to bitcode after all. + out = [] + for flag in opts: + if flag[:2] in ('-f', '-I', '-i', '-D', '-U'): + out.append(flag) + + return out -def main(): +def main(args): + global QUIET + parser = argparse.ArgumentParser(description='Generate JSON infos for structs.') parser.add_argument('headers', nargs='+', help='A header (.h) file or a JSON file with a list of structs and their fields') - parser.add_argument('-f', dest='list_fields', action='store_true', default=False, help='Output a list of structs and fields for the first header.') + parser.add_argument('-q', dest='quiet', action='store_true', default=False, help='Don\'t output anything besides error messages.') + parser.add_argument('-f', dest='list_fields', action='store_true', default=False, help='Output a list of structs and fields for the given headers.') parser.add_argument('-p', dest='pretty_print', action='store_true', default=False, help='Pretty print the outputted JSON.') parser.add_argument('-o', dest='output', metavar='path', default=None, help='Path to the JSON file that will be written. If omitted, the generated data will be printed to stdout.') parser.add_argument('-I', dest='includes', metavar='dir', action='append', default=[], help='Add directory to include search path') parser.add_argument('-D', dest='defines', metavar='define', action='append', default=[], help='Pass a define to the preprocessor') parser.add_argument('-U', dest='undefines', metavar='undefine', action='append', default=[], help='Pass an undefine to the preprocessor') - args = parser.parse_args() + args = parser.parse_args(args) - # Avoid parsing problems due to gcc specifc syntax. - cpp_opts = ['-U__GNUC__'] + QUIET = args.quiet - # Only apply compiler options regarding syntax, includes and defines. - # We have to compile for the current system, we aren't compiling to bitcode after all. - for flag in shared.COMPILER_OPTS: - if flag[:2] in ('-f', '-I', '-i', '-D', '-U'): - cpp_opts.append(flag) + # Avoid parsing problems due to gcc specifc syntax. + cpp_opts = ['-U__GNUC__', '-D_GNU_SOURCE'] + shared.COMPILER_OPTS # Add the user options to the list as well. for path in args.includes: @@ -316,20 +474,15 @@ def main(): cpp_opts.append('-U' + arg) if args.list_fields: - # Just parse the first header and output the result. - structs = parse_header(args.headers[0], cpp_opts) - data = { - 'file': args.headers[0], - 'structs': structs, - 'defines': [] - } - - if args.output == None: - sys.stdout.write(json.dumps(data, indent=4 if args.pretty_print else None)) - else: - with open(args.output, 'w') as stream: - json.dump(data, stream, indent=4 if args.pretty_print else None) + # Just parse the given headers and output the result. + data = [] + for path in args.headers: + if path[-5:] == '.json': + show('WARN: Skipping "' + path + '" because it\'s already a JSON file!') + else: + data.append(parse_header(path, cpp_opts)) + output_json(data, not args.pretty_print, args.output) sys.exit(0) # Look for structs in all passed headers. @@ -339,30 +492,18 @@ def main(): for header in args.headers: if header[-5:] == '.json': - # This is a JSON file, simply load it. - with open(header, 'r') as stream: - data = json.load(stream) - - if not isinstance(data, list): - data = [ data ] - - for item in data: - header_files.append(item['file']) - structs.update(item['structs']) - defines.update(item['defines']) + # This is a JSON file, parse it. + parse_json(header, header_files, structs, defines) else: # If the passed file isn't a JSON file, assume it's a header. header_files.append(header) - structs.update(parse_header(header, cpp_opts)) + data = parse_header(header, cpp_opts) + structs.update(data['structs']) + defines.extend(data['defines']) # Inspect all collected structs. struct_info = inspect_code(header_files, cpp_opts, structs, defines) - - if args.output == None: - sys.stdout.write(json.dumps(struct_info, indent=4 if args.pretty_print else None)) - else: - with open(args.output, 'w') as stream: - json.dump(struct_info, stream, indent=4 if args.pretty_print else None) + output_json(struct_info, not args.pretty_print, args.output) if __name__ == '__main__': - main()
\ No newline at end of file + main(sys.argv[1:])
\ No newline at end of file |