diff options
Diffstat (limited to 'tools/gen_struct_info.py')
-rw-r--r-- | tools/gen_struct_info.py | 509 |
1 files changed, 509 insertions, 0 deletions
diff --git a/tools/gen_struct_info.py b/tools/gen_struct_info.py new file mode 100644 index 00000000..aab128eb --- /dev/null +++ b/tools/gen_struct_info.py @@ -0,0 +1,509 @@ +# -*- encoding: utf8 -*- + +''' +This tool extracts information about structs and defines from the C headers. +You can pass either the raw header files or JSON files to this script. + +The JSON input format is as follows: +[ + { + 'file': 'some/header.h', + 'structs': { + 'struct_name': [ + 'field1', + 'field2', + 'field3', + { + 'field4': [ + 'nested1', + 'nested2', + { + 'nested3': [ + 'deep_nested1', + ... + ] + } + ... + ] + }, + 'field5' + ], + 'other_struct': [ + 'field1', + 'field2', + ... + ] + }, + 'defines': [ + 'DEFINE_1', + 'DEFINE_2', + ['f', 'FLOAT_DEFINE'], + 'DEFINE_3', + ... + ] + }, + { + 'file': 'some/other/header.h', + ... + } +] + +Please note that the 'f' for 'FLOAT_DEFINE' is just the format passed to printf(), you can put anything printf() understands. +If you call this script with the flag "-f" and pass a header file, it will create an automated boilerplate for you. + +The JSON output format is based on the return value of Runtime.generateStructInfo(). +{ + 'structs': { + 'struct_name': { + '__size__': <the struct's size>, + 'field1': <field1's offset>, + 'field2': <field2's offset>, + 'field3': <field3's offset>, + 'field4': { + '__size__': <field4's size>, + 'nested1': <nested1's offset>, + ... + }, + ... + } + }, + 'defines': { + 'DEFINE_1': <DEFINE_1's value>, + ... + } +} + +''' + +import sys, os, re, json, argparse, tempfile, subprocess +import shared +QUIET = (__name__ != '__main__') + +def show(msg): + global QUIET + if not QUIET: + sys.stderr.write(msg + '\n') + +# Try to load pycparser. +try: + import pycparser +except ImportError: + # The import failed, warn the user. + show('WARN: pycparser isn\'t available. I won\'t be able to parse C files, only .json files.') + + def parse_header(path, cpp_opts): + # Tell the user how to get pycparser, if he or she tries to parse a C file. + sys.stderr.write('ERR: I need pycparser to process C files. \n') + sys.stderr.write(' Use "pip install pycparser" to install or download it from "https://github.com/eliben/pycparser".\n') + sys.exit(1) +else: + # We successfully imported pycparser, the script will be completely functional. + + class DelayedRef(object): + def __init__(self, dest): + self.dest = dest + + def __str__(self): + return self.dest + + # For a list of node types and their fields, look here: https://github.com/eliben/pycparser/blob/master/pycparser/_c_ast.cfg + class FieldVisitor(pycparser.c_ast.NodeVisitor): + def __init__(self): + self._name = None + self.structs = {} + self.named_structs = {} + + def visit_Struct(self, node): + if node.decls == None: + self.named_structs[self._name] = DelayedRef(node.name) + return + + + fields = [] + for decl in node.decls: + if decl.name == None: + # Well, this field doesn't have a name. + continue + + if decl.type != None and isinstance(decl.type, pycparser.c_ast.PtrDecl): + # This field is a pointer, there's no point in looking for nested structs. + fields.append(decl.name) + else: + # Look for nested structs. + subwalk = FieldVisitor() + subwalk.visit(decl) + + if subwalk.named_structs: + # Store the nested fields. + fields.append(subwalk.named_structs) + else: + # Just store the field name. + fields.append(decl.name) + + if node.name != None: + self.structs[node.name] = fields + + self.named_structs[self._name] = fields + + def visit_Union(self, node): + self.visit_Struct(node) + + def visit_TypeDecl(self, node): + # Remember the name of this typedef, so we can access it later in visit_Struct(). + old_name = self._name + self._name = node.declname + self.generic_visit(node) + self._name = old_name + + # The first parameter is a structure, the second is a path (a list containing all the keys, needed to reach the destination). + # The last parameter is an item to look for. This function will try to follow the path into the given object and then look there for this key. + # As long as the nested object doesn't have the given key, it will descent into the next higher object till it finds the given key. + # + # Example: + # + # res = look_through({ + # 'la1': { + # 'lb1': { + # 'lc1': 99, + # 'lc2': { 'ld1': 11 } + # 'lc2': 200 + # }, + # 'nice': 100 + # }, + # 'nice': 300 + # }, ['la1', 'lb1', 'lc2'], 'nice') + # + # print(res) # Prints 100 . + # + # In this case the function looked inside obj['la1']['lb1']['lc2']['nice'], then obj['la1']['lb1']['nice'] and found the value + # in obj['la1']['nice']. As soon as it finds a value it returns it and stops looking. + def look_through(obj, path, name): + cur_level = obj + path = path[:] + for i, p in enumerate(path): + cur_level = cur_level[p] + path[i] = cur_level + + path = [ obj ] + path + + while len(path) > 0: + if name in path[-1]: + return path[-1][name] + else: + path.pop() + + return None + + # Use the above function to resolve all DelayedRef() inside a list or dict recursively. + def resolve_delayed(item, root=None, path=[]): + if root == None: + root = item + + if isinstance(item, DelayedRef): + if item.dest in path: + show('WARN: Circular reference found! Field "' + path[-1] + '" references "' + item.dest + '"! (Path = ' + '/'.join([str(part) for part in path]) + ')') + return { '__ref__': item.dest } + else: + return look_through(root, path[:-1], item.dest) + elif isinstance(item, dict): + for name, val in item.items(): + item[name] = resolve_delayed(val, root, path + [ name ]) + elif isinstance(item, list): + for i, val in enumerate(item): + item[i] = resolve_delayed(val, root, path + [ i ]) + + return item + + def parse_header(path, cpp_opts): + show('Parsing header "' + path + '"...') + + # Use clang -E as the preprocessor for pycparser. + ast = pycparser.parse_file(path, True, cpp_path=shared.CLANG_CC, cpp_args=['-E'] + cpp_opts) + + # Walk the parsed AST and filter out all the declared structs and their fields. + walker = FieldVisitor() + walker.visit(ast) + + walker.structs = resolve_delayed(walker.structs) + with open(path, 'r') as stream: + defines = re.findall(r'(?:^|\n)\s*#define\s+([A-Z|_|0-9]+)\s.*', stream.read()) + + return { + 'file': path, + 'defines': defines, + 'structs': walker.structs + } + +# The following three functions generate C code. The output of the compiled code will be +# parsed later on and then put back together into a dict structure by parse_c_output(). +# +# Example: +# c_descent('test1', code) +# c_set('item', 'i%i', '111', code) +# c_set('item2', 'i%i', '9', code) +# c_set('item3', 's%s', '"Hello"', code) +# c_ascent(code) +# c_set('outer', 'f%f', '0.999', code) +# +# Will result in: +# { +# 'test1': { +# 'item': 111, +# 'item2': 9, +# 'item3': 'Hello', +# }, +# 'outer': 0.999 +# } +def c_set(name, type_, value, code): + code.append('printf("K' + name + '\\n");') + code.append('printf("V' + type_ + '\\n", ' + value + ');') + +def c_descent(name, code): + code.append('printf("D' + name + '\\n");') + +def c_ascent(code): + code.append('printf("A\\n");') + +def parse_c_output(lines): + result = {} + cur_level = result + parent = [] + key = None + + for line in lines: + arg = line[1:].strip() + if line[0] == 'K': + # This is a key + key = arg + elif line[0] == 'V': + # A value + if arg[0] == 'i': + arg = int(arg[1:]) + elif arg[0] == 'f': + arg = float(arg[1:]) + elif arg[0] == 's': + arg = arg[1:] + + cur_level[key] = arg + elif line[0] == 'D': + # Remember the current level as the last parent. + parent.append(cur_level) + + # We descend one level. + cur_level[arg] = {} + cur_level = cur_level[arg] + elif line[0] == 'A': + # We return to the parent dict. (One level up.) + cur_level = parent.pop() + + return result + +def gen_inspect_code(path, struct, code): + if path[0][-1] == '#': + path[0] = path[0][:-1] + prefix = '' + else: + prefix = 'struct ' + + c_descent(path[-1], code) + + if len(path) == 1: + c_set('__size__', 'i%u', 'sizeof (' + prefix + path[0] + ')', code) + else: + c_set('__size__', 'i%u', 'sizeof ((' + prefix + path[0] + ' *)0)->' + '.'.join(path[1:]), code) + #c_set('__offset__', 'i%u', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:]) + ')', code) + + for field in struct: + if isinstance(field, dict): + # We have to recurse to inspect the nested dict. + fname = field.keys()[0] + gen_inspect_code(path + [fname], field[fname], code) + else: + c_set(field, 'i%u', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:] + [field]) + ')', code) + + c_ascent(code) + +def inspect_code(headers, cpp_opts, structs, defines): + show('Generating C code...') + + code = ['#include <stdio.h>', '#include <stddef.h>'] + # Include all the needed headers. + for path in headers: + code.append('#include "' + path + '"') + + code.append('int main() {') + c_descent('structs', code) + for name, struct in structs.items(): + gen_inspect_code([name], struct, code) + + c_ascent(code) + c_descent('defines', code) + for name, type_ in defines.items(): + # Add the necessary python type, if missing. + if '%' not in type_: + if type_[-1] in ('d', 'i', 'u'): + # integer + type_ = 'i%' + type_ + elif type_[-1] in ('f', 'F', 'e', 'E', 'g', 'G'): + # float + type_ = 'f%' + type_ + elif type_[-1] in ('x', 'X', 'a', 'A', 'c', 's'): + # hexadecimal or string + type_ = 's%' + type_ + + c_set(name, type_, name, code) + + code.append('return 0;') + code.append('}') + + # Write the source code to a temporary file. + src_file = tempfile.mkstemp('.c') + bin_file = tempfile.mkstemp('.ll') + + os.write(src_file[0], '\n'.join(code)) + + # Close all unneeded FDs. + os.close(src_file[0]) + os.close(bin_file[0]) + + # NOTE: We can't generate an executable in the next step because it won't run on the current system without changing the target. + # If we change the target, some type sizes will change resulting in wrong data. As a workaround, we will be generating bitcode and + # run that with the LLVM interpreter. That way we can use the default target and still run the code. + info = [] + try: + # Compile the program. + show('Compiling generated code...') + subprocess.check_call([shared.CLANG_CC, '-emit-llvm', '-S'] + cpp_opts + ['-o', bin_file[1], src_file[1]]) + + # Run the compiled program. + show('Calling generated program...') + info = subprocess.check_output([shared.LLVM_INTERPRETER, bin_file[1]]).splitlines() + except subprocess.CalledProcessError: + if os.path.isfile(bin_file[1]): + sys.stderr.write('FAIL: Running the generated program failed!\n') + else: + sys.stderr.write('FAIL: Compilation failed!\n') + + sys.exit(1) + finally: + # Remove all temporary files. + os.unlink(src_file[1]) + + if os.path.exists(bin_file[1]): + os.unlink(bin_file[1]) + + # Parse the output of the program into a dict. + return parse_c_output(info) + +def parse_json(path, header_files, structs, defines): + with open(path, 'r') as stream: + # Remove comments before loading the JSON. + data = json.loads(re.sub(r'//.*\n', '', stream.read())) + + if not isinstance(data, list): + data = [ data ] + + for item in data: + header_files.append(item['file']) + for name, data in item['structs'].items(): + if name in structs: + show('WARN: Description of struct "' + name + '" in file "' + item['file'] + '" replaces an existing description!') + + structs[name] = data + + for part in item['defines']: + if not isinstance(part, list): + # If no type is specified, assume integer. + part = ['i', part] + + if part[1] in defines: + show('WARN: Description of define "' + part[1] + '" in file "' + item['file'] + '" replaces an existing description!') + + defines[part[1]] = part[0] + +def output_json(obj, compressed=True, stream=None): + if stream == None: + stream = sys.stdout + elif isinstance(stream, str): + stream = open(stream, 'w') + + if compressed: + json.dump(obj, stream, separators=(',', ':')) + else: + json.dump(obj, stream, indent=4, sort_keys=True) + + stream.close() + +def filter_opts(opts): + # Only apply compiler options regarding syntax, includes and defines. + # We have to compile for the current system, we aren't compiling to bitcode after all. + out = [] + for flag in opts: + if flag[:2] in ('-f', '-I', '-i', '-D', '-U'): + out.append(flag) + + return out + +def main(args): + global QUIET + + parser = argparse.ArgumentParser(description='Generate JSON infos for structs.') + parser.add_argument('headers', nargs='+', help='A header (.h) file or a JSON file with a list of structs and their fields') + parser.add_argument('-q', dest='quiet', action='store_true', default=False, help='Don\'t output anything besides error messages.') + parser.add_argument('-f', dest='list_fields', action='store_true', default=False, help='Output a list of structs and fields for the given headers.') + parser.add_argument('-p', dest='pretty_print', action='store_true', default=False, help='Pretty print the outputted JSON.') + parser.add_argument('-o', dest='output', metavar='path', default=None, help='Path to the JSON file that will be written. If omitted, the generated data will be printed to stdout.') + parser.add_argument('-I', dest='includes', metavar='dir', action='append', default=[], help='Add directory to include search path') + parser.add_argument('-D', dest='defines', metavar='define', action='append', default=[], help='Pass a define to the preprocessor') + parser.add_argument('-U', dest='undefines', metavar='undefine', action='append', default=[], help='Pass an undefine to the preprocessor') + args = parser.parse_args(args) + + QUIET = args.quiet + + # Avoid parsing problems due to gcc specifc syntax. + cpp_opts = ['-D_GNU_SOURCE'] + shared.COMPILER_OPTS + + # Add the user options to the list as well. + for path in args.includes: + cpp_opts.append('-I' + path) + + for arg in args.defines: + cpp_opts.append('-D' + arg) + + for arg in args.undefines: + cpp_opts.append('-U' + arg) + + if args.list_fields: + # Just parse the given headers and output the result. + data = [] + for path in args.headers: + if path[-5:] == '.json': + show('WARN: Skipping "' + path + '" because it\'s already a JSON file!') + else: + data.append(parse_header(path, cpp_opts)) + + output_json(data, not args.pretty_print, args.output) + sys.exit(0) + + # Look for structs in all passed headers. + header_files = [] + structs = {} + defines = {} + + for header in args.headers: + if header[-5:] == '.json': + # This is a JSON file, parse it. + parse_json(header, header_files, structs, defines) + else: + # If the passed file isn't a JSON file, assume it's a header. + header_files.append(header) + data = parse_header(header, cpp_opts) + structs.update(data['structs']) + defines.extend(data['defines']) + + # Inspect all collected structs. + struct_info = inspect_code(header_files, cpp_opts, structs, defines) + output_json(struct_info, not args.pretty_print, args.output) + +if __name__ == '__main__': + main(sys.argv[1:]) |