# -*- encoding: utf8 -*-

'''
This tool extracts information about structs and defines from the C headers.
You can pass either the raw header files or JSON files to this script.

The JSON input format is as follows:
[
  {
    'file': 'some/header.h',
    'structs': {
      'struct_name': [
        'field1',
        'field2',
        'field3',
        {
          'field4': [
            'nested1',
            'nested2',
            {
              'nested3': [
                'deep_nested1',
                ...
              ]
            }
            ...
          ]
        },
        'field5'
      ],
      'other_struct': [
        'field1',
        'field2',
        ...
      ]
    },
    'defines': [
      'DEFINE_1',
      'DEFINE_2',
      ['f', 'FLOAT_DEFINE'],
      'DEFINE_3',
      ...
    ]
  },
  {
    'file': 'some/other/header.h',
    ...
  }
]

Please note that the 'f' for 'FLOAT_DEFINE' is just the format passed to printf(), you can put anything printf() understands.
If you call this script with the flag "-f" and pass a header file, it will create an automated boilerplate for you.

The JSON output format is based on the return value of Runtime.generateStructInfo().
{
  'structs': {
    'struct_name': {
      '__size__': <size of the struct>,
      'field1': <offset of field1>,
      'field2': <offset of field2>,
      'field3': <offset of field3>,
      'field4': {
        '__size__': <size of field4>,
        'nested1': <offset of nested1>,
        ...
      },
      ...
    }
  },
  'defines': {
    'DEFINE_1': <value of DEFINE_1>,
    ...
  }
}
'''

import sys
import os
import re
import json
import argparse
import tempfile
import subprocess

import shared

# Progress messages are silenced when this file is imported as a module.
QUIET = (__name__ != '__main__')


def show(msg):
    """Write a progress/warning message to stderr unless QUIET is set."""
    if not QUIET:
        sys.stderr.write(msg + '\n')


# Try to load pycparser.
try:
    import pycparser
except ImportError:
    # The import failed, warn the user.
    show('WARN: pycparser isn\'t available. I won\'t be able to parse C files, only .json files.')

    def parse_header(path, cpp_opts):
        """Fallback used when pycparser is missing: report the problem and exit with status 1."""
        # Tell the user how to get pycparser, if he or she tries to parse a C file.
        sys.stderr.write('ERR: I need pycparser to process C files. \n')
        sys.stderr.write(' Use "pip install pycparser" to install or download it from "https://github.com/eliben/pycparser".\n')
        sys.exit(1)
else:
    # We successfully imported pycparser, the script will be completely functional.

    class DelayedRef(object):
        """Placeholder for a struct that is referenced by name but declared elsewhere.

        `dest` is the name of the referenced struct; resolve_delayed() later
        replaces the placeholder with the field list found under that name.
        """

        def __init__(self, dest):
            self.dest = dest

        def __str__(self):
            # str() so that an unnamed destination (None) doesn't raise a TypeError.
            return str(self.dest)

    # For a list of node types and their fields, look here: https://github.com/eliben/pycparser/blob/master/pycparser/_c_ast.cfg
    class FieldVisitor(pycparser.c_ast.NodeVisitor):
        """AST visitor that collects the field names of every struct/union it encounters.

        After visiting:
          structs        maps struct tag names to their field lists.
          named_structs  maps the enclosing declaration name (see visit_TypeDecl)
                         to a field list, or to a DelayedRef when the struct has
                         no body at that point.
        """

        def __init__(self):
            self._name = None
            self.structs = {}
            self.named_structs = {}

        def visit_Struct(self, node):
            if node.decls is None:
                # No body here: remember only the name, it is resolved later.
                self.named_structs[self._name] = DelayedRef(node.name)
                return

            fields = []
            for decl in node.decls:
                if decl.name is None:
                    # Well, this field doesn't have a name.
                    continue

                if isinstance(decl.type, pycparser.c_ast.PtrDecl):
                    # This field is a pointer, there's no point in looking for nested structs.
                    fields.append(decl.name)
                    continue

                # Look for nested structs.
                subwalk = FieldVisitor()
                subwalk.visit(decl)

                if subwalk.named_structs:
                    # Store the nested fields.
                    fields.append(subwalk.named_structs)
                else:
                    # Just store the field name.
                    fields.append(decl.name)

            if node.name is not None:
                self.structs[node.name] = fields

            self.named_structs[self._name] = fields

        def visit_Union(self, node):
            # Unions are recorded exactly like structs.
            self.visit_Struct(node)

        def visit_TypeDecl(self, node):
            # Remember the name of this declaration, so we can access it later in visit_Struct().
            old_name = self._name
            self._name = node.declname
            self.generic_visit(node)
            self._name = old_name

    # The first parameter is a structure, the second is a path (a list containing all the keys, needed to reach the destination).
    # The last parameter is an item to look for. This function will try to follow the path into the given object and then look there for this key.
    # As long as the nested object doesn't have the given key, it will ascend to the enclosing object till it finds the given key.
    #
    # Example:
    #
    #   res = look_through({
    #     'la1': {
    #       'lb1': {
    #         'lc1': 99,
    #         'lc2': { 'ld1': 11 },
    #         'lc3': 200
    #       },
    #       'nice': 100
    #     },
    #     'nice': 300
    #   }, ['la1', 'lb1', 'lc2'], 'nice')
    #
    #   print(res)    # Prints 100 .
    #
    # In this case the function looked inside obj['la1']['lb1']['lc2']['nice'], then obj['la1']['lb1']['nice'] and found the value
    # in obj['la1']['nice']. As soon as it finds a value it returns it and stops looking.
    # Returns None when no level along the path has the key.
    def look_through(obj, path, name):
        # Collect every container along the path, outermost first.
        levels = [obj]
        for key in path:
            levels.append(levels[-1][key])

        # Search from the innermost level outwards. Only dicts can hold the key:
        # a list level may *contain* the name as a plain field, but cannot be
        # indexed by it.
        for level in reversed(levels):
            if isinstance(level, dict) and name in level:
                return level[name]

        return None

    # Use the above function to resolve all DelayedRef() inside a list or dict recursively.
    # `root` and `path` are only used by the recursion; callers pass just `item`.
    # Dicts and lists are updated in place and returned.
    def resolve_delayed(item, root=None, path=None):
        if root is None:
            root = item
        if path is None:
            path = []

        if isinstance(item, DelayedRef):
            if item.dest in path:
                # Path parts may be list indices or None, so convert them before concatenating.
                show('WARN: Circular reference found! Field "' + str(path[-1]) + '" references "' + str(item.dest) + '"! (Path = ' + '/'.join([str(part) for part in path]) + ')')
                return { '__ref__': item.dest }
            return look_through(root, path[:-1], item.dest)

        if isinstance(item, dict):
            # Only existing keys are reassigned, so the dict size doesn't change while iterating.
            for name, val in item.items():
                item[name] = resolve_delayed(val, root, path + [ name ])
        elif isinstance(item, list):
            for i, val in enumerate(item):
                item[i] = resolve_delayed(val, root, path + [ i ])

        return item

    def parse_header(path, cpp_opts):
        """Parse a C header and return {'file', 'defines', 'structs'} for it.

        path      path of the header file.
        cpp_opts  list of extra options passed to the preprocessor.
        """
        show('Parsing header "' + path + '"...')

        # Use clang -E as the preprocessor for pycparser.
        ast = pycparser.parse_file(path, True, cpp_path=shared.CLANG_CC, cpp_args=['-E'] + cpp_opts)

        # Walk the parsed AST and filter out all the declared structs and their fields.
        walker = FieldVisitor()
        walker.visit(ast)

        walker.structs = resolve_delayed(walker.structs)
        with open(path, 'r') as stream:
            # Object-like macros with upper-case names only; the name must be followed by whitespace.
            defines = re.findall(r'(?:^|\n)\s*#define\s+([A-Z_0-9]+)\s.*', stream.read())

        return {
            'file': path,
            'defines': defines,
            'structs': walker.structs
        }

# The following three functions generate C code. The output of the compiled code will be
# parsed later on and then put back together into a dict structure by parse_c_output().
#
# Example:
#   c_descent('test1', code)
#   c_set('item', 'i%i', '111', code)
#   c_set('item2', 'i%i', '9', code)
#   c_set('item3', 's%s', '"Hello"', code)
#   c_ascent(code)
#   c_set('outer', 'f%f', '0.999', code)
#
# Will result in:
#   {
#     'test1': {
#       'item': 111,
#       'item2': 9,
#       'item3': 'Hello',
#     },
#     'outer': 0.999
#   }
def c_set(name, type_, value, code):
    """Append C statements that print a key line ("K<name>") and a value line ("V<type_>")."""
    code.append('printf("K' + name + '\\n");')
    code.append('printf("V' + type_ + '\\n", ' + value + ');')


def c_descent(name, code):
    """Append a C statement that opens a nested dict called `name` ("D<name>")."""
    code.append('printf("D' + name + '\\n");')


def c_ascent(code):
    """Append a C statement that closes the current nested dict ("A")."""
    code.append('printf("A\\n");')


def parse_c_output(lines):
    """Rebuild the nested dict described by the output of the generated program.

    Each line starts with a one-character tag: K (key), V (value for the last
    key), D (descend into a new dict) or A (ascend to the parent dict). A value
    starts with a type character: 'i' (int), 'f' (float) or 's' (string); any
    other value is stored as the raw text. Blank lines are skipped and byte
    strings are decoded as UTF-8.
    """
    result = {}
    cur_level = result
    parent = []
    key = None

    for line in lines:
        # subprocess hands out bytes on Python 3; on Python 2 bytes is str and this is a no-op.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode('utf-8')
        if not line.strip():
            continue

        tag = line[0]
        arg = line[1:].strip()
        if tag == 'K':
            # This is a key
            key = arg
        elif tag == 'V':
            # A value; slicing keeps an empty value from raising an IndexError.
            kind = arg[:1]
            if kind == 'i':
                arg = int(arg[1:])
            elif kind == 'f':
                arg = float(arg[1:])
            elif kind == 's':
                arg = arg[1:]

            cur_level[key] = arg
        elif tag == 'D':
            # Remember the current level as the last parent.
            parent.append(cur_level)

            # We descend one level.
            cur_level[arg] = {}
            cur_level = cur_level[arg]
        elif tag == 'A':
            # We return to the parent dict. (One level up.)
            cur_level = parent.pop()

    return result


def gen_inspect_code(path, struct, code):
    """Append C code that prints the size of a struct (or nested member) and its field offsets.

    path    list whose first item is the root type name, followed by the member
            names leading to the nested struct being inspected. A trailing '#'
            on the root name is stripped and makes the type be used without the
            `struct ` keyword. The list is not modified.
    struct  list of fields: plain names, or dicts mapping a nested member name
            to its own field list.
    code    list of C statements that is appended to.
    """
    root = path[0]
    if root.endswith('#'):
        type_name = root[:-1]
        c_type = type_name
    else:
        type_name = root
        c_type = 'struct ' + type_name
    members = path[1:]

    c_descent(members[-1] if members else type_name, code)

    if members:
        c_set('__size__', 'i%u', 'sizeof ((' + c_type + ' *)0)->' + '.'.join(members), code)
    else:
        c_set('__size__', 'i%u', 'sizeof (' + c_type + ')', code)

    for field in struct:
        if isinstance(field, dict):
            # We have to recurse to inspect the nested dict. The root keeps its
            # '#' marker, so nested members use the same type spelling.
            for fname, nested in field.items():
                gen_inspect_code(path + [fname], nested, code)
        else:
            c_set(field, 'i%u', 'offsetof(' + c_type + ', ' + '.'.join(members + [field]) + ')', code)

    c_ascent(code)


def inspect_code(headers, cpp_opts, structs, defines):
    """Generate, compile and run a C program that reports struct layouts and define values.

    headers   list of header paths to #include.
    cpp_opts  list of extra compiler options.
    structs   dict mapping struct names to field lists (see gen_inspect_code).
    defines   dict mapping define names to their printf() format.

    Returns the dict parsed from the program's output. Exits with status 1 when
    compiling or running the program fails.
    """
    show('Generating C code...')

    # printf() needs <stdio.h>, offsetof() needs <stddef.h>.
    code = ['#include <stdio.h>', '#include <stddef.h>']
    # Include all the needed headers.
    for path in headers:
        code.append('#include "' + path + '"')

    code.append('int main() {')
    c_descent('structs', code)
    for name, struct in structs.items():
        gen_inspect_code([name], struct, code)

    c_ascent(code)
    c_descent('defines', code)
    for name, type_ in defines.items():
        # Add the necessary python type, if missing.
        if '%' not in type_:
            conversion = type_[-1:]
            if conversion in ('d', 'i', 'u'):
                # integer
                type_ = 'i%' + type_
            elif conversion in ('f', 'F', 'e', 'E', 'g', 'G'):
                # float
                type_ = 'f%' + type_
            elif conversion in ('x', 'X', 'a', 'A', 'c', 's'):
                # hexadecimal or string
                type_ = 's%' + type_

        c_set(name, type_, name, code)

    code.append('return 0;')
    code.append('}')

    # Write the source code to a temporary file.
    src_fd, src_path = tempfile.mkstemp('.c')
    bin_fd, bin_path = tempfile.mkstemp('.ll')

    # NOTE: We can't generate an executable in the next step because it won't run on the current system without changing the target.
    # If we change the target, some type sizes will change resulting in wrong data. As a workaround, we will be generating bitcode and
    # run that with the LLVM interpreter. That way we can use the default target and still run the code.
    info = []
    try:
        try:
            # os.write() needs bytes on Python 3.
            os.write(src_fd, '\n'.join(code).encode('utf-8'))
        finally:
            # Close all unneeded FDs.
            os.close(src_fd)
            os.close(bin_fd)

        # Compile the program. mkstemp() already created the output file, so its
        # existence says nothing about success; each step reports its own failure.
        show('Compiling generated code...')
        try:
            subprocess.check_call([shared.CLANG_CC, '-emit-llvm', '-S'] + cpp_opts + ['-o', bin_path, src_path])
        except subprocess.CalledProcessError:
            sys.stderr.write('FAIL: Compilation failed!\n')
            sys.exit(1)

        # Run the compiled program.
        show('Calling generated program...')
        try:
            output = subprocess.check_output([shared.LLVM_INTERPRETER, bin_path])
        except subprocess.CalledProcessError:
            sys.stderr.write('FAIL: Running the generated program failed!\n')
            sys.exit(1)

        if not isinstance(output, str):
            output = output.decode('utf-8')
        info = output.splitlines()
    finally:
        # Remove all temporary files.
        os.unlink(src_path)

        if os.path.exists(bin_path):
            os.unlink(bin_path)

    # Parse the output of the program into a dict.
    return parse_c_output(info)


def parse_json(path, header_files, structs, defines):
    """Load a JSON description file and merge it into the given collections.

    path          path of the JSON file (see the input format in the module docstring).
    header_files  list that receives the 'file' entry of every description.
    structs       dict that receives struct name -> field list.
    defines       dict that receives define name -> printf() format.

    Existing entries are replaced after a warning. '//' comments are removed
    before parsing; note that this also removes '//' sequences inside string values.
    """
    with open(path, 'r') as stream:
        # Remove comments before loading the JSON.
        data = json.loads(re.sub(r'//.*', '', stream.read()))

    if not isinstance(data, list):
        data = [ data ]

    for item in data:
        header_files.append(item['file'])
        for name, fields in item.get('structs', {}).items():
            if name in structs:
                show('WARN: Description of struct "' + name + '" in file "' + item['file'] + '" replaces an existing description!')

            structs[name] = fields

        for part in item.get('defines', []):
            if not isinstance(part, list):
                # If no type is specified, assume integer.
                part = ['i', part]

            if part[1] in defines:
                show('WARN: Description of define "' + part[1] + '" in file "' + item['file'] + '" replaces an existing description!')

            defines[part[1]] = part[0]


def _dump_json(obj, compressed, stream):
    """Serialize `obj` to `stream`, either compact or pretty printed."""
    if compressed:
        json.dump(obj, stream, separators=(',', ':'))
    else:
        json.dump(obj, stream, indent=4, sort_keys=True)


def output_json(obj, compressed=True, stream=None):
    """Write `obj` as JSON.

    obj         the data to serialize.
    compressed  True for compact output, False for indented output with sorted keys.
    stream      None to print to stdout, a path (str) to write that file, or an
                open file object, which is closed afterwards.
    """
    if isinstance(stream, str):
        with open(stream, 'w') as handle:
            _dump_json(obj, compressed, handle)
        return

    if stream is None:
        stream = sys.stdout

    _dump_json(obj, compressed, stream)

    if stream is sys.stdout:
        # Closing stdout would break every later write of the process.
        stream.flush()
    else:
        stream.close()


def filter_opts(opts):
    """Return the options from `opts` that start with -f, -I, -i, -D or -U."""
    # Only apply compiler options regarding syntax, includes and defines.
    # We have to compile for the current system, we aren't compiling to bitcode after all.
    wanted = ('-f', '-I', '-i', '-D', '-U')
    return [flag for flag in opts if flag[:2] in wanted]


def main(args):
    """Command line entry point; `args` is the argument list without the program name."""
    global QUIET

    parser = argparse.ArgumentParser(description='Generate JSON infos for structs.')
    parser.add_argument('headers', nargs='+', help='A header (.h) file or a JSON file with a list of structs and their fields')
    parser.add_argument('-q', dest='quiet', action='store_true', default=False, help='Don\'t output anything besides error messages.')
    parser.add_argument('-f', dest='list_fields', action='store_true', default=False, help='Output a list of structs and fields for the given headers.')
    parser.add_argument('-p', dest='pretty_print', action='store_true', default=False, help='Pretty print the outputted JSON.')
    parser.add_argument('-o', dest='output', metavar='path', default=None, help='Path to the JSON file that will be written. If omitted, the generated data will be printed to stdout.')
    parser.add_argument('-I', dest='includes', metavar='dir', action='append', default=[], help='Add directory to include search path')
    parser.add_argument('-D', dest='defines', metavar='define', action='append', default=[], help='Pass a define to the preprocessor')
    parser.add_argument('-U', dest='undefines', metavar='undefine', action='append', default=[], help='Pass an undefine to the preprocessor')
    args = parser.parse_args(args)

    QUIET = args.quiet

    # Avoid parsing problems due to gcc specific syntax.
    cpp_opts = ['-D_GNU_SOURCE'] + shared.COMPILER_OPTS

    # Add the user options to the list as well.
    for path in args.includes:
        cpp_opts.append('-I' + path)

    for arg in args.defines:
        cpp_opts.append('-D' + arg)

    for arg in args.undefines:
        cpp_opts.append('-U' + arg)

    if args.list_fields:
        # Just parse the given headers and output the result.
        data = []
        for path in args.headers:
            if path.endswith('.json'):
                show('WARN: Skipping "' + path + '" because it\'s already a JSON file!')
            else:
                data.append(parse_header(path, cpp_opts))

        output_json(data, not args.pretty_print, args.output)
        sys.exit(0)

    # Look for structs in all passed headers.
    header_files = []
    structs = {}
    defines = {}

    for header in args.headers:
        if header.endswith('.json'):
            # This is a JSON file, parse it.
            parse_json(header, header_files, structs, defines)
        else:
            # If the passed file isn't a JSON file, assume it's a header.
            header_files.append(header)
            data = parse_header(header, cpp_opts)
            structs.update(data['structs'])
            # parse_header() returns plain names while `defines` maps names to
            # printf() formats: assume integer, as parse_json() does, and keep
            # any format that was specified explicitly before.
            for name in data['defines']:
                defines.setdefault(name, 'i')

    # Inspect all collected structs.
    struct_info = inspect_code(header_files, cpp_opts, structs, defines)

    output_json(struct_info, not args.pretty_print, args.output)


if __name__ == '__main__':
    main(sys.argv[1:])