1 files changed, 238 insertions, 97 deletions
diff --git a/tools/gen_struct_info.py b/tools/gen_struct_info.py
index 563b4fa1..203a18ec 100644
--- a/tools/gen_struct_info.py
+++ b/tools/gen_struct_info.py
@@ -75,29 +75,38 @@ The JSON output format is based on the return value of Runtime.generateStructInf
 
 '''
 
-import sys
-import os
-import json
-import argparse
-import tempfile
-import subprocess
+import sys, os, re, json, argparse, tempfile, subprocess
 import shared
+QUIET = (__name__ != '__main__')
+
+def show(msg):
+  global QUIET
+  if not QUIET:
+    sys.stderr.write(msg + '\n')
 
 # Try to load pycparser.
 try:
   import pycparser
 except ImportError:
   # The import failed, warn the user.
-  sys.stderr.write('WARN: pycparser isn\'t available. I won\'t be able to parse C files, only .json files.\n')
+  show('WARN: pycparser isn\'t available. I won\'t be able to parse C files, only .json files.')
   
   def parse_header(path, cpp_opts):
     # Tell the user how to get pycparser, if he tries to parse a C file.
     sys.stderr.write('ERR: I need pycparser to process C files. \n')
-    sys.stderr.write('   Use "pip install pycparser" or go to "https://github.com/eliben/pycparser" to install it.\n')
+    sys.stderr.write('   Use "pip install pycparser" to install or download it from "https://github.com/eliben/pycparser".\n')
     sys.exit(1)
 else:
   # We successfully imported pycparser, the script will be completely functional.
   
+  class DelayedRef(object):
+    def __init__(self, dest):
+      self.dest = dest
+    
+    def __str__(self):
+      return self.dest
+  
+  # For a list of node types and their fields, look here: https://github.com/eliben/pycparser/blob/master/pycparser/_c_ast.cfg
   class FieldVisitor(pycparser.c_ast.NodeVisitor):
     def __init__(self):
       self._name = None
@@ -106,21 +115,30 @@ else:
     
     def visit_Struct(self, node):
       if node.decls == None:
-        # Skip empty struct declarations.
+        self.named_structs[self._name] = DelayedRef(node.name)
         return
+        
       
       fields = []
       for decl in node.decls:
-        # Look for nested structs.
-        subwalk = FieldVisitor()
-        subwalk.visit(decl)
+        if decl.name == None:
+          # Well, this field doesn't have a name.
+          continue
         
-        if subwalk.named_structs:
-          # Store the nested fields.
-          fields.append(subwalk.named_structs)
-        else:
-          # Just store the field name.
+        if decl.type != None and isinstance(decl.type, pycparser.c_ast.PtrDecl):
+          # This field is a pointer, there's no point in looking for nested structs.
           fields.append(decl.name)
+        else:
+          # Look for nested structs.
+          subwalk = FieldVisitor()
+          subwalk.visit(decl)
+          
+          if subwalk.named_structs:
+            # Store the nested fields.
+            fields.append(subwalk.named_structs)
+          else:
+            # Just store the field name.
+            fields.append(decl.name)
       
       if node.name != None:
         self.structs[node.name] = fields
@@ -136,9 +154,68 @@ else:
       self._name = node.declname
       self.generic_visit(node)
       self._name = old_name
-
+  
+  # The first parameter is a structure, the second is a path (a list containing all the keys, needed to reach the destination).
+  # The last parameter is an item to look for. This function will try to follow the path into the given object and then look there for this key.
+  # As long as the nested object doesn't have the given key, it will descent into the next higher object till it finds the given key.
+  # 
+  # Example:
+  # 
+  # res = look_through({
+  #   'la1': {
+  #     'lb1': {
+  #       'lc1': 99,
+  #       'lc2': { 'ld1': 11 }
+  #       'lc2': 200
+  #     },
+  #     'nice': 100
+  #   },
+  #   'nice': 300
+  # }, ['la1', 'lb1', 'lc2'], 'nice')
+  # 
+  # print(res)    # Prints 100 .
+  # 
+  # In this case the function looked inside obj['la1']['lb1']['lc2']['nice'], then obj['la1']['lb1']['nice'] and found the value
+  # in obj['la1']['nice']. As soon as it finds a value it returns it and stops looking.
+  def look_through(obj, path, name):
+    cur_level = obj
+    path = path[:]
+    for i, p in enumerate(path):
+      cur_level = cur_level[p]
+      path[i] = cur_level
+    
+    path = [ obj ] + path
+    
+    while len(path) > 0:
+      if name in path[-1]:
+        return path[-1][name]
+      else:
+        path.pop()
+    
+    return None
+  
+  # Use the above function to resolve all DelayedRef() inside a list or dict recursively.
+  def resolve_delayed(item, root=None, path=[]):
+    if root == None:
+      root = item
+    
+    if isinstance(item, DelayedRef):
+      if item.dest in path:
+        show('WARN: Circular reference found! Field "' + path[-1] + '" references "' + item.dest + '"! (Path = ' + '/'.join([str(part) for part in path]) + ')')
+        return { '__ref__': item.dest }
+      else:
+        return look_through(root, path[:-1], item.dest)
+    elif isinstance(item, dict):
+      for name, val in item.items():
+        item[name] = resolve_delayed(val, root, path + [ name ])
+    elif isinstance(item, list):
+      for i, val in enumerate(item):
+        item[i] = resolve_delayed(val, root, path + [ i ])
+    
+    return item
+  
   def parse_header(path, cpp_opts):
-    sys.stderr.write('Parsing header "' + path + '"...\n')
+    show('Parsing header "' + path + '"...')
     
     # Use clang -E as the preprocessor for pycparser.
     ast = pycparser.parse_file(path, True, cpp_path=shared.CLANG_CC, cpp_args=['-E'] + cpp_opts)
@@ -146,10 +223,37 @@ else:
     # Walk the parsed AST and filter out all the declared structs and their fields.
     walker = FieldVisitor()
     walker.visit(ast)
-    return walker.structs
+    
+    walker.structs = resolve_delayed(walker.structs)
+    with open(path, 'r') as stream:
+      defines = re.findall(r'(?:^|\n)#define\s+([A-Z|_]+)\s.*', stream.read())
+    
+    return {
+      'file': path,
+      'defines': defines,
+      'structs': walker.structs
+    }
 
 # The following three functions generate C code. The output of the compiled code will be
 # parsed later on and then put back together into a dict structure by parse_c_output().
+# 
+# Example:
+#   c_descent('test1', code)
+#   c_set('item', 'i%i', '111', code)
+#   c_set('item2', 'i%i', '9', code)
+#   c_set('item3', 's%s', '"Hello"', code)
+#   c_ascent(code)
+#   c_set('outer', 'f%f', '0.999', code)
+#   
+# Will result in:
+#   {
+#     'test1': {
+#       'item': 111,
+#       'item2': 9,
+#       'item3': 'Hello',
+#     },
+#     'outer': 0.999
+#   }
 def c_set(name, type_, value, code):
   code.append('printf("K' + name + '\\n");')
   code.append('printf("V' + type_ + '\\n", ' + value + ');')
@@ -173,7 +277,14 @@ def parse_c_output(lines):
       key = arg
     elif line[0] == 'V':
       # A value
-      cur_level[key] = int(arg)
+      if arg[0] == 'i':
+        arg = int(arg[1:])
+      elif arg[0] == 'f':
+        arg = float(arg[1:])
+      elif arg[0] == 's':
+        arg = arg[1:]
+      
+      cur_level[key] = arg
     elif line[0] == 'D':
       # Remember the current level as the last parent.
       parent.append(cur_level)
@@ -188,11 +299,19 @@ def parse_c_output(lines):
   return result
 
 def gen_inspect_code(path, struct, code):
+  if path[0][-1] == '#':
+    path[0] = path[0][:-1]
+    prefix = ''
+  else:
+    prefix = 'struct '
+  
   c_descent(path[-1], code)
+  
   if len(path) == 1:
-    c_set('__size__', '%lu', 'sizeof (struct ' + path[0] + ')', code)
+    c_set('__size__', 'i%u', 'sizeof (' + prefix + path[0] + ')', code)
   else:
-    c_set('__size__', '%lu', 'sizeof ((struct ' + path[0] + ' *)0)->' + '.'.join(path[1:]), code)
+    c_set('__size__', 'i%u', 'sizeof ((' + prefix + path[0] + ' *)0)->' + '.'.join(path[1:]), code)
+    #c_set('__offset__', 'i%u', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:]) + ')', code)
   
   for field in struct:
     if isinstance(field, dict):
@@ -200,12 +319,12 @@ def gen_inspect_code(path, struct, code):
       fname = field.keys()[0]
       gen_inspect_code(path + [fname], field[fname], code)
     else:
-      c_set(field, '%u', 'offsetof(struct ' + path[0] + ', ' + '.'.join(path[1:] + [field]) + ')', code)
+      c_set(field, 'i%u', 'offsetof(' + prefix + path[0] + ', ' + '.'.join(path[1:] + [field]) + ')', code)
   
   c_ascent(code)
 
 def inspect_code(headers, cpp_opts, structs, defines):
-  sys.stderr.write('Generating C code...\n')
+  show('Generating C code...')
   
   code = ['#include <stdio.h>', '#include <stddef.h>']
   # Include all the needed headers.
@@ -219,19 +338,27 @@ def inspect_code(headers, cpp_opts, structs, defines):
   
   c_ascent(code)
   c_descent('defines', code)
-  for name in defines:
-    if isinstance(name, list):
-      type_, name = name
-    else:
-      type_ = 'i'
-    c_set(name, '%' + type_, name, code)
+  for name, type_ in defines.items():
+    # Add the necessary python type, if missing.
+    if '%' not in type_:
+      if type_[-1] in ('d', 'i', 'u'):
+        # integer
+        type_ = 'i%' + type_
+      elif type_[-1] in ('f', 'F', 'e', 'E', 'g', 'G'):
+        # float
+        type_ = 'f%' + type_
+      elif type_[-1] in ('x', 'X', 'a', 'A', 'c', 's'):
+        # hexadecimal or string
+        type_ = 's%' + type_
+    
+    c_set(name, type_, name, code)
   
   code.append('return 0;')
   code.append('}')
   
   # Write the source code to a temporary file.
   src_file = tempfile.mkstemp('.c')
-  bin_file = tempfile.mkstemp()
+  bin_file = tempfile.mkstemp('.ll')
   
   os.write(src_file[0], '\n'.join(code))
   
@@ -239,15 +366,18 @@ def inspect_code(headers, cpp_opts, structs, defines):
   os.close(src_file[0])
   os.close(bin_file[0])
   
+  # NOTE: We can't generate an executable in the next step because it won't run on the current system without changing the target.
+  # If we change the target, some type sizes will change resulting in wrong data. As a workaround, we will be generating bitcode and
+  # run that with the LLVM interpreter. That way we can use the default target and still run the code.
   info = []
   try:
     # Compile the program.
-    sys.stderr.write('Compiling generated code...\n')
-    subprocess.check_call([shared.CLANG_CC] + cpp_opts + ['-o', bin_file[1], src_file[1]])
+    show('Compiling generated code...')
+    subprocess.check_call([shared.CLANG_CC, '-emit-llvm', '-S'] + cpp_opts + ['-o', bin_file[1], src_file[1]])
     
     # Run the compiled program.
-    sys.stderr.write('Calling generated program...\n')
-    info = subprocess.check_output([bin_file[1]]).splitlines()
+    show('Calling generated program...')
+    info = subprocess.check_output([shared.LLVM_INTERPRETER, bin_file[1]]).splitlines()
   except subprocess.CalledProcessError:
     if os.path.isfile(bin_file[1]):
       sys.stderr.write('FAIL: Running the generated program failed!\n')
@@ -263,47 +393,75 @@ def inspect_code(headers, cpp_opts, structs, defines):
       os.unlink(bin_file[1])
   
   # Parse the output of the program into a dict.
-  data = parse_c_output(info)
+  return parse_c_output(info)
+
+def parse_json(path, header_files, structs, defines):
+  with open(path, 'r') as stream:
+    # Remove comments before loading the JSON.
+    data = json.loads(re.sub(r'//.*\n', '', stream.read()))
   
-  # Convert all the define's values into the appropriate python types (based on the type passed to printf).
-  for name in defines:
-    if isinstance(name, list):
-      type_, name = name
-    else:
-      type_ = 'i'
+  if not isinstance(data, list):
+    data = [ data ]
+  
+  for item in data:
+    header_files.append(item['file'])
+    for name, data in item['structs'].items():
+      if name in structs:
+        show('WARN: Description of struct "' + name + '" in file "' + item['file'] + '" replaces an existing description!')
+      
+      structs[name] = data
     
-    if type_[-1] in ('d', 'i', 'u'):
-      # Integer
-      data['defines'][name] = int(data['defines'][name])
-    elif type_[-1] in ('x', 'X', 'a', 'A'):
-      # Hexadecimal
-      data['defines'][name] = float.fromhex(data['defines'][name])
-    elif type_[-1] in ('f', 'F', 'e', 'E', 'g', 'G'):
-      # Float
-      data['defines'][name] = float(data['defines'][name])
-    # Leave everything else untouched.
-  
-  return data
+    for part in item['defines']:
+      if not isinstance(part, list):
+        # If no type is specified, assume integer.
+        part = ['i', part]
+      
+      if part[1] in defines:
+        show('WARN: Description of define "' + part[1] + '" in file "' + item['file'] + '" replaces an existing description!')
+      
+      defines[part[1]] = part[0]
+
+def output_json(obj, compressed=True, stream=None):
+  if stream == None:
+    stream = sys.stdout
+  elif isinstance(stream, str):
+    stream = open(stream, 'w')
+  
+  if compressed:
+    json.dump(obj, stream, separators=(',', ':'))
+  else:
+    json.dump(obj, stream, indent=4, sort_keys=True)
+  
+  stream.close()
+
+def filter_opts(opts):
+  # Only apply compiler options regarding syntax, includes and defines.
+  # We have to compile for the current system, we aren't compiling to bitcode after all.
+  out = []
+  for flag in opts:
+    if flag[:2] in ('-f', '-I', '-i', '-D', '-U'):
+      out.append(flag)
+  
+  return out
 
-def main():
+def main(args):
+  global QUIET
+  
   parser = argparse.ArgumentParser(description='Generate JSON infos for structs.')
   parser.add_argument('headers', nargs='+', help='A header (.h) file or a JSON file with a list of structs and their fields')
-  parser.add_argument('-f', dest='list_fields', action='store_true', default=False, help='Output a list of structs and fields for the first header.')
+  parser.add_argument('-q', dest='quiet', action='store_true', default=False, help='Don\'t output anything besides error messages.')
+  parser.add_argument('-f', dest='list_fields', action='store_true', default=False, help='Output a list of structs and fields for the given headers.')
   parser.add_argument('-p', dest='pretty_print', action='store_true', default=False, help='Pretty print the outputted JSON.')
   parser.add_argument('-o', dest='output', metavar='path', default=None, help='Path to the JSON file that will be written. If omitted, the generated data will be printed to stdout.')
   parser.add_argument('-I', dest='includes', metavar='dir', action='append', default=[], help='Add directory to include search path')
   parser.add_argument('-D', dest='defines', metavar='define', action='append', default=[], help='Pass a define to the preprocessor')
   parser.add_argument('-U', dest='undefines', metavar='undefine', action='append', default=[], help='Pass an undefine to the preprocessor')
-  args = parser.parse_args()
+  args = parser.parse_args(args)
   
-  # Avoid parsing problems due to gcc specifc syntax.
-  cpp_opts = ['-U__GNUC__']
+  QUIET = args.quiet
   
-  # Only apply compiler options regarding syntax, includes and defines.
-  # We have to compile for the current system, we aren't compiling to bitcode after all.
-  for flag in shared.COMPILER_OPTS:
-    if flag[:2] in ('-f', '-I', '-i', '-D', '-U'):
-      cpp_opts.append(flag)
+  # Avoid parsing problems due to gcc specifc syntax.
+  cpp_opts = ['-U__GNUC__', '-D_GNU_SOURCE'] + shared.COMPILER_OPTS
   
   # Add the user options to the list as well.
   for path in args.includes:
@@ -316,20 +474,15 @@ def main():
     cpp_opts.append('-U' + arg)
   
   if args.list_fields:
-    # Just parse the first header and output the result.
-    structs = parse_header(args.headers[0], cpp_opts)
-    data = {
-      'file': args.headers[0],
-      'structs': structs,
-      'defines': []
-    }
-    
-    if args.output == None:
-      sys.stdout.write(json.dumps(data, indent=4 if args.pretty_print else None))
-    else:
-      with open(args.output, 'w') as stream:
-        json.dump(data, stream, indent=4 if args.pretty_print else None)
+    # Just parse the given headers and output the result.
+    data = []
+    for path in args.headers:
+      if path[-5:] == '.json':
+        show('WARN: Skipping "' + path + '" because it\'s already a JSON file!')
+      else:
+        data.append(parse_header(path, cpp_opts))
     
+    output_json(data, not args.pretty_print, args.output)
     sys.exit(0)
   
   # Look for structs in all passed headers.
@@ -339,30 +492,18 @@ def main():
   
   for header in args.headers:
     if header[-5:] == '.json':
-      # This is a JSON file, simply load it.
-      with open(header, 'r') as stream:
-        data = json.load(stream)
-      
-      if not isinstance(data, list):
-        data = [ data ]
-      
-      for item in data:
-        header_files.append(item['file'])
-        structs.update(item['structs'])
-        defines.update(item['defines'])
+      # This is a JSON file, parse it.
+      parse_json(header, header_files, structs, defines)
     else:
       # If the passed file isn't a JSON file, assume it's a header.
       header_files.append(header)
-      structs.update(parse_header(header, cpp_opts))
+      data = parse_header(header, cpp_opts)
+      structs.update(data['structs'])
+      defines.extend(data['defines'])
   
   # Inspect all collected structs.
   struct_info = inspect_code(header_files, cpp_opts, structs, defines)
-  
-  if args.output == None:
-    sys.stdout.write(json.dumps(struct_info, indent=4 if args.pretty_print else None))
-  else:
-    with open(args.output, 'w') as stream:
-      json.dump(struct_info, stream, indent=4 if args.pretty_print else None)
+  output_json(struct_info, not args.pretty_print, args.output)
 
 if __name__ == '__main__':
-  main()
-\ No newline at end of file
+  main(sys.argv[1:])
+\ No newline at end of file