dead function elimination tool

author: Alon Zakai <alonzakai@gmail.com> 2011-04-24 17:57:01 -0700
committer: Alon Zakai <alonzakai@gmail.com> 2011-04-24 17:57:01 -0700
commit: 0a2001bf4f9826560be577f053dcdfd8b07dcca0 (patch)
tree: c669d257b82d0a0c333e9db33c786ab9074c7c59
parent: 6327b8b954f60d6f6f9ed729373ab1ece1f8c636 (diff)
2 files changed, 234 insertions, 26 deletions
diff --git a/tests/runner.py b/tests/runner.py
index fb3c83f3..442f5c18 100644
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -28,6 +28,7 @@ DEMANGLER = path_from_root('third_party', 'demangler.py')
 NAMESPACER = path_from_root('tools', 'namespacer.py')
 EMMAKEN = path_from_root('tools', 'emmaken.py')
 AUTODEBUGGER = path_from_root('tools', 'autodebugger.py')
+DFE = path_from_root('tools', 'dead_function_eliminator.py')
 
 # Global cache for tests (we have multiple TestCase instances; this object lets them share data)
 
@@ -35,7 +36,7 @@ GlobalCache = {}
 
 class Dummy: pass
 Settings = Dummy()
-Settings.saveJS = False
+Settings.saveJS = 0
 
 # Core test runner class, shared between normal tests and benchmarks
 
@@ -43,9 +44,10 @@ class RunnerCore(unittest.TestCase):
   def tearDown(self):
     if Settings.saveJS:
       for name in os.listdir(self.get_dir()):
-        if name[-3:] == '.js':
+        if name.endswith(('.o.js', '.cc.js')):
+          suff = '.'.join(name.split('.')[-2:])
           shutil.copy(os.path.join(self.get_dir(), name),
-                      os.path.join(TEMP_DIR, self.id().replace('__main__.', '').replace('.test_', '.')+'.js'))
+                      os.path.join(TEMP_DIR, self.id().replace('__main__.', '').replace('.test_', '.')+'.'+suff))
 
   def skip(self):
     print >> sys.stderr, '<skip> ',
@@ -116,6 +118,12 @@ class RunnerCore(unittest.TestCase):
     if optimization_level > 1:
       LLVM_OPT_OPTS.append('-constmerge')
 
+  # Emscripten optimizations that we run on the .ll file
+  def do_ll_opts(self, filename):
+    shutil.move(filename + '.o.ll', filename + '.o.ll.orig') # the tool must write a fresh .o.ll, so the existence check below proves it produced output
+    output = Popen(['python', DFE, filename + '.o.ll.orig', filename + '.o.ll'], stdout=PIPE, stderr=STDOUT).communicate()[0]
+    assert os.path.exists(filename + '.o.ll'), 'Failed to run ll optimizations: ' + output # include the tool's output so failures can be diagnosed
+
   # Optional LLVM optimizations
   def do_llvm_opts(self, filename):
     if LLVM_OPTS:
@@ -124,13 +132,47 @@ class RunnerCore(unittest.TestCase):
 
   def do_llvm_dis(self, filename):
     # LLVM binary ==> LLVM assembly
+    try:
+      os.remove(filename + '.o.ll') # drop any stale output so the existence check below reflects this run
+    except OSError:
+      pass # nothing to remove
     Popen([LLVM_DIS, filename + '.o'] + LLVM_DIS_OPTS + ['-o=' + filename + '.o.ll'], stdout=PIPE, stderr=STDOUT).communicate()[0]
     assert os.path.exists(filename + '.o.ll'), 'Could not create .ll file'
 
+  def do_llvm_as(self, source, target):
+    # LLVM assembly ==> LLVM binary
+    try:
+      os.remove(target) # drop any stale output so the existence check below reflects this run
+    except OSError:
+      pass # nothing to remove
+    output = Popen([LLVM_AS, source, '-o=' + target], stdout=PIPE, stderr=STDOUT).communicate()[0]
+    assert os.path.exists(target), 'Could not create bc file: ' + output # surface llvm-as diagnostics on failure
+
   def do_link(self, files, target):
     output = Popen([LLVM_LINK] + files + ['-o', target], stdout=PIPE, stderr=STDOUT).communicate()[0]
     assert output is None or 'Could not open input file' not in output, 'Linking error: ' + output
 
+  def prep_ll_test(self, filename, ll_file, force_recompile=False, build_ll_hook=None):
+    # Produce filename.o.ll from ll_file (bitcode or assembly); optionally run ll opts, build_ll_hook and LLVM opts on it
+    if ll_file.endswith(('.bc', '.o')):
+      if ll_file != filename + '.o': # already in place; copying a file onto itself would fail
+        shutil.copy(ll_file, filename + '.o')
+      self.do_llvm_dis(filename)
+    else:
+      shutil.copy(ll_file, filename + '.o.ll')
+
+    force_recompile = force_recompile or os.stat(filename + '.o.ll').st_size > 50000 # if the file is big, recompile just to get ll_opts
+
+    if LLVM_OPTS or force_recompile or build_ll_hook:
+      self.do_ll_opts(filename)
+      if build_ll_hook:
+        build_ll_hook(filename)
+      shutil.move(filename + '.o.ll', filename + '.o.ll.pre')
+      # Reassemble exactly once; do_llvm_as removes any stale .o first and asserts that the new one exists
+      self.do_llvm_as(filename + '.o.ll.pre', filename + '.o')
+      self.do_llvm_opts(filename)
+      self.do_llvm_dis(filename)
+
   # Build JavaScript code from source code
   def build(self, src, dirname, filename, output_processor=None, main_file=None, additional_files=[], libraries=[], includes=[], build_ll_hook=None):
     # Copy over necessary files for compiling the source
@@ -179,12 +221,7 @@ class RunnerCore(unittest.TestCase):
         raise Exception("Linkage error");
 
     # Finalize
-    self.do_llvm_opts(filename)
-
-    self.do_llvm_dis(filename)
-
-    if build_ll_hook:
-      build_ll_hook(filename)
+    self.prep_ll_test(filename, filename + '.o', build_ll_hook=build_ll_hook)
 
     self.do_emscripten(filename, output_processor)
 
@@ -274,22 +311,6 @@ if 'benchmark' not in sys.argv:
 
         #shutil.rmtree(dirname) # TODO: leave no trace in memory. But for now nice for debugging
 
-    def prep_ll_test(self, filename, ll_file, force_recompile=False, build_ll_hook=None):
-      if ll_file.endswith(('.bc', '.o')):
-        shutil.copy(ll_file, filename + '.o')
-        self.do_llvm_dis(filename)
-      else:
-        shutil.copy(ll_file, filename + '.o.ll')
-
-      if LLVM_OPTS or force_recompile or build_ll_hook:
-        if build_ll_hook:
-          build_ll_hook(filename)
-        shutil.move(filename + '.o.ll', filename + '.o.ll.pre')
-        output = Popen([LLVM_AS, filename + '.o.ll.pre'] + ['-o=' + filename + '.o'], stdout=PIPE, stderr=STDOUT).communicate()[0]
-        assert 'error:' not in output, 'Error in llvm-as: ' + output
-        self.do_llvm_opts(filename)
-        Popen([LLVM_DIS, filename + '.o'] + LLVM_DIS_OPTS + ['-o=' + filename + '.o.ll'], stdout=PIPE, stderr=STDOUT).communicate()[0]
-
     # No building - just process an existing .ll file (or .bc, which we turn into .ll)
     def do_ll_test(self, ll_file, expected_output=None, args=[], js_engines=None, output_nicerizer=None, post_build=None, force_recompile=False, build_ll_hook=None):
       if COMPILER != LLVM_GCC: return self.skip() # We use existing .ll, so which compiler is unimportant
@@ -2046,6 +2067,30 @@ if 'benchmark' not in sys.argv:
       self.do_test(src, build_ll_hook=self.do_autodebug)
       self.do_test(src, 'line: ', build_ll_hook=self.do_autodebug)
 
+    def test_dfe(self):
+      global COMPILER_TEST_OPTS; COMPILER_TEST_OPTS = ['-g']
+
+      def hook(filename):
+        ll = open(filename + '.o.ll').read()
+        assert 'unneeded' not in ll, 'DFE should remove the unneeded function'
+
+      src = '''
+          #include <stdio.h>
+
+          void unneeded()
+          {
+            printf("some totally useless stuff\\n");
+          }
+
+          int main()
+          {
+            printf("*hello slim world*\\n");
+            return 0;
+          }
+        '''
+      # Using build_ll_hook forces a recompile, which leads to DFE being done even without opts
+      self.do_test(src, '*hello slim world*', build_ll_hook=hook)
+
     ### Integration tests
 
     def test_scriptaclass(self):
@@ -2391,7 +2436,7 @@ else:
 
         cc_output = Popen(['java', '-jar', CLOSURE_COMPILER,
                            '--compilation_level', 'ADVANCED_OPTIMIZATIONS',
-                           #'--formatting', 'PRETTY_PRINT',
+                           '--formatting', 'PRETTY_PRINT',
                            '--variable_map_output_file', filename + '.vars',
                            '--js', filename + '.o.js', '--js_output_file', filename + '.cc.js'], stdout=PIPE, stderr=STDOUT).communicate()[0]
         if 'ERROR' in cc_output:
diff --git a/tools/dead_function_eliminator.py b/tools/dead_function_eliminator.py
new file mode 100644
index 00000000..a694ce05
--- /dev/null
+++ b/tools/dead_function_eliminator.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+
+'''
+LLVM doesn't appear to have a way to remove unused functions. This little
+script will do that. It requires annotations to be in the .ll file it parses
+(run llvm-dis with -show-annotations).
+
+Closure compiler can remove unused functions, however it is much faster
+to remove them before Emscripten runs.
+'''
+
+import os, sys, re
+
+abspath = os.path.abspath(os.path.dirname(__file__))
+def path_from_root(*pathelems):
+  return os.path.join(os.path.sep, *(abspath.split(os.sep)[:-1] + list(pathelems)))
+exec(open(path_from_root('tools', 'shared.py'), 'r').read())
+
+infile = sys.argv[1]
+outfile = sys.argv[2]
+
+lines = open(infile, 'r').read().split('\n')
+
+class Dummy: pass
+
+# Discover functions
+
+functions = {}
+
+func_header = re.compile('^define[^@]* (?P<ident>@\w+)\(.* {$')
+func_footer = '}'
+func_annot = re.compile('^; \[#uses=(?P<uses>\d+)\]$')
+
+print '\nDiscovery pass 1\n'
+
+for i in range(len(lines)):
+  line = lines[i]
+  m_header = func_header.match(line)
+  if m_header:
+    m_annot = func_annot.match(lines[i-1]) if i > 0 else None # at i == 0, lines[i-1] would wrap around to the last line
+    assert m_annot, 'No [#uses=N] annotation above "' + line + '"; run llvm-dis with -show-annotations'
+    ident = m_header.group('ident')
+    func = functions[ident] = Dummy()
+    func.uses = int(m_annot.group('uses')) # XXX This info from LLVM is very inaccurate
+    func.callers = set()
+    func.callees = set()
+
+for ident in functions.iterkeys():
+  func = functions[ident]
+  print ident
+
+if '@main' not in functions:
+  print 'No @main found, not running DFE'
+  import shutil
+  shutil.copy(infile, outfile)
+  sys.exit(1)
+
+print '\nDiscovery pass 2\n'
+
+ident_frag = re.compile('[, ](?P<ident>@\w+)[, ()}\]]')
+metadata = re.compile('!(?P<index>\d+) = metadata !{.*')
+
+inside = None # ident of the function whose body is currently being scanned, if any
+
+for i in range(len(lines)):
+  line = lines[i]
+  if line == func_footer:
+    inside = None
+    continue
+  m_header = func_header.match(line)
+  if m_header:
+    inside = m_header.group('ident')
+    continue
+  meta = metadata.match(line)
+  for m in ident_frag.finditer(line):
+    ident = m.group('ident') # group(name), not groups(default)[0], which only worked because 'ident' is the first group
+    if ident not in functions: continue
+    if inside != ident: # a reference from a function to itself is not recorded
+      functions[ident].callers.add(inside if inside else ('GLOBAL' if not meta else 'METADATA_'+str(i)+'_'+meta.group('index')))
+      if inside:
+        functions[inside].callees.add(ident)
+
+functions['@main'].callers.add('GLOBAL')
+
+for ident in functions.iterkeys():
+  func = functions[ident]
+  print ident, func.uses, func.callers#, 'WARNING!' if func.uses != len(func.callers) else ''
+
+# Garbage collect
+
+print '\nGC pass 1\n'
+
+for ident in functions.iterkeys():
+  func = functions[ident]
+  func.root = func.marked = False
+  for caller in func.callers:
+    if caller == 'GLOBAL':
+      func.root = True
+      print 'ROOT:', ident
+      break
+
+def mark_and_recurse(func):
+  # Mark func and everything reachable from it through callee edges.
+  pending = [func] # explicit worklist instead of recursion, so long call chains cannot exceed the recursion limit
+  while pending:
+    func = pending.pop()
+    if not func.marked: func.marked = True; pending.extend(functions[callee] for callee in func.callees) # callees only ever holds function idents
+
+for ident in functions.iterkeys():
+  func = functions[ident]
+  if func.root:
+    mark_and_recurse(func)
+
+marked = unmarked = 0
+for ident in functions.iterkeys():
+  func = functions[ident]
+  if func.root: assert func.marked
+  print ident, func.marked
+  marked += func.marked
+  unmarked += 1-func.marked
+
+dead_metadatas = set() # metadata pruning pass
+for ident in functions.iterkeys():
+  func = functions[ident]
+  if func.marked: continue
+  for caller in func.callers:
+    if caller.startswith('METADATA_'):
+      dummy, i, index = caller.split('_')
+      lines[int(i)] = ';'
+      dead_metadatas.add(int(index))
+inner_metadata = re.compile('metadata !(?P<index>\d+)')
+for i in range(len(lines)):
+  line = lines[i]
+  if metadata.match(line):
+    lines[i] = re.sub(inner_metadata, lambda m: 'i32 0' if int(m.groups('index')[0]) in dead_metadatas else m.string[m.start():m.end()], line)
+
+print 'Marked: ', marked, ', unmarked: ', unmarked
+
+# Write
+
+print '\nWriting\n'
+
+inside = None
+marked = False
+
+target = open(outfile, 'w')
+
+for line in lines:
+  if line == func_footer:
+    inside = None
+    if marked: target.write(line + '\n')
+    continue
+  m_header = func_header.match(line)
+  if m_header:
+    inside = m_header.group('ident')
+    marked = functions[inside].marked
+#########  if metadata.match(line): continue # metadata is not enough to keep things alive
+  if line.startswith('!llvm.dbg.sp = '): continue
+  if not inside or marked:
+    target.write(line + '\n')
+
+target.close()
+
author	Alon Zakai <alonzakai@gmail.com>	2011-04-24 17:57:01 -0700
committer	Alon Zakai <alonzakai@gmail.com>	2011-04-24 17:57:01 -0700
commit	0a2001bf4f9826560be577f053dcdfd8b07dcca0 (patch)
tree	c669d257b82d0a0c333e9db33c786ab9074c7c59
parent	6327b8b954f60d6f6f9ed729373ab1ece1f8c636 (diff)