aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2011-04-24 17:57:01 -0700
committerAlon Zakai <alonzakai@gmail.com>2011-04-24 17:57:01 -0700
commit0a2001bf4f9826560be577f053dcdfd8b07dcca0 (patch)
treec669d257b82d0a0c333e9db33c786ab9074c7c59
parent6327b8b954f60d6f6f9ed729373ab1ece1f8c636 (diff)
dead function elimination tool
-rw-r--r--tests/runner.py97
-rw-r--r--tools/dead_function_eliminator.py163
2 files changed, 234 insertions, 26 deletions
diff --git a/tests/runner.py b/tests/runner.py
index fb3c83f3..442f5c18 100644
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -28,6 +28,7 @@ DEMANGLER = path_from_root('third_party', 'demangler.py')
NAMESPACER = path_from_root('tools', 'namespacer.py')
EMMAKEN = path_from_root('tools', 'emmaken.py')
AUTODEBUGGER = path_from_root('tools', 'autodebugger.py')
+DFE = path_from_root('tools', 'dead_function_eliminator.py')
# Global cache for tests (we have multiple TestCase instances; this object lets them share data)
@@ -35,7 +36,7 @@ GlobalCache = {}
class Dummy: pass
Settings = Dummy()
-Settings.saveJS = False
+Settings.saveJS = 0
# Core test runner class, shared between normal tests and benchmarks
@@ -43,9 +44,10 @@ class RunnerCore(unittest.TestCase):
def tearDown(self):
if Settings.saveJS:
for name in os.listdir(self.get_dir()):
- if name[-3:] == '.js':
+ if name.endswith(('.o.js', '.cc.js')):
+ suff = '.'.join(name.split('.')[-2:])
shutil.copy(os.path.join(self.get_dir(), name),
- os.path.join(TEMP_DIR, self.id().replace('__main__.', '').replace('.test_', '.')+'.js'))
+ os.path.join(TEMP_DIR, self.id().replace('__main__.', '').replace('.test_', '.')+'.'+suff))
def skip(self):
print >> sys.stderr, '<skip> ',
@@ -116,6 +118,12 @@ class RunnerCore(unittest.TestCase):
if optimization_level > 1:
LLVM_OPT_OPTS.append('-constmerge')
+ # Emscripten optimizations that we run on the .ll file
+ def do_ll_opts(self, filename):
+     '''Run the dead function eliminator (DFE) over <filename>.o.ll, in place.
+
+     The current .ll file is moved aside to <filename>.o.ll.orig; the DFE
+     script reads that copy and writes its result to <filename>.o.ll.
+     Fails with an assertion if no output file was produced.
+     '''
+     original = filename + '.o.ll.orig'
+     optimized = filename + '.o.ll'
+     shutil.move(optimized, original)
+     output = Popen(['python', DFE, original, optimized], stdout=PIPE, stderr=STDOUT).communicate()[0]
+     # Include what the tool printed (as do_link does), otherwise a failure here cannot be diagnosed
+     assert os.path.exists(optimized), 'Failed to run ll optimizations: ' + output
+
# Optional LLVM optimizations
def do_llvm_opts(self, filename):
if LLVM_OPTS:
@@ -124,13 +132,47 @@ class RunnerCore(unittest.TestCase):
def do_llvm_dis(self, filename):
# LLVM binary ==> LLVM assembly
+ try:
+ os.remove(filename + '.o.ll')
+ except:
+ pass
Popen([LLVM_DIS, filename + '.o'] + LLVM_DIS_OPTS + ['-o=' + filename + '.o.ll'], stdout=PIPE, stderr=STDOUT).communicate()[0]
assert os.path.exists(filename + '.o.ll'), 'Could not create .ll file'
+ def do_llvm_as(self, source, target):
+     '''LLVM assembly ==> LLVM binary.
+
+     Assembles the file `source` into `target` by running LLVM_AS. Any existing
+     `target` is deleted first, so that the existence check afterwards proves
+     that this run produced the file. Fails with an assertion otherwise.
+     '''
+     try:
+         os.remove(target)
+     except OSError:
+         pass # nothing to clear away; only filesystem errors are ignored here
+     output = Popen([LLVM_AS, source, '-o=' + target], stdout=PIPE, stderr=STDOUT).communicate()[0]
+     assert os.path.exists(target), 'Could not create bc file: ' + output
+
def do_link(self, files, target):
output = Popen([LLVM_LINK] + files + ['-o', target], stdout=PIPE, stderr=STDOUT).communicate()[0]
assert output is None or 'Could not open input file' not in output, 'Linking error: ' + output
+ def prep_ll_test(self, filename, ll_file, force_recompile=False, build_ll_hook=None):
+     '''Prepare <filename>.o.ll from an existing LLVM file.
+
+     filename:        base path; the files used are <filename>.o and <filename>.o.ll
+     ll_file:         input file. Names ending in .bc or .o are copied to
+                      <filename>.o (unless ll_file already is that file) and
+                      disassembled; anything else is copied to <filename>.o.ll
+     force_recompile: run the ll optimizations and the reassemble/optimize/
+                      disassemble round trip even if nothing else requires it
+     build_ll_hook:   optional callable, called with filename after the ll
+                      optimizations and before reassembly; passing one also
+                      forces the round trip
+     '''
+     if ll_file.endswith(('.bc', '.o')):
+         if ll_file != filename + '.o':
+             shutil.copy(ll_file, filename + '.o')
+         self.do_llvm_dis(filename)
+     else:
+         shutil.copy(ll_file, filename + '.o.ll')
+
+     force_recompile = force_recompile or os.stat(filename + '.o.ll').st_size > 50000 # if the file is big, recompile just to get ll_opts
+
+     if LLVM_OPTS or force_recompile or build_ll_hook:
+         self.do_ll_opts(filename)
+         if build_ll_hook:
+             build_ll_hook(filename)
+         shutil.move(filename + '.o.ll', filename + '.o.ll.pre')
+         # do_llvm_as clears any old .o and asserts that a new one was produced, so
+         # llvm-as is run exactly once here (it used to be invoked a second time inline)
+         self.do_llvm_as(filename + '.o.ll.pre', filename + '.o')
+         self.do_llvm_opts(filename)
+         self.do_llvm_dis(filename)
+
# Build JavaScript code from source code
def build(self, src, dirname, filename, output_processor=None, main_file=None, additional_files=[], libraries=[], includes=[], build_ll_hook=None):
# Copy over necessary files for compiling the source
@@ -179,12 +221,7 @@ class RunnerCore(unittest.TestCase):
raise Exception("Linkage error");
# Finalize
- self.do_llvm_opts(filename)
-
- self.do_llvm_dis(filename)
-
- if build_ll_hook:
- build_ll_hook(filename)
+ self.prep_ll_test(filename, filename + '.o', build_ll_hook=build_ll_hook)
self.do_emscripten(filename, output_processor)
@@ -274,22 +311,6 @@ if 'benchmark' not in sys.argv:
#shutil.rmtree(dirname) # TODO: leave no trace in memory. But for now nice for debugging
- def prep_ll_test(self, filename, ll_file, force_recompile=False, build_ll_hook=None):
- if ll_file.endswith(('.bc', '.o')):
- shutil.copy(ll_file, filename + '.o')
- self.do_llvm_dis(filename)
- else:
- shutil.copy(ll_file, filename + '.o.ll')
-
- if LLVM_OPTS or force_recompile or build_ll_hook:
- if build_ll_hook:
- build_ll_hook(filename)
- shutil.move(filename + '.o.ll', filename + '.o.ll.pre')
- output = Popen([LLVM_AS, filename + '.o.ll.pre'] + ['-o=' + filename + '.o'], stdout=PIPE, stderr=STDOUT).communicate()[0]
- assert 'error:' not in output, 'Error in llvm-as: ' + output
- self.do_llvm_opts(filename)
- Popen([LLVM_DIS, filename + '.o'] + LLVM_DIS_OPTS + ['-o=' + filename + '.o.ll'], stdout=PIPE, stderr=STDOUT).communicate()[0]
-
# No building - just process an existing .ll file (or .bc, which we turn into .ll)
def do_ll_test(self, ll_file, expected_output=None, args=[], js_engines=None, output_nicerizer=None, post_build=None, force_recompile=False, build_ll_hook=None):
if COMPILER != LLVM_GCC: return self.skip() # We use existing .ll, so which compiler is unimportant
@@ -2046,6 +2067,30 @@ if 'benchmark' not in sys.argv:
self.do_test(src, build_ll_hook=self.do_autodebug)
self.do_test(src, 'line: ', build_ll_hook=self.do_autodebug)
+ def test_dfe(self):
+ # Checks that the dead function eliminator removes a function that nothing calls,
+ # and that the program still prints its expected output afterwards.
+ # NOTE(review): '-g' is presumably there so that debug metadata referring to the
+ # function is present in the .ll file - confirm.
+ global COMPILER_TEST_OPTS; COMPILER_TEST_OPTS = ['-g']
+
+ def hook(filename):
+ # Called as build_ll_hook; prep_ll_test invokes the hook after do_ll_opts,
+ # so the .ll file read here has already been through the eliminator
+ ll = open(filename + '.o.ll').read()
+ assert 'unneeded' not in ll, 'DFE should remove the unneeded function'
+
+ src = '''
+ #include <stdio.h>
+
+ void unneeded()
+ {
+ printf("some totally useless stuff\\n");
+ }
+
+ int main()
+ {
+ printf("*hello slim world*\\n");
+ return 0;
+ }
+ '''
+ # Using build_ll_hook forces a recompile, which leads to DFE being done even without opts
+ # (prep_ll_test runs do_ll_opts whenever a build_ll_hook is passed)
+ self.do_test(src, '*hello slim world*', build_ll_hook=hook)
+
### Integration tests
def test_scriptaclass(self):
@@ -2391,7 +2436,7 @@ else:
cc_output = Popen(['java', '-jar', CLOSURE_COMPILER,
'--compilation_level', 'ADVANCED_OPTIMIZATIONS',
- #'--formatting', 'PRETTY_PRINT',
+ '--formatting', 'PRETTY_PRINT',
'--variable_map_output_file', filename + '.vars',
'--js', filename + '.o.js', '--js_output_file', filename + '.cc.js'], stdout=PIPE, stderr=STDOUT).communicate()[0]
if 'ERROR' in cc_output:
diff --git a/tools/dead_function_eliminator.py b/tools/dead_function_eliminator.py
new file mode 100644
index 00000000..a694ce05
--- /dev/null
+++ b/tools/dead_function_eliminator.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+
+'''
+LLVM doesn't appear to have a way to remove unused functions. This little
+script will do that. It requires annotations to be in the .ll file it parses
+(run llvm-dis with -show-annotations).
+
+Closure compiler can remove unused functions, however it is much faster
+to remove them before Emscripten runs.
+'''
+
+import os, sys, re
+
+# Absolute path of the directory that contains this script
+abspath = os.path.abspath(os.path.dirname(__file__))
+def path_from_root(*pathelems):
+    '''Return an absolute path made of the parent of this script's directory followed by pathelems.'''
+    root_parts = abspath.split(os.sep)[:-1] # all components except the last one (this script's own directory)
+    return os.path.join(os.path.sep, *(root_parts + list(pathelems)))
+# Execute tools/shared.py inside this module's namespace
+exec(open(path_from_root('tools', 'shared.py'), 'r').read())
+
+# Command line: <input .ll file> <output .ll file>
+infile = sys.argv[1]
+outfile = sys.argv[2]
+
+# Read the whole input up front, as a list of lines without their newlines
+infile_handle = open(infile, 'r')
+try:
+    lines = infile_handle.read().split('\n')
+finally:
+    infile_handle.close() # release the handle explicitly instead of relying on garbage collection
+
+# Plain attribute bag; one instance per discovered function
+class Dummy: pass
+
+# Discover functions
+
+functions = {} # function identifier (e.g. '@main') ==> record with uses, callers, callees
+
+func_header = re.compile(r'^define[^@]* (?P<ident>@\w+)\(.* {$')
+func_footer = '}'
+func_annot = re.compile(r'^; \[#uses=(?P<uses>\d+)\]$')
+
+print '\nDiscovery pass 1\n'
+
+# Create a record for every function definition, using the use count from the
+# annotation comment on the line directly above the function header
+for i, line in enumerate(lines):
+    m_header = func_header.match(line)
+    if not m_header: continue
+    # A header on the very first line has no line above it (lines[-1] would wrap around to the end)
+    m_annot = func_annot.match(lines[i-1]) if i > 0 else None
+    assert m_annot, 'No "; [#uses=N]" annotation above line %d (%s); run llvm-dis with -show-annotations' % (i+1, line)
+    ident = m_header.group('ident')
+    func = functions[ident] = Dummy()
+    func.uses = int(m_annot.group('uses')) # XXX This info from LLVM is very inaccurate
+    func.callers = set()
+    func.callees = set()
+
+for ident in functions.iterkeys():
+    print ident
+
+if '@main' not in functions:
+    # Without @main nothing is eliminated: the input is passed through unchanged.
+    # Note that the exit status is still 1 in this case.
+    print 'No @main found, not running DFE'
+    import shutil
+    shutil.copy(infile, outfile)
+    sys.exit(1)
+
+print '\nDiscovery pass 2\n'
+
+ident_frag = re.compile(r'[, ](?P<ident>@\w+)[, ()}\]]')
+metadata = re.compile(r'!(?P<index>\d+) = metadata !{.*')
+
+inside = None # identifier of the function whose body is being scanned, or None outside of one
+
+# Record who refers to each known function. A reference is attributed to
+#  - the enclosing function, when it appears inside a function body,
+#  - 'METADATA_<line index>_<metadata index>', when it appears on a metadata definition line,
+#  - 'GLOBAL' otherwise.
+for i, line in enumerate(lines):
+    if line == func_footer:
+        inside = None
+        continue
+    m_header = func_header.match(line)
+    if m_header:
+        inside = m_header.group('ident')
+        continue # the header line itself is not scanned for references
+    meta = metadata.match(line)
+    for m in ident_frag.finditer(line):
+        ident = m.group('ident')
+        if ident not in functions: continue
+        if inside == ident: continue # a function referring to itself is neither a caller nor a callee
+        if inside:
+            functions[ident].callers.add(inside)
+            functions[inside].callees.add(ident)
+        elif meta:
+            functions[ident].callers.add('METADATA_' + str(i) + '_' + meta.group('index'))
+        else:
+            functions[ident].callers.add('GLOBAL')
+
+functions['@main'].callers.add('GLOBAL') # @main is always reachable
+
+# func.uses is printed next to the callers found here; the two are not compared,
+# since the use count reported by LLVM is inaccurate
+for ident in functions.iterkeys():
+    func = functions[ident]
+    print ident, func.uses, func.callers
+
+# Garbage collect
+
+print '\nGC pass 1\n'
+
+# A function is a root when one of its callers is 'GLOBAL'
+for ident in functions.iterkeys():
+    func = functions[ident]
+    func.marked = False
+    func.root = 'GLOBAL' in func.callers
+    if func.root:
+        print 'ROOT:', ident
+
+def mark_and_recurse(func):
+    '''Mark func and every function reachable from it through callees.
+
+    Walks the call graph with an explicit worklist rather than by recursion,
+    so that a long chain of calls cannot exceed the interpreter's recursion limit.
+    '''
+    pending = [func]
+    while pending:
+        current = pending.pop()
+        if current.marked: continue # already visited; this also terminates cycles
+        current.marked = True
+        for callee in current.callees:
+            if callee == 'GLOBAL': continue
+            pending.append(functions[callee])
+
+for ident in functions.iterkeys():
+    func = functions[ident]
+    if func.root:
+        mark_and_recurse(func)
+
+# Report the result and count live (marked) and dead (unmarked) functions
+marked = unmarked = 0
+for ident in functions.iterkeys():
+    func = functions[ident]
+    if func.root: assert func.marked
+    print ident, func.marked
+    marked += func.marked
+    unmarked += 1-func.marked
+
+# Metadata pruning pass: blank out the metadata lines that refer to unmarked functions,
+# then neutralize references to those metadata entries in the remaining metadata lines
+dead_metadatas = set()
+for ident in functions.iterkeys():
+    func = functions[ident]
+    if func.marked: continue
+    for caller in func.callers:
+        if not caller.startswith('METADATA_'): continue
+        dummy, line_index, index = caller.split('_') # 'METADATA_<line index>_<metadata index>'
+        lines[int(line_index)] = ';'
+        dead_metadatas.add(int(index))
+
+inner_metadata = re.compile('metadata !(?P<index>\d+)')
+
+def replace_if_dead(m):
+    '''Substitute 'i32 0' for a reference to a dead metadata entry; keep any other reference as it is.'''
+    if int(m.group('index')) in dead_metadatas:
+        return 'i32 0'
+    return m.group(0)
+
+for i, line in enumerate(lines):
+    if metadata.match(line):
+        lines[i] = inner_metadata.sub(replace_if_dead, line)
+
+print 'Marked: ', marked, ', unmarked: ', unmarked
+
+# Write
+
+print '\nWriting\n'
+
+inside = None # identifier of the recognised function being written, or None outside of one
+marked = False # whether that function is live; only meaningful while inside is set
+
+target = open(outfile, 'w')
+try:
+    for line in lines:
+        if line == func_footer:
+            # Drop the footer only when it closes a dead function. A '}' seen while not inside a
+            # recognised function closes text that was written through unfiltered, so it is kept
+            # too (previously the stale flag of the preceding function decided this).
+            if inside is None or marked: target.write(line + '\n')
+            inside = None
+            continue
+        m_header = func_header.match(line)
+        if m_header:
+            inside = m_header.group('ident')
+            marked = functions[inside].marked
+        # Metadata lines are not skipped wholesale here (a blanket skip was tried and disabled);
+        # only the '!llvm.dbg.sp = ' line is always left out
+        if line.startswith('!llvm.dbg.sp = '): continue
+        if not inside or marked:
+            target.write(line + '\n')
+finally:
+    target.close() # close the output even if writing fails part way
+