diff options
author | Alon Zakai <alonzakai@gmail.com> | 2011-04-24 17:57:01 -0700 |
---|---|---|
committer | Alon Zakai <alonzakai@gmail.com> | 2011-04-24 17:57:01 -0700 |
commit | 0a2001bf4f9826560be577f053dcdfd8b07dcca0 (patch) | |
tree | c669d257b82d0a0c333e9db33c786ab9074c7c59 | |
parent | 6327b8b954f60d6f6f9ed729373ab1ece1f8c636 (diff) |
dead function elimination tool
-rw-r--r-- | tests/runner.py | 97 | ||||
-rw-r--r-- | tools/dead_function_eliminator.py | 163 |
2 files changed, 234 insertions, 26 deletions
diff --git a/tests/runner.py b/tests/runner.py index fb3c83f3..442f5c18 100644 --- a/tests/runner.py +++ b/tests/runner.py @@ -28,6 +28,7 @@ DEMANGLER = path_from_root('third_party', 'demangler.py') NAMESPACER = path_from_root('tools', 'namespacer.py') EMMAKEN = path_from_root('tools', 'emmaken.py') AUTODEBUGGER = path_from_root('tools', 'autodebugger.py') +DFE = path_from_root('tools', 'dead_function_eliminator.py') # Global cache for tests (we have multiple TestCase instances; this object lets them share data) @@ -35,7 +36,7 @@ GlobalCache = {} class Dummy: pass Settings = Dummy() -Settings.saveJS = False +Settings.saveJS = 0 # Core test runner class, shared between normal tests and benchmarks @@ -43,9 +44,10 @@ class RunnerCore(unittest.TestCase): def tearDown(self): if Settings.saveJS: for name in os.listdir(self.get_dir()): - if name[-3:] == '.js': + if name.endswith(('.o.js', '.cc.js')): + suff = '.'.join(name.split('.')[-2:]) shutil.copy(os.path.join(self.get_dir(), name), - os.path.join(TEMP_DIR, self.id().replace('__main__.', '').replace('.test_', '.')+'.js')) + os.path.join(TEMP_DIR, self.id().replace('__main__.', '').replace('.test_', '.')+'.'+suff)) def skip(self): print >> sys.stderr, '<skip> ', @@ -116,6 +118,12 @@ class RunnerCore(unittest.TestCase): if optimization_level > 1: LLVM_OPT_OPTS.append('-constmerge') + # Emscripten optimizations that we run on the .ll file + def do_ll_opts(self, filename): + shutil.move(filename + '.o.ll', filename + '.o.ll.orig') + output = Popen(['python', DFE, filename + '.o.ll.orig', filename + '.o.ll'], stdout=PIPE, stderr=STDOUT).communicate()[0] + assert os.path.exists(filename + '.o.ll'), 'Failed to run ll optimizations' + # Optional LLVM optimizations def do_llvm_opts(self, filename): if LLVM_OPTS: @@ -124,13 +132,47 @@ class RunnerCore(unittest.TestCase): def do_llvm_dis(self, filename): # LLVM binary ==> LLVM assembly + try: + os.remove(filename + '.o.ll') + except: + pass Popen([LLVM_DIS, filename + '.o'] + 
LLVM_DIS_OPTS + ['-o=' + filename + '.o.ll'], stdout=PIPE, stderr=STDOUT).communicate()[0] assert os.path.exists(filename + '.o.ll'), 'Could not create .ll file' + def do_llvm_as(self, source, target): + # LLVM assembly ==> LLVM binary + try: + os.remove(target) + except: + pass + Popen([LLVM_AS, source, '-o=' + target], stdout=PIPE, stderr=STDOUT).communicate()[0] + assert os.path.exists(target), 'Could not create bc file' + def do_link(self, files, target): output = Popen([LLVM_LINK] + files + ['-o', target], stdout=PIPE, stderr=STDOUT).communicate()[0] assert output is None or 'Could not open input file' not in output, 'Linking error: ' + output + def prep_ll_test(self, filename, ll_file, force_recompile=False, build_ll_hook=None): + if ll_file.endswith(('.bc', '.o')): + if ll_file != filename + '.o': + shutil.copy(ll_file, filename + '.o') + self.do_llvm_dis(filename) + else: + shutil.copy(ll_file, filename + '.o.ll') + + force_recompile = force_recompile or os.stat(filename + '.o.ll').st_size > 50000 # if the file is big, recompile just to get ll_opts + + if LLVM_OPTS or force_recompile or build_ll_hook: + self.do_ll_opts(filename) + if build_ll_hook: + build_ll_hook(filename) + shutil.move(filename + '.o.ll', filename + '.o.ll.pre') + self.do_llvm_as(filename + '.o.ll.pre', filename + '.o') + output = Popen([LLVM_AS, filename + '.o.ll.pre'] + ['-o=' + filename + '.o'], stdout=PIPE, stderr=STDOUT).communicate()[0] + assert 'error:' not in output, 'Error in llvm-as: ' + output + self.do_llvm_opts(filename) + self.do_llvm_dis(filename) + # Build JavaScript code from source code def build(self, src, dirname, filename, output_processor=None, main_file=None, additional_files=[], libraries=[], includes=[], build_ll_hook=None): # Copy over necessary files for compiling the source @@ -179,12 +221,7 @@ class RunnerCore(unittest.TestCase): raise Exception("Linkage error"); # Finalize - self.do_llvm_opts(filename) - - self.do_llvm_dis(filename) - - if build_ll_hook: - 
build_ll_hook(filename) + self.prep_ll_test(filename, filename + '.o', build_ll_hook=build_ll_hook) self.do_emscripten(filename, output_processor) @@ -274,22 +311,6 @@ if 'benchmark' not in sys.argv: #shutil.rmtree(dirname) # TODO: leave no trace in memory. But for now nice for debugging - def prep_ll_test(self, filename, ll_file, force_recompile=False, build_ll_hook=None): - if ll_file.endswith(('.bc', '.o')): - shutil.copy(ll_file, filename + '.o') - self.do_llvm_dis(filename) - else: - shutil.copy(ll_file, filename + '.o.ll') - - if LLVM_OPTS or force_recompile or build_ll_hook: - if build_ll_hook: - build_ll_hook(filename) - shutil.move(filename + '.o.ll', filename + '.o.ll.pre') - output = Popen([LLVM_AS, filename + '.o.ll.pre'] + ['-o=' + filename + '.o'], stdout=PIPE, stderr=STDOUT).communicate()[0] - assert 'error:' not in output, 'Error in llvm-as: ' + output - self.do_llvm_opts(filename) - Popen([LLVM_DIS, filename + '.o'] + LLVM_DIS_OPTS + ['-o=' + filename + '.o.ll'], stdout=PIPE, stderr=STDOUT).communicate()[0] - # No building - just process an existing .ll file (or .bc, which we turn into .ll) def do_ll_test(self, ll_file, expected_output=None, args=[], js_engines=None, output_nicerizer=None, post_build=None, force_recompile=False, build_ll_hook=None): if COMPILER != LLVM_GCC: return self.skip() # We use existing .ll, so which compiler is unimportant @@ -2046,6 +2067,30 @@ if 'benchmark' not in sys.argv: self.do_test(src, build_ll_hook=self.do_autodebug) self.do_test(src, 'line: ', build_ll_hook=self.do_autodebug) + def test_dfe(self): + global COMPILER_TEST_OPTS; COMPILER_TEST_OPTS = ['-g'] + + def hook(filename): + ll = open(filename + '.o.ll').read() + assert 'unneeded' not in ll, 'DFE should remove the unneeded function' + + src = ''' + #include <stdio.h> + + void unneeded() + { + printf("some totally useless stuff\\n"); + } + + int main() + { + printf("*hello slim world*\\n"); + return 0; + } + ''' + # Using build_ll_hook forces a recompile, 
#!/usr/bin/env python

'''
LLVM doesn't appear to have a way to remove unused functions. This little
script will do that. It requires annotations to be in the .ll file it parses
(run llvm-dis with -show-annotations).

Closure compiler can remove unused functions, however it is much faster
to remove them before Emscripten runs.

Usage: dead_function_eliminator.py INFILE OUTFILE
'''

import os
import re
import shutil
import sys

abspath = os.path.abspath(os.path.dirname(__file__))


def path_from_root(*pathelems):
    '''Join pathelems onto the parent directory of this script's directory.'''
    return os.path.join(os.path.sep, *(abspath.split(os.sep)[:-1] + list(pathelems)))


# A function definition line, e.g. "define i32 @main() {".
func_header = re.compile(r'^define[^@]* (?P<ident>@\w+)\(.* \{$')
# The line that closes a function body.
func_footer = '}'
# The llvm-dis annotation expected on the line just before a header.
func_annot = re.compile(r'^; \[#uses=(?P<uses>\d+)\]$')
# A reference to a global identifier. The trailing delimiter is a lookahead so
# that it is not consumed (it may be the leading delimiter of the next
# reference), and end-of-line counts as a delimiter so that a reference that
# ends the line (e.g. the target of an alias) is not missed.
ident_frag = re.compile(r'[, ](?P<ident>@\w+)(?=[, ()}\]]|$)')
# A numbered metadata definition line, e.g. "!5 = metadata !{...}".
metadata = re.compile(r'!(?P<index>\d+) = metadata !\{.*')
# A reference to a numbered metadata node inside another metadata node.
inner_metadata = re.compile(r'metadata !(?P<index>\d+)')


class Dummy:
    '''Plain attribute bag holding what is known about one function.'''
    pass


def _log(message):
    '''Write one progress line to stdout (valid on both Python 2 and 3).'''
    sys.stdout.write(message + '\n')


def _discover_functions(lines):
    '''Return {ident: info} for every function definition found in lines.

    Raises ValueError when a definition is not preceded by a "#uses"
    annotation line, since the input was then not produced with annotations.
    '''
    functions = {}
    for i, line in enumerate(lines):
        m_header = func_header.match(line)
        if not m_header:
            continue
        # A header on the very first line has no preceding annotation line;
        # do not let index -1 wrap around to the end of the file.
        m_annot = func_annot.match(lines[i - 1]) if i > 0 else None
        if not m_annot:
            raise ValueError('Missing "; [#uses=N]" annotation before line %d: %s'
                             % (i + 1, line))
        func = functions[m_header.group('ident')] = Dummy()
        func.uses = int(m_annot.group('uses'))  # XXX This info from LLVM is very inaccurate
        func.callers = set()
        func.callees = set()
    return functions


def _collect_references(lines, functions):
    '''Fill in callers/callees of each function by scanning every line.

    A reference from outside any function body is recorded as the caller
    'GLOBAL', or as 'METADATA_<line index>_<metadata index>' when the line is
    a numbered metadata definition.
    '''
    inside = None
    for i, line in enumerate(lines):
        if line == func_footer:
            inside = None
            continue
        m_header = func_header.match(line)
        if m_header:
            inside = m_header.group('ident')
            continue
        meta = metadata.match(line)
        for m in ident_frag.finditer(line):
            ident = m.group('ident')
            if ident not in functions or inside == ident:
                continue
            if inside:
                functions[ident].callers.add(inside)
                functions[inside].callees.add(ident)
            elif meta:
                functions[ident].callers.add(
                    'METADATA_' + str(i) + '_' + meta.group('index'))
            else:
                functions[ident].callers.add('GLOBAL')
    functions['@main'].callers.add('GLOBAL')


def _mark_live(functions):
    '''Set .root and .marked on every function.

    Roots are the functions with a 'GLOBAL' caller; everything reachable from
    a root through callees is marked. Uses an explicit worklist rather than
    recursion so that long call chains cannot exhaust the interpreter stack.
    '''
    pending = []
    for ident in functions:
        func = functions[ident]
        func.marked = False
        func.root = 'GLOBAL' in func.callers
        if func.root:
            _log('ROOT: ' + ident)
            pending.append(func)
    while pending:
        func = pending.pop()
        if func.marked:
            continue
        func.marked = True
        for callee in func.callees:
            if callee == 'GLOBAL':
                continue
            pending.append(functions[callee])


def _prune_metadata(lines, functions):
    '''Blank out metadata nodes that reference dead functions (in place).

    Each such node's line becomes ';', and references to it from other
    metadata definition lines are replaced with 'i32 0'.
    '''
    dead_metadatas = set()
    for ident in functions:
        func = functions[ident]
        if func.marked:
            continue
        for caller in func.callers:
            if caller.startswith('METADATA_'):
                _, i, index = caller.split('_')
                lines[int(i)] = ';'
                dead_metadatas.add(int(index))

    def replace(m):
        return 'i32 0' if int(m.group('index')) in dead_metadatas else m.group(0)

    for i, line in enumerate(lines):
        if metadata.match(line):
            lines[i] = inner_metadata.sub(replace, line)


def _emit(lines, functions):
    '''Return the lines to keep: everything except bodies of dead functions
    and '!llvm.dbg.sp = ' lines.'''
    kept = []
    inside = None
    marked = False
    for line in lines:
        if line == func_footer:
            inside = None
            if marked:
                kept.append(line)
            continue
        m_header = func_header.match(line)
        if m_header:
            inside = m_header.group('ident')
            marked = functions[inside].marked
        if line.startswith('!llvm.dbg.sp = '):
            continue
        if not inside or marked:
            kept.append(line)
    return kept


def eliminate_dead_functions(lines):
    '''Run dead function elimination over the lines of an annotated .ll file.

    lines: list of lines without trailing newlines; it is not modified.
    Returns the list of output lines, or None when the input defines no @main
    (in which case nothing is eliminated).
    Raises ValueError when a function definition lacks its "#uses" annotation.
    '''
    lines = list(lines)  # metadata pruning edits lines in place

    _log('\nDiscovery pass 1\n')
    functions = _discover_functions(lines)
    for ident in functions:
        _log(ident)

    if '@main' not in functions:
        _log('No @main found, not running DFE')
        return None

    _log('\nDiscovery pass 2\n')
    _collect_references(lines, functions)
    for ident in functions:
        func = functions[ident]
        _log('%s %s %s' % (ident, func.uses, func.callers))

    _log('\nGC pass 1\n')
    _mark_live(functions)
    marked = unmarked = 0
    for ident in functions:
        func = functions[ident]
        _log('%s %s' % (ident, func.marked))
        marked += func.marked
        unmarked += 1 - func.marked

    _prune_metadata(lines, functions)
    _log('Marked:  %s , unmarked:  %s' % (marked, unmarked))

    return _emit(lines, functions)


def main(argv):
    '''Read argv[1], eliminate dead functions, and write the result to argv[2].

    When the input has no @main it is copied to the output unchanged and the
    process exits with status 1.
    '''
    infile = argv[1]
    outfile = argv[2]

    with open(infile, 'r') as source:
        lines = source.read().split('\n')

    result = eliminate_dead_functions(lines)
    if result is None:
        shutil.copy(infile, outfile)
        sys.exit(1)

    _log('\nWriting\n')
    with open(outfile, 'w') as target:
        target.writelines(line + '\n' for line in result)


if __name__ == '__main__':
    # NOTE(review): tools/shared.py is executed here as it always was at
    # startup, although nothing in this script visibly uses a name from it;
    # kept in case it has side effects -- confirm before removing.
    with open(path_from_root('tools', 'shared.py'), 'r') as shared:
        exec(shared.read())
    main(sys.argv)