aboutsummaryrefslogtreecommitdiff
path: root/emscripten.py
diff options
context:
space:
mode:
author kripken <alonzakai@gmail.com> 2011-07-09 10:28:19 -0700
committer kripken <alonzakai@gmail.com> 2011-07-09 10:28:19 -0700
commit c79828f6b89cc64cb21ebfa00454b8ce26718cf8 (patch)
tree 2f7250c054c7ff46088b3877b619f5715699b66f /emscripten.py
parent 4fe5b2b7b6d9695a81880922ee249c620880b5d2 (diff)
parent 11e404d7a8c76dbc1f10e24f4cee527f36d20092 (diff)
Merge pull request #47 from max99x/master
Emscripten.py rewrite
Diffstat (limited to 'emscripten.py')
-rwxr-xr-x emscripten.py 251
1 files changed, 216 insertions, 35 deletions
diff --git a/emscripten.py b/emscripten.py
index ec2c1662..6fe7b504 100755
--- a/emscripten.py
+++ b/emscripten.py
@@ -1,43 +1,224 @@
#!/usr/bin/python
-import os, sys, subprocess
+import argparse
+import json
+import os
+import subprocess
+import sys
+import tempfile
+import tools.shared as shared
-abspath = os.path.abspath(os.path.dirname(__file__))
-def path_from_root(*pathelems):
- return os.path.join(os.path.sep, *(abspath.split(os.sep) + list(pathelems)))
-exec(open(path_from_root('tools', 'shared.py'), 'r').read())
-COMPILER = path_from_root('src', 'compiler.js')
+# Temporary files that should be deleted once the program is finished. Paths
+# are appended by get_temp_file() and unlinked by the cleanup loop at the end
+# of this script.
+TEMP_FILES_TO_CLEAN = []
+# The data layout used by llvm-gcc (as opposed to clang, which doesn't have the
+# f128:128:128 part). determine_compiler() looks for this exact text in the
+# disassembled input to choose between llvm-gcc and clang.
+GCC_DATA_LAYOUT = ('target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16'
+ '-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64'
+ '-v128:128:128-a0:0:64-f80:32:32-f128:128:128-n8:16:32"')
+
+
+def path_from_root(*target):
+  """Returns the absolute path to the target from the emscripten root.
+
+  The emscripten root is the directory that contains this script.
+
+  Args:
+    *target: Path components, relative to the root, to append.
+
+  Returns:
+    The absolute path formed by joining the root with the given components.
+  """
+  root = os.path.abspath(os.path.dirname(__file__))
+  # Join onto the root directly. Splitting the root on os.sep and re-joining
+  # from os.path.sep turns it into a drive-relative path on Windows.
+  return os.path.join(root, *target)
+
+
+def get_temp_file(suffix):
+  """Returns a named temp file with the given suffix.
+
+  The file is created in shared.TEMP_DIR and is not deleted when it is closed;
+  its path is recorded in TEMP_FILES_TO_CLEAN instead.
+
+  Args:
+    suffix: The filename suffix (e.g. ".bc" or ".ll") for the temp file.
+
+  Returns:
+    The open NamedTemporaryFile object; its path is available as .name.
+  """
+  named_file = tempfile.NamedTemporaryFile(
+      dir=shared.TEMP_DIR, suffix=suffix, delete=False)
+  TEMP_FILES_TO_CLEAN.append(named_file.name)
+  return named_file
+
+
+def assemble(filepath):
+  """Turns human-readable LLVM assembly into binary LLVM bitcode.
+
+  Args:
+    filepath: Path of the file to assemble. A name ending in ".bc" is taken to
+        be bitcode already and is returned untouched.
+
+  Returns:
+    The path to the assembled file.
+
+  Raises:
+    RuntimeError: If the assembler exits with a non-zero status.
+  """
+  if filepath.endswith('.bc'):
+    return filepath
+  argv = [shared.LLVM_AS, '-o=-', filepath]
+  # The tool's stdout is redirected into a fresh temp file.
+  with get_temp_file('.bc') as bitcode:
+    status = subprocess.call(argv, stdout=bitcode)
+  if status != 0:
+    raise RuntimeError('Could not assemble %s.' % filepath)
+  return bitcode.name
+
+
+def disassemble(filepath):
+  """Turns binary LLVM bitcode into human-readable LLVM assembly.
+
+  Args:
+    filepath: Path of the file to disassemble. A name ending in ".ll" is taken
+        to be human-readable assembly already and is returned untouched.
+
+  Returns:
+    The path to the disassembled file.
+
+  Raises:
+    RuntimeError: If the disassembler exits with a non-zero status.
+  """
+  if filepath.endswith('.ll'):
+    return filepath
+  argv = [shared.LLVM_DIS, '-o=-', filepath] + shared.LLVM_DIS_OPTS
+  # The tool's stdout is redirected into a fresh temp file.
+  with get_temp_file('.ll') as assembly:
+    status = subprocess.call(argv, stdout=assembly)
+  if status != 0:
+    raise RuntimeError('Could not disassemble %s.' % filepath)
+  return assembly.name
+
+
+def optimize(filepath):
+  """Applies LLVM's optimization passes to a bitcode file.
+
+  Args:
+    filepath: The path to the bitcode file to optimize.
+
+  Returns:
+    The path to the optimized file.
+
+  Raises:
+    RuntimeError: If the optimizer exits with a non-zero status.
+  """
+  argv = [shared.LLVM_OPT, '-o=-', filepath]
+  argv = argv + shared.pick_llvm_opts(3, True)
+  # The tool's stdout is redirected into a fresh temp file.
+  with get_temp_file('.bc') as optimized:
+    status = subprocess.call(argv, stdout=optimized)
+  if status != 0:
+    raise RuntimeError('Could not optimize %s.' % filepath)
+  return optimized.name
+
+
+def link(*objects):
+  """Links multiple LLVM bitcode files into a single file.
+
+  Args:
+    *objects: The paths of the bitcode files to link.
+
+  Returns:
+    The path to the linked file.
+
+  Raises:
+    RuntimeError: If the linker exits with a non-zero status.
+  """
+  command = [shared.LLVM_LINK] + list(objects)
+  # The linker's stdout is redirected into a fresh temp file.
+  with get_temp_file('.bc') as out:
+    ret = subprocess.call(command, stdout=out)
+  if ret != 0:
+    # Join the names explicitly: applying % directly to the objects tuple
+    # raises TypeError whenever it does not hold exactly one file.
+    raise RuntimeError('Could not link %s.' % ', '.join(objects))
+  return out.name
+
+
+def compile_malloc(compiler):
+  """Builds dlmalloc into LLVM bitcode.
+
+  Args:
+    compiler: The compiler command to use, a path to either clang or llvm-gcc.
+
+  Returns:
+    The path to the compiled dlmalloc as an LLVM bitcode (.bc) file.
+
+  Raises:
+    RuntimeError: If the compiler exits with a non-zero status.
+  """
+  source = path_from_root('src', 'dlmalloc.c')
+  include_flag = '-I' + path_from_root('src', 'include')
+  argv = [compiler, '-c', '-g', '-emit-llvm', '-m32', '-o-']
+  argv = argv + [include_flag, source]
+  # The compiler's stdout is redirected into a fresh temp file.
+  with get_temp_file('.bc') as bitcode:
+    status = subprocess.call(argv, stdout=bitcode)
+  if status != 0:
+    raise RuntimeError('Could not compile dlmalloc.')
+  return bitcode.name
+
+
+def determine_compiler(filepath):
+  """Determines whether a given file uses llvm-gcc or clang data layout.
+
+  The file is disassembled if necessary and searched for GCC_DATA_LAYOUT.
+
+  Args:
+    filepath: The .bc or .ll file containing the bitcode/assembly to test.
+
+  Returns:
+    The path to the compiler, either llvm-gcc or clang.
+  """
+  # Read through a context manager so the handle is closed promptly.
+  with open(disassemble(filepath)) as ll_file:
+    assembly = ll_file.read()
+  is_gcc = GCC_DATA_LAYOUT in assembly
+  return shared.to_cc(shared.LLVM_GCC if is_gcc else shared.CLANG)
+
+
+def has_annotations(filepath):
+  """Tests whether an assembly file contains annotations.
+
+  Only the file name is used to decide whether the file is assembly: anything
+  not ending in ".ll" is reported as unannotated without being opened.
+
+  Args:
+    filepath: The .ll file containing the assembly to check.
+
+  Returns:
+    True if the name ends with ".ll" and the contents include the "[#uses="
+    annotation marker, False otherwise.
+  """
+  if not filepath.endswith('.ll'):
+    return False
+  # Read through a context manager so the handle is closed promptly.
+  with open(filepath) as ll_file:
+    return '[#uses=' in ll_file.read()
+
+
+def emscript(infile, settings, outfile):
+  """Runs the emscripten LLVM-to-JS compiler.
+
+  The settings string and the assembly are fed to src/compiler.js on stdin,
+  separated by a newline. The compiler's stdout and stderr both go to outfile.
+
+  Args:
+    infile: The path to the input LLVM assembly file.
+    settings: JSON-formatted string of settings that overrides the values
+        defined in src/settings.js.
+    outfile: The file where the output is written. It is closed afterwards,
+        unless it is sys.stdout.
+  """
+  # Read through a context manager so the input handle is closed promptly.
+  with open(infile, 'r') as ll_file:
+    data = ll_file.read()
+  compiler = path_from_root('src', 'compiler.js')
+  # NOTE(review): the compiler's exit status is not checked here.
+  process = subprocess.Popen(shared.COMPILER_ENGINE + [compiler],
+                             stdin=subprocess.PIPE,
+                             stdout=outfile,
+                             cwd=path_from_root('src'),
+                             stderr=subprocess.STDOUT)
+  process.communicate(settings + '\n' + data)
+  # Leave the interpreter's own stdout open so later writes still work.
+  if outfile is not sys.stdout:
+    outfile.close()
+
+
+def main(args):
+  """Compiles the input described by the parsed arguments to Javascript.
+
+  Args:
+    args: The parsed command-line arguments, providing the infile, outfile,
+        optimize, dlmalloc and settings attributes. args.infile is updated
+        in place as intermediate files are produced.
+
+  Raises:
+    ValueError: If a settings override is not of the form NAME=VALUE or its
+        value is not valid JSON.
+  """
+  # Construct a final linked and disassembled file.
+  if args.dlmalloc or args.optimize or not has_annotations(args.infile):
+    args.infile = assemble(args.infile)
+    if args.dlmalloc:
+      malloc = compile_malloc(determine_compiler(args.infile))
+      args.infile = link(args.infile, malloc)
+    if args.optimize:
+      args.infile = optimize(args.infile)
+    args.infile = disassemble(args.infile)
+
+  # Prepare settings for serialization to JSON.
+  settings = {}
+  for setting in args.settings:
+    name, separator, value = setting.partition('=')
+    if not separator:
+      # Report the offending override rather than an opaque unpacking error.
+      raise ValueError(
+          'Invalid setting %r; expected the form NAME=VALUE.' % (setting,))
+    settings[name] = json.loads(value)
+
+  # Adjust sign correction for dlmalloc.
+  if args.dlmalloc:
+    correct_signs = settings.get('CORRECT_SIGNS', 0)
+    if correct_signs in (0, 2):
+      path = path_from_root('src', 'dlmalloc.c')
+      old_lines = settings.get('CORRECT_SIGNS_LINES', [])
+      line_nums = [4816, 4191, 4246, 4199, 4205, 4235, 4227]
+      lines = old_lines + [path + ':' + str(i) for i in line_nums]
+      settings['CORRECT_SIGNS'] = 2
+      settings['CORRECT_SIGNS_LINES'] = lines
+
+  # Compile the assembly to Javascript.
+  emscript(args.infile, json.dumps(settings), args.outfile)
-def emscripten(filename, settings, outfile):
- data = open(filename, 'r').read()
- try:
- cwd = os.getcwd()
- except:
- cwd = None
- os.chdir(os.path.dirname(COMPILER))
- subprocess.Popen(COMPILER_ENGINE + [COMPILER], stdin=subprocess.PIPE, stdout=outfile, stderr=subprocess.STDOUT).communicate(settings+'\n'+data)
- if outfile: outfile.close()
- if cwd is not None:
- os.chdir(cwd)
if __name__ == '__main__':
- if sys.argv.__len__() not in range(2,6):
- print '''
-Emscripten usage: emscripten.py INFILE [SETTINGS] [OUTPUT_FILE]
-
- INFILE must be in human-readable LLVM disassembly form (i.e., as text,
- not binary).
- SETTINGS is an optional set of compiler settings, overriding the defaults,
- in JSON format. See src/settings.js.
- OUTPUT_FILE is the file to create with the output. If not given, we write
- to stdout.
-
- You should have an ~/.emscripten file set up, see tests/settings.py, which
- in particular includes COMPILER_ENGINE.
-'''
- else:
- settings = sys.argv[2] if len(sys.argv) >= 3 else "{}"
- outfile = open(sys.argv[3], 'w') if len(sys.argv) >= 4 else None
- emscripten(sys.argv[1], settings, outfile)
+ # Command-line interface: one positional input file plus optional flags.
+ parser = argparse.ArgumentParser(
+ description='Compile LLVM assembly to Javascript.',
+ epilog='You should have an ~/.emscripten file set up; see settings.py.')
+ parser.add_argument('infile',
+ help='The LLVM assembly file to compile, either in '
+ 'human-readable (*.ll) or in bitcode (*.bc) format.')
+ parser.add_argument('-O', '--optimize',
+ default=False,
+ action='store_true',
+ help='Run LLVM optimizations on the input.')
+ parser.add_argument('-m', '--dlmalloc',
+ default=False,
+ action='store_true',
+ help='Use dlmalloc. Without, uses a dummy allocator.')
+ # The output file is opened for writing by argparse itself.
+ parser.add_argument('-o', '--outfile',
+ default=sys.stdout,
+ type=argparse.FileType('w'),
+ help='Where to write the output; defaults to stdout.')
+ # Each -s value is a NAME=VALUE pair; main() parses VALUE as JSON.
+ parser.add_argument('-s', '--settings',
+ default=[],
+ nargs=argparse.ZERO_OR_MORE,
+ metavar='FOO=BAR',
+ help='Overrides for settings defined in settings.js.')
+ try:
+ main(parser.parse_args())
+ finally:
+ # Clean up temporary files. This runs even when main() raises, so every
+ # path recorded in TEMP_FILES_TO_CLEAN is removed.
+ for filename in TEMP_FILES_TO_CLEAN:
+ os.unlink(filename)