diff options
73 files changed, 3502 insertions, 4422 deletions
@@ -2,7 +2,11 @@ *.pyc *~ *.bc +src/relooper*.js # Ignore generated files src/relooper.js src/relooper.js.raw.js +src/relooper/*.o +src/relooper/*.out + @@ -43,4 +43,5 @@ a license to everyone to use it as detailed in LICENSE.) * Xuejie Xiao <xxuejie@gmail.com> * Dominic Wong <dom@slowbunyip.org> * Alan Kligman <alan.kligman@gmail.com> (copyright owned by Mozilla Foundation) +* Anthony Liot <wolfviking0@yahoo.com> @@ -90,12 +90,14 @@ LLVM_OPT_LEVEL = { 3: 3, } +MEMCPY_ALIASES = ['memcpy', 'llvm.memcpy.i32', 'llvm.memcpy.i64', 'llvm.memcpy.p0i8.p0i8.i32', 'llvm.memcpy.p0i8.p0i8.i64'] + DEBUG = int(os.environ.get('EMCC_DEBUG') or 0) TEMP_DIR = os.environ.get('EMCC_TEMP_DIR') LEAVE_INPUTS_RAW = os.environ.get('EMCC_LEAVE_INPUTS_RAW') # Do not compile .ll files into .bc, just compile them with emscripten directly # Not recommended, this is mainly for the test runner, or if you have some other # specific need. - # One major limitation with this mode is that dlmalloc and libc++ cannot be + # One major limitation with this mode is that libc and libc++ cannot be # added in. Also, LLVM optimizations will not be done, nor dead code elimination AUTODEBUG = os.environ.get('EMCC_AUTODEBUG') # If set to 1, we will run the autodebugger (the automatic debugging tool, see tools/autodebugger). # Note that this will disable inclusion of libraries. This is useful because including @@ -338,7 +340,7 @@ Options that are modified or new in %s include: --clear-cache Manually clears the cache of compiled emscripten system libraries (libc++, - libc++abi, dlmalloc). This is normally + libc++abi, libc). This is normally handled automatically, but if you update llvm in-place (instead of having a different directory for a new version), the caching @@ -353,9 +355,9 @@ Options that are modified or new in %s include: The target file, if specified (-o <target>), defines what will be generated: - <name>.js JavaScript (default) + <name>.js JavaScript <name>.html HTML with embedded JavaScript - <name>.bc LLVM bitcode + <name>.bc LLVM bitcode (default) <name>.o LLVM bitcode (same as .bc) The -c option (which tells gcc not to run the linker) will @@ -718,8 +720,6 @@ try: if llvm_opts is None: llvm_opts = LLVM_OPT_LEVEL[opt_level] if llvm_lto is None: llvm_lto = llvm_opts > 0 if closure is None: closure = 1 if opt_level >= 2 else 0 - if minify_whitespace is None: - minify_whitespace = closure # if closure is run, minify whitespace if opt_level <= 0: keep_debug = True # always keep debug in -O0 if DEBUG: start_time = time.time() # done after parsing arguments, which might affect debug state @@ -848,9 +848,23 @@ try: exec('shared.Settings.' + key + ' = ' + value) # Apply effects from settings + if shared.Settings.ASM_JS: + if closure: + print >> sys.stderr, 'emcc: warning: disabling closure because it is not compatible with asm.js code generation' + closure = False + if shared.Settings.CORRECT_SIGNS != 1: + print >> sys.stderr, 'emcc: warning: setting CORRECT_SIGNS to 1 for asm.js code generation' + shared.Settings.CORRECT_SIGNS = 1 + if shared.Settings.CORRECT_OVERFLOWS != 1: + print >> sys.stderr, 'emcc: warning: setting CORRECT_OVERFLOWS to 1 for asm.js code generation' + shared.Settings.CORRECT_OVERFLOWS = 1 + if shared.Settings.CORRECT_SIGNS >= 2 or shared.Settings.CORRECT_OVERFLOWS >= 2 or shared.Settings.CORRECT_ROUNDINGS >= 2: keep_debug = True # must keep debug info to do line-by-line operations + if minify_whitespace is None: + minify_whitespace = closure # if closure is run, minify whitespace + ## Compile source code to bitcode if DEBUG: print >> sys.stderr, 'emcc: compiling to bitcode' @@ -922,16 +936,26 @@ try: # Note that we assume a single symbol is enough to know if we have/do not have dlmalloc etc. If you # include just a few symbols but want the rest, this will not work. - # dlmalloc - def create_dlmalloc(): - if DEBUG: print >> sys.stderr, 'emcc: building dlmalloc for cache' - execute([shared.PYTHON, shared.EMCC, shared.path_from_root('system', 'lib', 'dlmalloc.c'), '-g', '-o', in_temp('dlmalloc.o')], stdout=stdout, stderr=stderr) - # we include the libc++ new stuff here, so that the common case of using just new/delete is quick to link - execute([shared.PYTHON, shared.EMXX, shared.path_from_root('system', 'lib', 'libcxx', 'new.cpp'), '-g', '-o', in_temp('new.o')], stdout=stdout, stderr=stderr) - shared.Building.link([in_temp('dlmalloc.o'), in_temp('new.o')], in_temp('dlmalloc_full.o')) - return in_temp('dlmalloc_full.o') - def fix_dlmalloc(): - # dlmalloc needs some sign correction. # If we are in mode 0, switch to 2. We will add our lines + # libc + def create_libc(): + if DEBUG: print >> sys.stderr, 'emcc: building libc for cache' + o_s = [] + for src in ['dlmalloc.c', os.path.join('libc', 'musl', 'memcpy.c'), os.path.join('libcxx', 'new.cpp')]: + o = in_temp(os.path.basename(src) + '.o') + execute([shared.PYTHON, shared.EMCC, shared.path_from_root('system', 'lib', src), '-o', o], stdout=stdout, stderr=stderr) + o_s.append(o) + shared.Building.link(o_s, in_temp('libc.bc')) + return in_temp('libc.bc') + + def fix_libc(need): + # If an intrinsic alias of memcpy is used, we need memcpy + for memcpy_alias in MEMCPY_ALIASES: + if memcpy_alias in need: + if '_memcpy' not in shared.Settings.EXPORTED_FUNCTIONS: + shared.Settings.EXPORTED_FUNCTIONS.append('_memcpy') + break + + # libc needs some sign correction. # If we are in mode 0, switch to 2. We will add our lines try: if shared.Settings.CORRECT_SIGNS == 0: raise Exception('we need to change to 2') except: # we fail if equal to 0 - so we need to switch to 2 - or if CORRECT_SIGNS is not even in Settings @@ -942,7 +966,7 @@ try: # so all is well anyhow too. # XXX We also need to add libc symbols that use malloc, for example strdup. It's very rare to use just them and not # a normal malloc symbol (like free, after calling strdup), so we haven't hit this yet, but it is possible. - dlmalloc_symbols = open(shared.path_from_root('system', 'lib', 'dlmalloc.symbols')).read().split('\n') + libc_symbols = open(shared.path_from_root('system', 'lib', 'libc.symbols')).read().split('\n') # libcxx def create_libcxx(): @@ -954,13 +978,13 @@ try: os.append(o) shared.Building.link(os, in_temp('libcxx.bc')) return in_temp('libcxx.bc') - def fix_libcxx(): + def fix_libcxx(need): assert shared.Settings.QUANTUM_SIZE == 4, 'We do not support libc++ with QUANTUM_SIZE == 1' # libcxx might need corrections, so turn them all on. TODO: check which are actually needed shared.Settings.CORRECT_SIGNS = shared.Settings.CORRECT_OVERFLOWS = shared.Settings.CORRECT_ROUNDINGS = 1 #print >> sys.stderr, 'emcc: info: using libcxx turns on CORRECT_* options' libcxx_symbols = map(lambda line: line.strip().split(' ')[1], open(shared.path_from_root('system', 'lib', 'libcxx', 'symbols')).readlines()) - libcxx_symbols = filter(lambda symbol: symbol not in dlmalloc_symbols, libcxx_symbols) + libcxx_symbols = filter(lambda symbol: symbol not in libc_symbols, libcxx_symbols) libcxx_symbols = set(libcxx_symbols) # libcxxabi - just for dynamic_cast for now @@ -973,19 +997,19 @@ try: os.append(o) shared.Building.link(os, in_temp('libcxxabi.bc')) return in_temp('libcxxabi.bc') - def fix_libcxxabi(): + def fix_libcxxabi(need): assert shared.Settings.QUANTUM_SIZE == 4, 'We do not support libc++abi with QUANTUM_SIZE == 1' #print >> sys.stderr, 'emcc: info: using libcxxabi, this may need CORRECT_* options' #shared.Settings.CORRECT_SIGNS = shared.Settings.CORRECT_OVERFLOWS = shared.Settings.CORRECT_ROUNDINGS = 1 libcxxabi_symbols = map(lambda line: line.strip().split(' ')[1], open(shared.path_from_root('system', 'lib', 'libcxxabi', 'symbols')).readlines()) - libcxxabi_symbols = filter(lambda symbol: symbol not in dlmalloc_symbols, libcxxabi_symbols) + libcxxabi_symbols = filter(lambda symbol: symbol not in libc_symbols, libcxxabi_symbols) libcxxabi_symbols = set(libcxxabi_symbols) - force = False # If we have libcxx, we must force inclusion of dlmalloc, since libcxx uses new internally. Note: this is kind of hacky + force = False # If we have libcxx, we must force inclusion of libc, since libcxx uses new internally. Note: this is kind of hacky for name, create, fix, library_symbols in [('libcxx', create_libcxx, fix_libcxx, libcxx_symbols), ('libcxxabi', create_libcxxabi, fix_libcxxabi, libcxxabi_symbols), - ('dlmalloc', create_dlmalloc, fix_dlmalloc, dlmalloc_symbols)]: + ('libc', create_libc, fix_libc, libc_symbols)]: need = set() has = set() for temp_file in temp_files: @@ -1014,7 +1038,7 @@ try: extra_files_to_link.append(libfile) force = True if fix: - fix() + fix(need) # First, combine the bitcode files if there are several. We must also link if we have a singleton .a if len(input_files) + len(extra_files_to_link) > 1 or \ @@ -1065,6 +1089,36 @@ try: shared.Building.llvm_opt(in_temp(target_basename + '.bc'), link_opts) if DEBUG: save_intermediate('linktime', 'bc') + # Optimization and lto can add new intrinsics like memcpy that were not present before. We + # are now *after* linking in libc, so we missed our chance to get memcpy - check and add it now + # if necessary + final_symbols = shared.Building.llvm_nm(final) + need_memcpy = False + for symbol in final_symbols.undefs: + if symbol in MEMCPY_ALIASES: + need_memcpy = True + break + has_memcpy = False + for symbol in final_symbols.defs: + if symbol in MEMCPY_ALIASES: + has_memcpy = True + break + if need_memcpy and not has_memcpy: + if DEBUG: print >> sys.stderr, 'memcpy intrinsic added in optimizations, linking in optimized memcpy' + memcpy = in_temp('memcpy.bc') + force_cxx = os.environ.get('EMMAKEN_CXX') + if force_cxx is not None: del os.environ['EMMAKEN_CXX'] # memcpy must be compiled as C + execute([shared.PYTHON, shared.EMCC, shared.path_from_root('system', 'lib', 'libc', 'musl', 'memcpy.c'), '-o', memcpy], stdout=stdout, stderr=stderr) + if force_cxx is not None: os.environ['EMMAKEN_CXX'] = force_cxx + shared.Building.llvm_opt(memcpy, llvm_opts) # optimize it just like normal code; no point in lto though + next = final + '.postrinsics.bc' + shared.Building.link([final, memcpy], next) + final = next + if shared.Settings.ASM_JS: # export it so other library functions etc. can use it + if '_memcpy' not in shared.Settings.EXPORTED_FUNCTIONS: + shared.Settings.EXPORTED_FUNCTIONS.append('_memcpy') + if DEBUG: save_intermediate('postrinsics', 'bc') + # Prepare .ll for Emscripten if not LEAVE_INPUTS_RAW: final = shared.Building.llvm_dis(final, final + '.ll') @@ -1131,6 +1185,17 @@ try: execute(shlex.split(js_transform, posix=posix) + [os.path.abspath(final)]) if DEBUG: save_intermediate('transformed') + if shared.Settings.ASM_JS: # XXX temporary wrapping for testing purposes + print >> sys.stderr, 'emcc: ASM_JS mode is highly experimental, and will not work on most codebases yet. It is NOT recommended that you try this yet.' # XXX TODO: 0.0 instead of +0 for local var defs + unwrapped = open(final).read() + final += '.asmwrap.js' + open(final, 'w').write(''' +(function() { // prevent new Function from seeing the global scope +%s +}).apply(null, arguments); +''' % unwrapped) + if DEBUG: save_intermediate('asmwrap') + # It is useful to run several js optimizer passes together, to save on unneeded unparsing/reparsing js_optimizer_queue = [] def flush_js_optimizer_queue(): @@ -1156,11 +1221,21 @@ try: if DEBUG: save_intermediate('pretty') def get_eliminate(): - return 'eliminate' if not shared.Settings.ALLOW_MEMORY_GROWTH else 'eliminateMemSafe' + if shared.Settings.ASM_JS: + return 'eliminateAsm' + elif shared.Settings.ALLOW_MEMORY_GROWTH: + return 'eliminateMemSafe' + else: + return 'eliminate' + + def get_simplify_pre(): + if shared.Settings.ASM_JS: + return 'simplifyExpressionsPreAsm' + else: + return 'simplifyExpressionsPre' - js_optimizer_queue += [get_eliminate()] + js_optimizer_queue += [get_eliminate(), get_simplify_pre()] - js_optimizer_queue += ['simplifyExpressionsPre'] if shared.Settings.RELOOP: js_optimizer_queue += ['optimizeShiftsAggressive', get_eliminate()] # aggressive shifts optimization requires loops, it breaks on switches @@ -1170,6 +1245,8 @@ try: if DEBUG: print >> sys.stderr, 'emcc: running closure' final = shared.Building.closure_compiler(final) if DEBUG: save_intermediate('closure') + elif shared.Settings.ASM_JS and shared.Settings.RELOOP: + js_optimizer_queue += ['registerizeAsm'] # we can't use closure in asm, but this does much of the same if opt_level >= 1: if DEBUG: print >> sys.stderr, 'emcc: running post-closure post-opts' diff --git a/emscripten.py b/emscripten.py index 3c636447..ac13f7a3 100755 --- a/emscripten.py +++ b/emscripten.py @@ -129,10 +129,13 @@ def emscript(infile, settings, outfile, libraries=[]): # Save settings to a file to work around v8 issue 1579 settings_file = temp_files.get('.txt').name - settings_text = json.dumps(settings) - s = open(settings_file, 'w') - s.write(settings_text) - s.close() + def save_settings(): + global settings_text + settings_text = json.dumps(settings) + s = open(settings_file, 'w') + s.write(settings_text) + s.close() + save_settings() # Phase 1 - pre if DEBUG: t = time.time() @@ -170,6 +173,9 @@ def emscript(infile, settings, outfile, libraries=[]): if DEBUG: t = time.time() forwarded_json = json.loads(forwarded_data) indexed_functions = set() + if settings.get('ASM_JS'): + settings['EXPORTED_FUNCTIONS'] = forwarded_json['EXPORTED_FUNCTIONS'] + save_settings() chunks = shared.JCache.chunkify(funcs, chunk_size, 'emscript_files' if jcache else None) @@ -223,16 +229,27 @@ def emscript(infile, settings, outfile, libraries=[]): if DEBUG: print >> sys.stderr, ' emscript: phase 2 took %s seconds' % (time.time() - t) if DEBUG: t = time.time() - funcs_js = ''.join([output[0] for output in outputs]) - + # merge forwarded data + if settings.get('ASM_JS'): + all_exported_functions = set(settings['EXPORTED_FUNCTIONS']) # both asm.js and otherwise + for additional_export in ['_malloc', '_free']: # additional functions to export from asm, if they are implemented + all_exported_functions.add(additional_export) + exported_implemented_functions = set() for func_js, curr_forwarded_data in outputs: - # merge forwarded data curr_forwarded_json = json.loads(curr_forwarded_data) forwarded_json['Types']['preciseI64MathUsed'] = forwarded_json['Types']['preciseI64MathUsed'] or curr_forwarded_json['Types']['preciseI64MathUsed'] for key, value in curr_forwarded_json['Functions']['blockAddresses'].iteritems(): forwarded_json['Functions']['blockAddresses'][key] = value for key in curr_forwarded_json['Functions']['indexedFunctions'].iterkeys(): indexed_functions.add(key) + if settings.get('ASM_JS'): + for key in curr_forwarded_json['Functions']['implementedFunctions'].iterkeys(): + if key in all_exported_functions: exported_implemented_functions.add(key) + for key, value in curr_forwarded_json['Functions']['unimplementedFunctions'].iteritems(): + forwarded_json['Functions']['unimplementedFunctions'][key] = value + + funcs_js = ''.join([output[0] for output in outputs]) + outputs = None if DEBUG: print >> sys.stderr, ' emscript: phase 2b took %s seconds' % (time.time() - t) if DEBUG: t = time.time() @@ -241,6 +258,7 @@ def emscript(infile, settings, outfile, libraries=[]): forwarded_json['Functions']['indexedFunctions'] = {} i = 2 for indexed in indexed_functions: + #print >> sys.stderr, 'indaxx', indexed, i forwarded_json['Functions']['indexedFunctions'][indexed] = i # make sure not to modify this python object later - we use it in indexize i += 2 forwarded_json['Functions']['nextIndex'] = i @@ -258,8 +276,6 @@ def emscript(infile, settings, outfile, libraries=[]): pre = None #if DEBUG: outfile.write('// funcs\n') - outfile.write(blockaddrsize(indexize(funcs_js))) - funcs_js = None # forward forwarded_data = json.dumps(forwarded_json) @@ -272,8 +288,163 @@ def emscript(infile, settings, outfile, libraries=[]): post_file = temp_files.get('.post.ll').name open(post_file, 'w').write('\n') # no input, just processing of forwarded data out = shared.run_js(compiler, shared.COMPILER_ENGINE, [settings_file, post_file, 'post', forwarded_file] + libraries, stdout=subprocess.PIPE, cwd=path_from_root('src')) - #if DEBUG: outfile.write('// post\n') - outfile.write(indexize(out)) + post, last_forwarded_data = out.split('//FORWARDED_DATA:') + last_forwarded_json = json.loads(last_forwarded_data) + + if settings.get('ASM_JS'): + simple = os.environ.get('EMCC_SIMPLE_ASM') + class Counter: + i = 0 + def make_table(sig, raw): + i = Counter.i + Counter.i += 1 + bad = 'b' + str(i) + params = ','.join(['p%d' % p for p in range(len(sig)-1)]) + coercions = ';'.join(['p%d = %sp%d%s' % (p, '+' if sig[p+1] == 'd' else '', p, '' if sig[p+1] == 'd' else '|0') for p in range(len(sig)-1)]) + ';' + ret = '' if sig[0] == 'v' else ('return %s0' % ('+' if sig[0] == 'd' else '')) + return 'function %s(%s) { %s abort(%d); %s };\n' % (bad, params, coercions, i, ret) + raw.replace('[0,', '[' + bad + ',').replace(',0,', ',' + bad + ',').replace(',0,', ',' + bad + ',').replace(',0]', ',' + bad + ']').replace(',0]', ',' + bad + ']') + function_tables_defs = '\n'.join([make_table(sig, raw) for sig, raw in last_forwarded_json['Functions']['tables'].iteritems()]) + + maths = ['Runtime.bitshift64', 'Math.floor', 'Math.min', 'Math.abs', 'Math.sqrt', 'Math.pow', 'Math.cos', 'Math.sin', 'Math.tan', 'Math.acos', 'Math.asin', 'Math.atan', 'Math.atan2', 'Math.exp', 'Math.log', 'Math.ceil'] + + if settings['USE_MATH_IMUL']: + maths += ['Math.imul'] + asm_setup = '\n'.join(['var %s = %s;' % (f.replace('.', '_'), f) for f in |