diff options
59 files changed, 4097 insertions, 13008 deletions
@@ -129,18 +129,18 @@ Most normal gcc/g++ options will work, for example: Options that are modified or new in %s include: -O0 No optimizations (default) -O1 Simple optimizations, including asm.js, LLVM -O1 - optimizations, and no runtime assertions + optimizations, relooping, and no runtime assertions or C++ exception catching (to re-enable C++ exception catching, use - -s DISABLE_EXCEPTION_CATCHING=0 ). - (For details on the affects of different - opt levels, see apply_opt_level() in - tools/shared.py and also src/settings.js.) - -O2 As -O1, plus the relooper (loop recreation), - LLVM -O3 optimizations, and + -s DISABLE_EXCEPTION_CATCHING=0 ), and enables -s ALIASING_FUNCTION_POINTERS=1 + (For details on the affects of different + opt levels, see apply_opt_level() in + tools/shared.py and also src/settings.js.) + -O2 As -O1, plus various js-level optimizations and + LLVM -O3 optimizations -O3 As -O2, plus dangerous optimizations that may break the generated code! This adds @@ -258,6 +258,9 @@ Options that are modified or new in %s include: try adjusting JAVA_HEAP_SIZE in the environment (for example, to 4096m for 4GB). + Note: Closure is only run if js opts are being + done (-O2 or above, or --js-opts 1). + --js-transform <cmd> <cmd> will be called on the generated code before it is optimized. This lets you modify the JavaScript, for example adding some code @@ -933,7 +936,7 @@ try: if default_cxx_std: newargs = newargs + [default_cxx_std] - if js_opts is None: js_opts = True + if js_opts is None: js_opts = opt_level >= 2 if llvm_opts is None: llvm_opts = LLVM_OPT_LEVEL[opt_level] if llvm_lto is None and opt_level >= 3: llvm_lto = 3 if opt_level == 0: debug_level = 4 @@ -1105,6 +1108,16 @@ try: shared.Settings.CORRECT_OVERFLOWS = 1 assert not shared.Settings.PGO, 'cannot run PGO in ASM_JS mode' + heap = 4096 + while heap < shared.Settings.TOTAL_MEMORY: + if heap <= 16*1024*1024: + heap *= 2 + else: + heap += 16*1024*1024 + if heap != shared.Settings.TOTAL_MEMORY: + logging.warning('increasing TOTAL_MEMORY to %d to be more reasonable for asm.js' % heap) + shared.Settings.TOTAL_MEMORY = heap + if shared.Settings.CORRECT_SIGNS >= 2 or shared.Settings.CORRECT_OVERFLOWS >= 2 or shared.Settings.CORRECT_ROUNDINGS >= 2: debug_level = 4 # must keep debug info to do line-by-line operations @@ -1136,6 +1149,7 @@ try: if shared.Settings.MAIN_MODULE or shared.Settings.SIDE_MODULE: assert not memory_init_file, 'memory init file is not supported with module linking' + assert shared.Settings.ASM_JS, 'module linking requires asm.js output (-s ASM_JS=1)' shared.Settings.LINKABLE = 1 # TODO: add FORCE_DCE option for the brave people that do want to dce here and in side modules debug_level = max(debug_level, 2) @@ -1160,6 +1174,9 @@ try: if proxy_to_worker: shared.Settings.PROXY_TO_WORKER = 1 + if js_opts: + shared.Settings.RUNNING_JS_OPTS = 1 + ## Compile source code to bitcode logging.debug('compiling to bitcode') @@ -1239,7 +1256,7 @@ try: extra_files_to_link = [] - if not LEAVE_INPUTS_RAW and not AUTODEBUG and \ + if not LEAVE_INPUTS_RAW and \ not shared.Settings.BUILD_AS_SHARED_LIB == 2 and \ not shared.Settings.SIDE_MODULE: # shared libraries/side modules link no C libraries, need them in parent @@ -1703,7 +1720,7 @@ try: global final, js_optimizer_queue, js_optimizer_extra_info if len(js_optimizer_extra_info) == 0: js_optimizer_extra_info = None - if len(js_optimizer_queue) > 0 and not(len(js_optimizer_queue) == 1 and js_optimizer_queue[0] == 'last'): + if len(js_optimizer_queue) > 0 and not(not shared.Settings.ASM_JS and len(js_optimizer_queue) == 1 and js_optimizer_queue[0] == 'last'): if DEBUG != '2': if shared.Settings.ASM_JS: js_optimizer_queue = ['asm'] + js_optimizer_queue @@ -1738,7 +1755,10 @@ try: else: return 'eliminate' - js_optimizer_queue += [get_eliminate(), 'simplifyExpressions'] + js_optimizer_queue += [get_eliminate()] + + if opt_level >= 2: + js_optimizer_queue += ['simplifyExpressions'] if closure and not shared.Settings.ASM_JS: flush_js_optimizer_queue() diff --git a/emscripten.py b/emscripten.py index 2d7b3daf..b7f85e6f 100755 --- a/emscripten.py +++ b/emscripten.py @@ -49,15 +49,8 @@ if STDERR_FILE: logging.info('logging stderr in js compiler phase into %s' % STDERR_FILE) STDERR_FILE = open(STDERR_FILE, 'w') -def process_funcs((i, funcs, meta, settings_file, compiler, forwarded_file, libraries, compiler_engine, temp_files, DEBUG)): +def process_funcs((i, funcs_file, meta, settings_file, compiler, forwarded_file, libraries, compiler_engine, DEBUG)): try: - funcs_file = temp_files.get('.func_%d.ll' % i).name - f = open(funcs_file, 'w') - f.write(funcs) - funcs = None - f.write('\n') - f.write(meta) - f.close() #print >> sys.stderr, 'running', str([settings_file, funcs_file, 'funcs', forwarded_file] + libraries).replace("'/", "'") # can use this in src/compiler_funcs.html arguments, # # just copy temp dir to under this one out = jsrun.run_js( @@ -100,42 +93,42 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None, ll = open(infile).read() scan(ll, settings) total_ll_size = len(ll) - ll = None # allow collection if DEBUG: logging.debug(' emscript: scan took %s seconds' % (time.time() - t)) # Split input into the relevant parts for each phase + + if DEBUG: t = time.time() + pre = [] funcs = [] # split up functions here, for parallelism later - meta = [] # needed by each function XXX - if DEBUG: t = time.time() - in_func = False - ll_lines = open(infile).readlines() - curr_func = None - for line in ll_lines: - if in_func: - curr_func.append(line) - if line.startswith('}'): - in_func = False - funcs.append((curr_func[0], ''.join(curr_func))) # use the entire line as the identifier - # pre needs to know about all implemented functions, even for non-pre func - pre.append(curr_func[0]) - pre.append(line) - curr_func = None - else: - if line.startswith(';'): continue - if line.startswith('define '): - in_func = True - curr_func = [line] - elif line.find(' = type { ') > 0: - pre.append(line) # type - elif line.startswith('!'): - if line.startswith('!llvm.module'): continue # we can ignore that - meta.append(line) # metadata - else: - pre.append(line) # pre needs it so we know about globals in pre and funcs. So emit globals there - ll_lines = None - meta = ''.join(meta) + meta_start = ll.find('\n!') + if meta_start > 0: + meta = ll[meta_start:] + else: + meta = '' + meta_start = -1 + + start = ll.find('\n') if ll[0] == ';' else 0 # ignore first line, which contains ; ModuleID = '/dir name' + + func_start = start + last = func_start + while 1: + last = func_start + func_start = ll.find('\ndefine ', func_start) + if func_start > last: + pre.append(ll[last:min(func_start+1, meta_start)] + '\n') + if func_start < 0: + pre.append(ll[last:meta_start] + '\n') + break + header = ll[func_start+1:ll.find('\n', func_start+1)+1] + end = ll.find('\n}', func_start) + last = end+3 + funcs.append((header, ll[func_start+1:last])) + pre.append(header + '}\n') + func_start = last + ll = None + if DEBUG and len(meta) > 1024*1024: logging.debug('emscript warning: large amounts of metadata, will slow things down') if DEBUG: logging.debug(' emscript: split took %s seconds' % (time.time() - t)) @@ -195,6 +188,7 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None, jcache.set(shortkey, keys, out) pre, forwarded_data = out.split('//FORWARDED_DATA:') forwarded_file = temp_files.get('.json').name + pre_input = None open(forwarded_file, 'w').write(forwarded_data) if DEBUG: logging.debug(' emscript: phase 1 took %s seconds' % (time.time() - t)) @@ -254,11 +248,20 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None, if DEBUG: logging.debug(' emscript: phase 2 working on %d chunks %s (intended chunk size: %.2f MB, meta: %.2f MB, forwarded: %.2f MB, total: %.2f MB)' % (len(chunks), ('using %d cores' % cores) if len(chunks) > 1 else '', chunk_size/(1024*1024.), len(meta)/(1024*1024.), len(forwarded_data)/(1024*1024.), total_ll_size/(1024*1024.))) - commands = [ - (i, chunk, meta, settings_file, compiler, forwarded_file, libraries, compiler_engine,# + ['--prof'], - temp_files, DEBUG) - for i, chunk in enumerate(chunks) - ] + commands = [] + for i in range(len(chunks)): + funcs_file = temp_files.get('.func_%d.ll' % i).name + f = open(funcs_file, 'w') + f.write(chunks[i]) + if not jcache: + chunks[i] = None # leave chunks array alive (need its length later) + f.write('\n') + f.write(meta) + f.close() + commands.append( + (i, funcs_file, meta, settings_file, compiler, forwarded_file, libraries, compiler_engine,# + ['--prof'], + DEBUG) + ) if len(chunks) > 1: pool = multiprocessing.Pool(processes=cores) @@ -329,8 +332,9 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None, # calculations on merged forwarded data forwarded_json['Functions']['indexedFunctions'] = {} - i = 2 # universal counter - if settings['ASM_JS']: i += 2*settings['RESERVED_FUNCTION_POINTERS'] + i = settings['FUNCTION_POINTER_ALIGNMENT'] # universal counter + if settings['ASM_JS']: i += settings['RESERVED_FUNCTION_POINTERS']*settings['FUNCTION_POINTER_ALIGNMENT'] + base_fp = i table_counters = {} # table-specific counters alias = settings['ASM_JS'] and settings['ALIASING_FUNCTION_POINTERS'] sig = None @@ -339,13 +343,13 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None, sig = forwarded_json['Functions']['implementedFunctions'].get(indexed) or forwarded_json['Functions']['unimplementedFunctions'].get(indexed) assert sig, indexed if sig not in table_counters: - table_counters[sig] = 2 + 2*settings['RESERVED_FUNCTION_POINTERS'] + table_counters[sig] = base_fp curr = table_counters[sig] - table_counters[sig] += 2 + table_counters[sig] += settings['FUNCTION_POINTER_ALIGNMENT'] else: curr = i - i += 2 - #logging.debug('function indexing', indexed, curr, sig) + i += settings['FUNCTION_POINTER_ALIGNMENT'] + #logging.debug('function indexing ' + str([indexed, curr, sig])) forwarded_json['Functions']['indexedFunctions'][indexed] = curr # make sure not to modify this python object later - we use it in indexize def split_32(x): @@ -353,17 +357,18 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None, return '%d,%d,%d,%d' % (x&255, (x >> 8)&255, (x >> 16)&255, (x >> 24)&255) indexing = forwarded_json['Functions']['indexedFunctions'] + def indexize_mem(js): + return re.sub(r"\"?'?{{ FI_([\w\d_$]+) }}'?\"?,0,0,0", lambda m: split_32(indexing.get(m.groups(0)[0]) or 0), js) def indexize(js): - # In the global initial allocation, we need to split up into Uint8 format - ret = re.sub(r"\"?'?{{ FI_([\w\d_$]+) }}'?\"?,0,0,0", lambda m: split_32(indexing.get(m.groups(0)[0]) or 0), js) - return re.sub(r"'{{ FI_([\w\d_$]+) }}'", lambda m: str(indexing.get(m.groups(0)[0]) or 0), ret) + return re.sub(r"'{{ FI_([\w\d_$]+) }}'", lambda m: str(indexing.get(m.groups(0)[0]) or 0), js) blockaddrs = forwarded_json['Functions']['blockAddresses'] + def blockaddrsize_mem(js): + return re.sub(r'"?{{{ BA_([\w\d_$]+)\|([\w\d_$]+) }}}"?,0,0,0', lambda m: split_32(blockaddrs[m.groups(0)[0]][m.groups(0)[1]]), js) def blockaddrsize(js): - ret = re.sub(r'"?{{{ BA_([\w\d_$]+)\|([\w\d_$]+) }}}"?,0,0,0', lambda m: split_32(blockaddrs[m.groups(0)[0]][m.groups(0)[1]]), js) - return re.sub(r'"?{{{ BA_([\w\d_$]+)\|([\w\d_$]+) }}}"?', lambda m: str(blockaddrs[m.groups(0)[0]][m.groups(0)[1]]), ret) + return re.sub(r'"?{{{ BA_([\w\d_$]+)\|([\w\d_$]+) }}}"?', lambda m: str(blockaddrs[m.groups(0)[0]][m.groups(0)[1]]), js) - pre = blockaddrsize(indexize(pre)) + pre = blockaddrsize(blockaddrsize_mem(indexize(indexize_mem(pre)))) if settings.get('ASM_JS'): # move postsets into the asm module @@ -410,6 +415,7 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None, simple = os.environ.get('EMCC_SIMPLE_ASM') class Counter: i = 0 + j = 0 pre_tables = last_forwarded_json['Functions']['tables']['pre'] del last_forwarded_json['Functions']['tables']['pre'] @@ -424,13 +430,18 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None, end = raw.rindex(']') body = raw[start+1:end].split(',') for j in range(settings['RESERVED_FUNCTION_POINTERS']): - body[2 + 2*j] = 'jsCall_%s_%s' % (sig, j) + body[settings['FUNCTION_POINTER_ALIGNMENT'] * (1 + j)] = 'jsCall_%s_%s' % (sig, j) + Counter.j = 0 def fix_item(item): - newline = '\n' in item - return (bad if item.replace('\n', '') == '0' else item) + ('\n' if newline else '') + Counter.j += 1 + newline = Counter.j % 30 == 29 + if item == '0': return bad if not newline else (bad + '\n') + return item if not newline else (item + '\n') body = ','.join(map(fix_item, body)) - return ('function %s(%s) { %s %s(%d); %s }' % (bad, params, coercions, 'abort' if not settings['ASSERTIONS'] else 'nullFunc', i, ret), raw[:start+1] + body + raw[end:]) + return ('function %s(%s) { %s %s(%d); %s }' % (bad, params, coercions, 'abort' if not settings['ASSERTIONS'] else 'nullFunc', i, ret), ''.join([raw[:start+1], body, raw[end:]])) + infos = [make_table(sig, raw) for sig, raw in last_forwarded_json['Functions']['tables'].iteritems()] + function_tables_defs = '\n'.join([info[0] for info in infos]) + '\n// EMSCRIPTEN_END_FUNCS\n' + '\n'.join([info[1] for info in infos]) asm_setup = '' @@ -470,6 +481,7 @@ def emscript(infile, settings, outfile, libraries=[], compiler_engine=None, function_tables = ['dynCall_' + table for table in last_forwarded_json['Functions']['tables']] function_tables_impls = [] + for sig in last_forwarded_json['Functions']['tables'].iterkeys(): args = ','.join(['a' + str(i) for i in range(1, len(sig))]) arg_coercions = ' '.join(['a' + str(i) + '=' + asm_coerce('a' + str(i), sig[i]) + ';' for i in range(1, len(sig))]) @@ -579,7 +591,7 @@ function stackAlloc(size) { var ret = 0; ret = STACKTOP; STACKTOP = (STACKTOP + size)|0; -''' + ('STACKTOP = ((STACKTOP + 3)>>2)<<2;' if settings['TARGET_X86'] else 'STACKTOP = ((STACKTOP + 7)>>3)<<3;') + ''' +''' + ('STACKTOP = (STACKTOP + 3)&-4;' if settings['TARGET_X86'] else 'STACKTOP = (STACKTOP + 7)&-8;') + ''' return ret|0; } function stackSave() { @@ -685,7 +697,9 @@ Runtime.stackRestore = function(top) { asm['stackRestore'](top) }; symbol_table[value] = str(i) outfile.write("var SYMBOL_TABLE = %s;" % json.dumps(symbol_table).replace('"', '')) - for funcs_js_item in funcs_js: # do this loop carefully to save memory + for i in range(len(funcs_js)): # do this loop carefully to save memory + funcs_js_item = funcs_js[i] + funcs_js[i] = None funcs_js_item = indexize(funcs_js_item) funcs_js_item = blockaddrsize(funcs_js_item) outfile.write(funcs_js_item) @@ -714,7 +728,14 @@ def main(args, compiler_engine, cache, jcache, relooper, temp_files, DEBUG, DEBU if not os.path.exists(relooper): from tools import shared shared.Building.ensure_relooper(relooper) - + + settings.setdefault('STRUCT_INFO', cache.get_path('struct_info.compiled.json')) + struct_info = settings.get('STRUCT_INFO') + + if not os.path.exists(struct_info): + from tools import shared + shared.Building.ensure_struct_info(struct_info) + emscript(args.infile, settings, args.outfile, libraries, compiler_engine=compiler_engine, jcache=jcache, temp_files=temp_files, DEBUG=DEBUG, DEBUG_CACHE=DEBUG_CACHE) diff --git a/src/analyzer.js b/src/analyzer.js index 3fb20253..95fbccc7 100644 --- a/src/analyzer.js +++ b/src/analyzer.js @@ -188,7 +188,7 @@ function analyzer(data, sidePass) { if (USE_TYPED_ARRAYS == 2) { function getLegalVars(base, bits, allowLegal) { bits = bits || 32; // things like pointers are all i32, but show up as 0 bits from getBits - if (allowLegal && bits <= 32) return [{ ident: base + ('i' + bits in Runtime.INT_TYPES ? '' : '$0'), bits: bits }]; + if (allowLegal && bits <= 32) return [{ intertype: 'value', ident: base + ('i' + bits in Runtime.INT_TYPES ? '' : '$0'), bits: bits, type: 'i' + bits }]; if (isNumber(base)) return getLegalLiterals(base, bits); if (base[0] == '{') { warnOnce('seeing source of illegal data ' + base + ', likely an inline struct - assuming zeroinit'); @@ -198,7 +198,7 @@ function analyzer(data, sidePass) { var i = 0; if (base == 'zeroinitializer' || base == 'undef') base = 0; while (bits > 0) { - ret[i] = { ident: base ? base + '$' + i : '0', bits: Math.min(32, bits) }; + ret[i] = { intertype: 'value', ident: base ? base + '$' + i : '0', bits: Math.min(32, bits), type: 'i' + Math.min(32, bits) }; bits -= 32; i++; } @@ -209,7 +209,7 @@ function analyzer(data, sidePass) { var ret = new Array(Math.ceil(bits/32)); var i = 0; while (bits > 0) { - ret[i] = { ident: (parsed[i]|0).toString(), bits: Math.min(32, bits) }; // resign all values + ret[i] = { intertype: 'value', ident: (parsed[i]|0).toString(), bits: Math.min(32, bits), type: 'i' + Math.min(32, bits) }; // resign all values bits -= 32; i++; } @@ -225,7 +225,8 @@ function analyzer(data, sidePass) { return getLegalLiterals(value.ident, bits); } else if (value.intertype == 'structvalue') { return getLegalStructuralParts(value).map(function(part) { - return { ident: part.ident, bits: part.type.substr(1) }; + part.bits = part.type.substr(1); // can be some nested IR, like LLVM calls + return part; }); } else { return getLegalVars(value.ident, bits); @@ -550,11 +551,7 @@ function analyzer(data, sidePass) { return { intertype: 'phiparam', label: param.label, - value: { - intertype: 'value', - ident: values[k++][j].ident, - type: 'i' + element.bits, - } + value: values[k++][j] }; }) }); @@ -783,13 +780,14 @@ function analyzer(data, sidePass) { assert(PRECISE_I64_MATH, 'Must have precise i64 math for non-constant 64-bit shifts'); Types.preciseI64MathUsed = 1; value.intertype = 'value'; - value.ident = 'var ' + value.assignTo + '$0 = ' + + value.ident = makeVarDef(value.assignTo) + '$0=' + asmCoercion('_bitshift64' + value.op[0].toUpperCase() + value.op.substr(1) + '(' + asmCoercion(sourceElements[0].ident, 'i32') + ',' + asmCoercion(sourceElements[1].ident, 'i32') + ',' + asmCoercion(value.params[1].ident + '$0', 'i32') + ')', 'i32' ) + ';' + - 'var ' + value.assignTo + '$1 = tempRet0;'; + makeVarDef(value.assignTo) + '$1=tempRet0;'; + value.vars = [[value.assignTo + '$0', 'i32'], [value.assignTo + '$1', 'i32']]; value.assignTo = null; i++; continue; @@ -801,27 +799,65 @@ function analyzer(data, sidePass) { var whole = shifts >= 0 ? Math.floor(shifts/32) : Math.ceil(shifts/32); var fraction = Math.abs(shifts % 32); if (signed) { - var signedFill = '(' + makeSignOp(sourceElements[sourceElements.length-1].ident, 'i' + sourceElements[sourceElements.length-1].bits, 're', 1, 1) + ' < 0 ? -1 : 0)'; - var signedKeepAlive = { intertype: 'value', ident: sourceElements[sourceElements.length-1].ident, type: 'i32' }; - } - for (var j = 0; j < targetElements.length; j++) { - var result = { - intertype: 'value', - ident: (j + whole >= 0 && j + whole < sourceElements.length) ? sourceElements[j + whole]. |