diff options
46 files changed, 1706 insertions, 3944 deletions
@@ -2,6 +2,7 @@ *.pyc *~ *.bc +src/relooper*.js # Ignore generated files src/relooper.js @@ -724,8 +724,6 @@ try: if llvm_opts is None: llvm_opts = LLVM_OPT_LEVEL[opt_level] if llvm_lto is None: llvm_lto = llvm_opts > 0 if closure is None: closure = 1 if opt_level >= 2 else 0 - if minify_whitespace is None: - minify_whitespace = closure # if closure is run, minify whitespace if opt_level <= 0: keep_debug = True # always keep debug in -O0 if DEBUG: start_time = time.time() # done after parsing arguments, which might affect debug state @@ -854,9 +852,23 @@ try: exec('shared.Settings.' + key + ' = ' + value) # Apply effects from settings + if shared.Settings.ASM_JS: + if closure: + print >> sys.stderr, 'emcc: warning: disabling closure because it is not compatible with asm.js code generation' + closure = False + if shared.Settings.CORRECT_SIGNS != 1: + print >> sys.stderr, 'emcc: warning: setting CORRECT_SIGNS to 1 for asm.js code generation' + shared.Settings.CORRECT_SIGNS = 1 + if shared.Settings.CORRECT_OVERFLOWS != 1: + print >> sys.stderr, 'emcc: warning: setting CORRECT_OVERFLOWS to 1 for asm.js code generation' + shared.Settings.CORRECT_OVERFLOWS = 1 + if shared.Settings.CORRECT_SIGNS >= 2 or shared.Settings.CORRECT_OVERFLOWS >= 2 or shared.Settings.CORRECT_ROUNDINGS >= 2: keep_debug = True # must keep debug info to do line-by-line operations + if minify_whitespace is None: + minify_whitespace = closure # if closure is run, minify whitespace + ## Compile source code to bitcode if DEBUG: print >> sys.stderr, 'emcc: compiling to bitcode' @@ -1138,6 +1150,16 @@ try: execute(shlex.split(js_transform, posix=posix) + [os.path.abspath(final)]) if DEBUG: save_intermediate('transformed') + if shared.Settings.ASM_JS: # XXX temporary wrapping for testing purposes + unwrapped = open(final).read() + final += '.asmwrap.js' + open(final, 'w').write(''' +(function() { // prevent new Function from seeing the global scope +%s +}).apply(null, arguments); +''' % unwrapped) + if DEBUG: save_intermediate('asmwrap') + # It is useful to run several js optimizer passes together, to save on unneeded unparsing/reparsing js_optimizer_queue = [] def flush_js_optimizer_queue(): @@ -1163,7 +1185,12 @@ try: if DEBUG: save_intermediate('pretty') def get_eliminate(): - return 'eliminate' if not shared.Settings.ALLOW_MEMORY_GROWTH else 'eliminateMemSafe' + if shared.Settings.ASM_JS: + return 'eliminateAsm' + elif shared.Settings.ALLOW_MEMORY_GROWTH: + return 'eliminateMemSafe' + else: + return 'eliminate' js_optimizer_queue += [get_eliminate()] @@ -1177,6 +1204,8 @@ try: if DEBUG: print >> sys.stderr, 'emcc: running closure' final = shared.Building.closure_compiler(final) if DEBUG: save_intermediate('closure') + elif shared.Settings.ASM_JS and shared.Settings.RELOOP: + js_optimizer_queue += ['registerizeAsm'] # we can't use closure in asm, but this does much of the same if opt_level >= 1: if DEBUG: print >> sys.stderr, 'emcc: running post-closure post-opts' diff --git a/emscripten.py b/emscripten.py index 98dcb6bb..e200ddd9 100755 --- a/emscripten.py +++ b/emscripten.py @@ -129,10 +129,12 @@ def emscript(infile, settings, outfile, libraries=[]): # Save settings to a file to work around v8 issue 1579 settings_file = temp_files.get('.txt').name - settings_text = json.dumps(settings) - s = open(settings_file, 'w') - s.write(settings_text) - s.close() + def save_settings(): + settings_text = json.dumps(settings) + s = open(settings_file, 'w') + s.write(settings_text) + s.close() + save_settings() # Phase 1 - pre if DEBUG: t = time.time() @@ -170,6 +172,9 @@ def emscript(infile, settings, outfile, libraries=[]): if DEBUG: t = time.time() forwarded_json = json.loads(forwarded_data) indexed_functions = set() + if settings.get('ASM_JS'): + settings['EXPORTED_FUNCTIONS'] = forwarded_json['EXPORTED_FUNCTIONS'] + save_settings() chunks = shared.JCache.chunkify(funcs, chunk_size, 'emscript_files' if jcache else None) @@ -223,16 +228,25 @@ def emscript(infile, settings, outfile, libraries=[]): if DEBUG: print >> sys.stderr, ' emscript: phase 2 took %s seconds' % (time.time() - t) if DEBUG: t = time.time() - funcs_js = ''.join([output[0] for output in outputs]) - + # merge forwarded data + if settings.get('ASM_JS'): + all_exported_functions = set(settings['EXPORTED_FUNCTIONS']) # both asm.js and otherwise + exported_implemented_functions = set() for func_js, curr_forwarded_data in outputs: - # merge forwarded data curr_forwarded_json = json.loads(curr_forwarded_data) forwarded_json['Types']['preciseI64MathUsed'] = forwarded_json['Types']['preciseI64MathUsed'] or curr_forwarded_json['Types']['preciseI64MathUsed'] for key, value in curr_forwarded_json['Functions']['blockAddresses'].iteritems(): forwarded_json['Functions']['blockAddresses'][key] = value for key in curr_forwarded_json['Functions']['indexedFunctions'].iterkeys(): indexed_functions.add(key) + if settings.get('ASM_JS'): + for key in curr_forwarded_json['Functions']['implementedFunctions'].iterkeys(): + if key in all_exported_functions: exported_implemented_functions.add(key) + for key, value in curr_forwarded_json['Functions']['unimplementedFunctions'].iteritems(): + forwarded_json['Functions']['unimplementedFunctions'][key] = value + + funcs_js = ''.join([output[0] for output in outputs]) + outputs = None if DEBUG: print >> sys.stderr, ' emscript: phase 2b took %s seconds' % (time.time() - t) if DEBUG: t = time.time() @@ -241,6 +255,7 @@ def emscript(infile, settings, outfile, libraries=[]): forwarded_json['Functions']['indexedFunctions'] = {} i = 2 for indexed in indexed_functions: + #print >> sys.stderr, 'indaxx', indexed, i forwarded_json['Functions']['indexedFunctions'][indexed] = i # make sure not to modify this python object later - we use it in indexize i += 2 forwarded_json['Functions']['nextIndex'] = i @@ -258,8 +273,6 @@ def emscript(infile, settings, outfile, libraries=[]): pre = None #if DEBUG: outfile.write('// funcs\n') - outfile.write(blockaddrsize(indexize(funcs_js))) - funcs_js = None # forward forwarded_data = json.dumps(forwarded_json) @@ -272,8 +285,146 @@ def emscript(infile, settings, outfile, libraries=[]): post_file = temp_files.get('.post.ll').name open(post_file, 'w').write('\n') # no input, just processing of forwarded data out = shared.run_js(compiler, shared.COMPILER_ENGINE, [settings_file, post_file, 'post', forwarded_file] + libraries, stdout=subprocess.PIPE, cwd=path_from_root('src')) - #if DEBUG: outfile.write('// post\n') - outfile.write(indexize(out)) + post, last_forwarded_data = out.split('//FORWARDED_DATA:') + last_forwarded_json = json.loads(last_forwarded_data) + + if settings.get('ASM_JS'): + class Counter: + i = 0 + def make_table(sig, raw): + i = Counter.i + Counter.i += 1 + bad = 'b' + str(i) + params = ','.join(['p%d' % p for p in range(len(sig)-1)]) + coercions = ';'.join(['p%d = %sp%d%s' % (p, '+' if sig[p+1] == 'd' else '', p, '' if sig[p+1] == 'd' else '|0') for p in range(len(sig)-1)]) + ';' + ret = '' if sig[0] == 'v' else ('return %s0' % ('+' if sig[0] == 'd' else '')) + return 'function %s(%s) { %s abort(%d); %s };\n' % (bad, params, coercions, i, ret) + raw.replace('[0,', '[' + bad + ',').replace(',0,', ',' + bad + ',').replace(',0,', ',' + bad + ',').replace(',0]', ',' + bad + ']').replace(',0]', ',' + bad + ']') + function_tables_defs = '\n'.join([make_table(sig, raw) for sig, raw in last_forwarded_json['Functions']['tables'].iteritems()]) + + asm_setup = '\n'.join(['var %s = %s;' % (f.replace('.', '_'), f) for f in ['Runtime.bitshift64', 'Math.floor', 'Math.min']]) + fundamentals = ['buffer', 'Int8Array', 'Int16Array', 'Int32Array', 'Uint8Array', 'Uint16Array', 'Uint32Array', 'Float32Array', 'Float64Array'] + basics = ['abort', 'assert', 'STACKTOP', 'STACK_MAX', 'tempDoublePtr', 'ABORT', 'Runtime_bitshift64', 'Math_floor', 'Math_min'] + if not settings['NAMED_GLOBALS']: basics += ['GLOBAL_BASE'] + if forwarded_json['Types']['preciseI64MathUsed']: + basics += ['i64Math_' + op for op in ['add', 'subtract', 'multiply', 'divide', 'modulo']] + asm_setup += ''' +var i64Math_add = function(a, b, c, d) { i64Math.add(a, b, c, d) }; +var i64Math_subtract = function(a, b, c, d) { i64Math.subtract(a, b, c, d) }; +var i64Math_multiply = function(a, b, c, d) { i64Math.multiply(a, b, c, d) }; +var i64Math_divide = function(a, b, c, d, e) { i64Math.divide(a, b, c, d, e) }; +var i64Math_modulo = function(a, b, c, d, e) { i64Math.modulo(a, b, c, d, e) }; +''' + asm_runtime_funcs = ['stackAlloc', 'stackSave', 'stackRestore', 'setThrew'] + ['setTempRet%d' % i for i in range(10)] + # function tables + function_tables = ['dynCall_' + table for table in last_forwarded_json['Functions']['tables']] + function_tables_impls = [] + for sig in last_forwarded_json['Functions']['tables'].iterkeys(): + args = ','.join(['a' + str(i) for i in range(1, len(sig))]) + arg_coercions = ' '.join(['a' + str(i) + '=' + ('+' if sig[i] == 'd' else '') + 'a' + str(i) + ('|0' if sig[i] == 'i' else '') + ';' for i in range(1, len(sig))]) + function_tables_impls.append(''' + function dynCall_%s(index%s%s) { + %s + %sFUNCTION_TABLE_%s[index&{{{ FTM_%s }}}](%s); + } +''' % (sig, ',' if len(sig) > 1 else '', args, arg_coercions, 'return ' if sig[0] != 'v' else '', sig, sig, args)) + # calculate exports + exported_implemented_functions = list(exported_implemented_functions) + exports = [] + for export in exported_implemented_functions + asm_runtime_funcs + function_tables: + exports.append("'%s': %s" % (export, export)) + exports = '{ ' + ', '.join(exports) + ' }' + # calculate globals + try: + del forwarded_json['Variables']['globals']['_llvm_global_ctors'] # not a true variable + except: + pass + global_vars = forwarded_json['Variables']['globals'].keys() if settings['NAMED_GLOBALS'] else [] + global_funcs = ['_' + x for x in forwarded_json['Functions']['libraryFunctions'].keys()] + asm_globals = ''.join([' var ' + g + '=env.' + g + ';\n' for g in basics + global_funcs + global_vars]) + # sent data + sending = '{ ' + ', '.join([s + ': ' + s for s in fundamentals + basics + global_funcs + global_vars]) + ' }' + # received + receiving = ';\n'.join(['var ' + s + ' = Module["' + s + '"] = asm.' + s for s in exported_implemented_functions + function_tables]) + # finalize + funcs_js = ''' +%s +var asmPre = (function(env, buffer) { + 'use asm'; + var HEAP8 = new env.Int8Array(buffer); + var HEAP16 = new env.Int16Array(buffer); + var HEAP32 = new env.Int32Array(buffer); + var HEAPU8 = new env.Uint8Array(buffer); + var HEAPU16 = new env.Uint16Array(buffer); + var HEAPU32 = new env.Uint32Array(buffer); + var HEAPF32 = new env.Float32Array(buffer); + var HEAPF64 = new env.Float64Array(buffer); +''' % (asm_setup,) + asm_globals + ''' + var __THREW__ = 0; + var undef = 0; + + function stackAlloc(size) { + var ret = STACKTOP; + STACKTOP = (STACKTOP + size)|0; + STACKTOP = ((STACKTOP + 3)>>2)<<2; + return ret|0; + } + function stackSave() { + return STACKTOP|0; + } + function stackRestore(top) { + top = top|0; + STACKTOP = top; + } + function setThrew(threw) { + threw = threw|0; + __THREW__ = threw; + } +''' + ''.join([''' + var tempRet%d = 0; + function setTempRet%d(value) { + value = value|0; + tempRet%d = value; + } +''' % (i, i, i) for i in range(10)]) + funcs_js.replace('\n', '\n ') + ''' + + %s + + return %s; +}); +if (asmPre.toSource) { // works in sm but not v8, so we get full coverage between those two + asmPre = asmPre.toSource(); + asmPre = asmPre.substr(25, asmPre.length-28); + asmPre = new Function('env', 'buffer', asmPre); +} +var asm = asmPre(%s, buffer); // pass through Function to prevent seeing outside scope +%s; +Runtime.stackAlloc = function(size) { return asm.stackAlloc(size) }; +Runtime.stackSave = function() { return asm.stackSave() }; +Runtime.stackRestore = function(top) { asm.stackRestore(top) }; +''' % (function_tables_defs.replace('\n', '\n ') + '\n' + '\n'.join(function_tables_impls), exports, sending, receiving) + + # Set function table masks + def function_table_maskize(js): + masks = {} + default = None + for sig, table in last_forwarded_json['Functions']['tables'].iteritems(): + masks[sig] = str(table.count(',')) + default = sig + def fix(m): + sig = m.groups(0)[0] + if not sig in masks: + print >> sys.stderr, 'warning: function table use without functions for it!', sig + return masks[default] # TODO: generate empty function tables for this case, even though it would fail at runtime if used + return masks[sig] + return re.sub(r'{{{ FTM_([\w\d_$]+) }}}', lambda m: fix(m), js) # masks[m.groups(0)[0]] + funcs_js = function_table_maskize(funcs_js) + else: + function_tables_defs = '\n'.join([table for table in last_forwarded_json['Functions']['tables'].itervalues()]) + outfile.write(function_tables_defs) + outfile.write(blockaddrsize(indexize(funcs_js))) + funcs_js = None + + outfile.write(indexize(post)) if DEBUG: print >> sys.stderr, ' emscript: phase 3 took %s seconds' % (time.time() - t) outfile.close() diff --git a/src/analyzer.js b/src/analyzer.js index 014579f4..0ad3e017 100644 --- a/src/analyzer.js +++ b/src/analyzer.js @@ -19,7 +19,7 @@ function recomputeLines(func) { var BRANCH_INVOKE = set('branch', 'invoke'); var SIDE_EFFECT_CAUSERS = set('call', 'invoke', 'atomic'); -var UNUNFOLDABLE = set('value', 'type', 'phiparam'); +var UNUNFOLDABLE = set('value', 'structvalue', 'type', 'phiparam'); // Analyzer @@ -120,12 +120,14 @@ function analyzer(data, sidePass) { processItem: function(data) { // Legalization if (USE_TYPED_ARRAYS == 2) { - function getLegalVars(base, bits) { - assert(!isNumber(base)); + function getLegalVars(base, bits, allowLegal) { + if (allowLegal && bits <= 32) return [{ ident: base, bits: bits }]; + if (isNumber(base)) return getLegalLiterals(base, bits); var ret = new Array(Math.ceil(bits/32)); var i = 0; + if (base == 'zeroinitializer' || base == 'undef') base = 0; while (bits > 0) { - ret[i] = { ident: base + '$' + i, bits: Math.min(32, bits) }; + ret[i] = { ident: base ? base + '$' + i : '0', bits: Math.min(32, bits) }; bits -= 32; i++; } @@ -142,6 +144,23 @@ function analyzer(data, sidePass) { } return ret; } + function getLegalStructuralParts(value) { + return value.params.slice(0); + } + function getLegalParams(params, bits) { + return params.map(function(param) { + var value = param.value || param; + if (isNumber(value.ident)) { + return getLegalLiterals(value.ident, bits); + } else if (value.intertype == 'structvalue') { + return getLegalStructuralParts(value).map(function(part) { + return { ident: part.ident, bits: part.type.substr(1) }; + }); + } else { + return getLegalVars(value.ident, bits); + } + }); + } // Uses the right factor to multiply line numbers by so that they fit in between // the line[i] and the line after it function interpLines(lines, i, toAdd) { @@ -191,6 +210,7 @@ function analyzer(data, sidePass) { // Legalize lines in labels var tempId = 0; func.labels.forEach(function(label) { + if (dcheck('legalizer')) dprint('zz legalizing: \n' + dump(label.lines)); var i = 0, bits; while (i < label.lines.length) { var item = label.lines[i]; @@ -207,8 +227,12 @@ function analyzer(data, sidePass) { if (isIllegalType(item.valueType) || isIllegalType(item.type)) { isIllegal = true; } + if ((item.intertype == 'load' || item.intertype == 'store') && isStructType(item.valueType)) { + isIllegal = true; // storing an entire structure is illegal + } }); if (!isIllegal) { + //if (dcheck('legalizer')) dprint('no need to legalize \n' + dump(item)); i++; continue; } @@ -222,10 +246,10 @@ function analyzer(data, sidePass) { if (subItem != item && (!(subItem.intertype in UNUNFOLDABLE) || (subItem.intertype == 'value' && isNumber(subItem.ident) && isIllegalType(subItem.type)))) { if (item.intertype == 'phi') { - assert(subItem.intertype == 'value', 'We can only unfold illegal constants in phis'); + assert(subItem.intertype == 'value' || subItem.intertype == 'structvalue', 'We can only unfold illegal constants in phis'); // we must handle this in the phi itself, if we unfold normally it will not be pushed back with the phi } else { - var tempIdent = '$$emscripten$temp$' + (tempId++); + var tempIdent = '$$etemp$' + (tempId++); subItem.assignTo = tempIdent; unfolded.unshift(subItem); fixUnfolded(subItem); @@ -234,7 +258,7 @@ function analyzer(data, sidePass) { } else if (subItem.intertype == 'switch' && isIllegalType(subItem.type)) { subItem.switchLabels.forEach(function(switchLabel) { if (switchLabel.value[0] != '$') { - var tempIdent = '$$emscripten$temp$' + (tempId++); + var tempIdent = '$$etemp$' + (tempId++); unfolded.unshift({ assignTo: tempIdent, intertype: 'value', @@ -258,8 +282,7 @@ function analyzer(data, sidePass) { case 'store': { var toAdd = []; bits = getBits(item.valueType); - var elements; - elements = getLegalVars(item.value.ident, bits); + var elements = getLegalParams([item.value], bits)[0]; var j = 0; elements.forEach(function(element) { var tempVar = '$st$' + i + '$' + j; @@ -290,32 +313,43 @@ function analyzer(data, sidePass) { i += removeAndAdd(label.lines, i, toAdd); continue; } - // call, return: Return value is in an unlegalized array literal. Not fully optimal. + // call, return: Return the first 32 bits, the rest are in temp case 'call': { bits = getBits(value.type); var elements = getLegalVars(item.assignTo, bits); var toAdd = [value]; // legalize parameters legalizeFunctionParameters(value.params); - if (value.assignTo) { + if (value.assignTo && isIllegalType(item.type)) { // legalize return value - var j = 0; - toAdd = toAdd.concat(elements.map(function(element) { - return { + value.assignTo = elements[0].ident; + for (var j = 1; j < elements.length; j++) { + var element = elements[j]; + toAdd.push({ intertype: 'value', assignTo: element.ident, - type: 'i' + bits, - ident: value.assignTo + '[' + (j++) + ']' - }; - })); + type: element.bits, + ident: 'tempRet' + (j - 1) + }); + assert(j<10); // TODO: dynamically create more than 10 tempRet-s + } } i += removeAndAdd(label.lines, i, toAdd); continue; } + case 'landingpad': { + // not much to legalize + i++; + continue; + } case 'return': { bits = getBits(item.type); var elements = getLegalVars(item.value.ident, bits); - item.value.ident = '[' + elements.map(function(element) { return element.ident }).join(',') + ']'; + item.value.ident = '('; + for (var j = 1; j < elements.length; j++) { + item.value.ident += 'tempRet' + (j-1) + '=' + elements[j].ident + ','; + } + item.value.ident += elements[0].ident + ')'; i++; continue; } @@ -341,6 +375,21 @@ function analyzer(data, sidePass) { i += removeAndAdd(label.lines, i, toAdd); continue; } + case 'structvalue': { + bits = getBits(value.type); + var elements = getLegalVars(item.assignTo, bits); + var toAdd = []; + for (var j = 0; j < item.params.length; j++) { + toAdd[j] = { + intertype: 'value', + assignTo: elements[j].ident, + type: 'i32', + ident: item.params[j].ident + }; + } + i += removeAndAdd(label.lines, i, toAdd); + continue; + } case 'load': { bits = getBits(value.valueType); var elements = getLegalVars(item.assignTo, bits); @@ -382,13 +431,9 @@ function analyzer(data, sidePass) { var toAdd = []; var elements = getLegalVars(item.assignTo, bits); var j = 0; - var literalValues = {}; // special handling of literals - we cannot unfold them normally - value.params.map(function(param) { - if (isNumber(param.value.ident)) { - literalValues[param.value.ident] = getLegalLiterals(param.value.ident, bits); - } - }); + var values = getLegalParams(value.params, bits); elements.forEach(function(element) { + var k = 0; toAdd.push({ intertype: 'phi', assignTo: element.ident, @@ -399,7 +444,7 @@ function analyzer(data, sidePass) { label: param.label, value: { intertype: 'value', - ident: (param.value.ident in literalValues) ? literalValues[param.value.ident][j].ident : (param.value.ident + '$' + j), + ident: values[k++][j].ident, type: 'i' + element.bits, } }; @@ -414,6 +459,62 @@ function analyzer(data, sidePass) { i++; continue; // special case, handled in makeComparison } + case 'extractvalue': { // XXX we assume 32-bit alignment in extractvalue/insertvalue, + // but in theory they can run on packed structs too (see use getStructuralTypePartBits) + // potentially legalize the actual extracted value too if it is >32 bits, not just the extraction in general + var index = item.indexes[0][0].text; + var parts = getStructureTypeParts(item.type); + var indexedType = parts[index]; + var targetBits = getBits(indexedType); + var sourceBits = getBits(item.type); + var elements = getLegalVars(item.assignTo, targetBits, true); // possibly illegal + var sourceElements = getLegalVars(item.ident, sourceBits); // definitely illegal + var toAdd = []; + var sourceIndex = 0; + for (var partIndex = 0; partIndex < parts.length; partIndex++) { + if (partIndex == index) { + for (var j = 0; j < elements.length; j++) { + toAdd.push({ + intertype: 'value', + assignTo: elements[j].ident, + type: 'i' + elements[j].bits, + ident: sourceElements[sourceIndex+j].ident + }); + } + break; + } + sourceIndex += getStructuralTypePartBits(parts[partIndex])/32; + } + i += removeAndAdd(label.lines, i, toAdd); + continue; + } + case 'insertvalue': { + var index = item.indexes[0][0].text; // the modified index + var parts = getStructureTypeParts(item.type); + var indexedType = parts[index]; + var indexBits = getBits(indexedType); + var bits = getBits(item.type); // source and target + bits = getBits(value.type); + var toAdd = []; + var elements = getLegalVars(item.assignTo, bits); + var sourceElements = getLegalVars(item.ident, bits); + var indexElements = getLegal |