diff options
34 files changed, 1262 insertions, 655875 deletions
@@ -78,6 +78,17 @@ import os, sys, shutil, tempfile from subprocess import Popen, PIPE, STDOUT from tools import shared +# Mapping of emcc opt levels to llvm opt levels. We use llvm opt level 3 in emcc opt +# levels 2 and 3 (emcc 3 is unsafe opts, so unsuitable for the only level to get +# llvm opt level 3, and speed-wise emcc level 2 is already the slowest/most optimizing +# level) +LLVM_OPT_LEVEL = { + 0: 0, + 1: 1, + 2: 3, + 3: 3, +} + DEBUG = os.environ.get('EMCC_DEBUG') TEMP_DIR = os.environ.get('EMCC_TEMP_DIR') LEAVE_INPUTS_RAW = os.environ.get('EMCC_LEAVE_INPUTS_RAW') # Do not compile .ll files into .bc, just compile them with emscripten directly @@ -85,10 +96,16 @@ LEAVE_INPUTS_RAW = os.environ.get('EMCC_LEAVE_INPUTS_RAW') # Do not compile .ll # specific need. # One major limitation with this mode is that dlmalloc and libc++ cannot be # added in. Also, LLVM optimizations will not be done, nor dead code elimination +AUTODEBUG = os.environ.get('EMCC_AUTODEBUG') # If set to 1, we will run the autodebugger (the automatic debugging tool, see tools/autodebugger). + # Note that this will disable inclusion of libraries. This is useful because including + # dlmalloc makes it hard to compare native and js builds if DEBUG: print >> sys.stderr, 'emcc: ', ' '.join(sys.argv) if DEBUG and LEAVE_INPUTS_RAW: print >> sys.stderr, 'emcc: leaving inputs raw' +stdout = PIPE if not DEBUG else None # suppress output of child processes +stderr = PIPE if not DEBUG else None # unless we are in DEBUG mode + shared.check_sanity() # Handle some global flags @@ -115,7 +132,7 @@ Most normal gcc/g++ options will work, for example: Options that are modified or new in %s include: -O0 No optimizations (default) - -O1 Simple optimizations, including safe LLVM + -O1 Simple optimizations, including LLVM -O1 optimizations, and no runtime assertions or C++ exception catching (to re-enable C++ exception catching, use @@ -124,7 +141,8 @@ Options that are modified or new in %s include: compiling to JavaScript, not to intermediate bitcode. -O2 As -O1, plus the relooper (loop recreation), - plus closure compiler advanced opts + plus closure compiler advanced opts, plus + LLVM -O2 optimizations Warning: Compiling with this takes a long time! -O3 As -O2, plus dangerous optimizations that may break the generated code! If that happens, try @@ -135,12 +153,8 @@ Options that are modified or new in %s include: --typed-arrays <mode> 0: No typed arrays 1: Parallel typed arrays 2: Shared (C-like) typed arrays (default) - --llvm-opts <level> 0: No LLVM optimizations (default in -O0) - 1: Safe/portable LLVM optimizations - (default in -O1 and above) - 2: Full, unsafe/unportable LLVM optimizations; - this will almost certainly break the - generated code! + --llvm-opts <on> 0: No LLVM optimizations (default in -O0) + 1: LLVM optimizations (default in -O1 +) --closure <on> 0: No closure compiler (default in -O0, -O1) 1: Run closure compiler (default in -O2, -O3) --js-transform <cmd> <cmd> will be called on the generated code @@ -231,8 +245,6 @@ def unsuffixed(name): def unsuffixed_basename(name): return os.path.basename(unsuffixed(name)) -LLVM_INTERNAL_OPT_LEVEL = 2 - # ---------------- End configs ------------- if len(sys.argv) == 1 or sys.argv[1] in ['x', 't']: @@ -286,7 +298,7 @@ try: newargs = sys.argv[1:] opt_level = 0 - llvm_opt_level = None + llvm_opts = None closure = None js_transform = None compress_whitespace = None @@ -305,8 +317,7 @@ try: newargs[i] = '' elif newargs[i].startswith('--llvm-opts'): check_bad_eq(newargs[i]) - llvm_opt_level = eval(newargs[i+1]) - assert 0 <= llvm_opt_level <= 1, 'Only two levels of LLVM optimizations are supported so far, 0 (none) and 1 (safe)' + llvm_opts = eval(newargs[i+1]) newargs[i] = '' newargs[i+1] = '' elif newargs[i].startswith('--closure'): @@ -337,7 +348,7 @@ try: newargs[i+1] = '' newargs = [ arg for arg in newargs if arg is not '' ] - if llvm_opt_level is None: llvm_opt_level = 1 if opt_level >= 1 else 0 + if llvm_opts is None: llvm_opts = 1 if opt_level >= 1 else 0 if closure is None: closure = 1 if opt_level >= 2 else 0 if compress_whitespace is None: compress_whitespace = closure # if closure is run, compress whitespace @@ -435,7 +446,7 @@ try: # If we were just asked to generate bitcode, stop there if final_suffix not in ['js', 'html']: - if llvm_opt_level > 0: + if llvm_opts > 0: print >> sys.stderr, 'emcc: warning: -Ox flags ignored, since not generating JavaScript' if not specified_target: for input_file in input_files: @@ -460,7 +471,7 @@ try: extra_files_to_link = [] - if not LEAVE_INPUTS_RAW: + if not LEAVE_INPUTS_RAW and not AUTODEBUG: # Check if we need to include some libraries that we compile. (We implement libc ourselves in js, but # compile a malloc implementation and stdlibc++.) # Note that we assume a single symbol is enough to know if we have/do not have dlmalloc etc. If you @@ -469,9 +480,9 @@ try: # dlmalloc def create_dlmalloc(): if DEBUG: print >> sys.stderr, 'emcc: building dlmalloc for cache' - Popen([shared.EMCC, shared.path_from_root('system', 'lib', 'dlmalloc.c'), '-g', '-o', in_temp('dlmalloc.o')], stdout=PIPE, stderr=PIPE).communicate() + Popen([shared.EMCC, shared.path_from_root('system', 'lib', 'dlmalloc.c'), '-g', '-o', in_temp('dlmalloc.o')], stdout=stdout, stderr=stderr).communicate() # we include the libc++ new stuff here, so that the common case of using just new/delete is quick to link - Popen([shared.EMXX, shared.path_from_root('system', 'lib', 'libcxx', 'new.cpp'), '-g', '-o', in_temp('new.o')], stdout=PIPE, stderr=PIPE).communicate() + Popen([shared.EMXX, shared.path_from_root('system', 'lib', 'libcxx', 'new.cpp'), '-g', '-o', in_temp('new.o')], stdout=stdout, stderr=stderr).communicate() shared.Building.link([in_temp('dlmalloc.o'), in_temp('new.o')], in_temp('dlmalloc_full.o')) return in_temp('dlmalloc_full.o') def fix_dlmalloc(): @@ -530,46 +541,61 @@ try: if DEBUG: print >> sys.stderr, 'emcc: linking: ', linker_inputs shared.Building.link(linker_inputs, in_temp(target_basename + '.bc')) - # TODO: LLVM link-time opts? here and/or elsewhere? + final = in_temp(target_basename + '.bc') else: if not LEAVE_INPUTS_RAW: shutil.move(in_temp(unsuffixed_basename(input_files[0]) + '.o'), in_temp(target_basename + '.bc')) + final = in_temp(target_basename + '.bc') + else: + final = input_files[0] + + if DEBUG: + print >> sys.stderr, 'emcc: saving intermediate processing steps to %s' % shared.EMSCRIPTEN_TEMP_DIR + + intermediate_counter = 0 + def save_intermediate(name=None, suffix='js'): + global intermediate_counter + shutil.copyfile(final, os.path.join(shared.EMSCRIPTEN_TEMP_DIR, 'emcc-%d%s.%s' % (intermediate_counter, '' if name is None else '-' + name, suffix))) + intermediate_counter += 1 + + if not LEAVE_INPUTS_RAW: save_intermediate('basebc', 'bc') # Optimize, if asked to - if llvm_opt_level > 0 and not LEAVE_INPUTS_RAW: - if DEBUG: print >> sys.stderr, 'emcc: LLVM opts' - shared.Building.llvm_opt(in_temp(target_basename + '.bc'), LLVM_INTERNAL_OPT_LEVEL, safe=llvm_opt_level < 2) + if llvm_opts > 0 and opt_level > 0 and not LEAVE_INPUTS_RAW: + if DEBUG: print >> sys.stderr, 'emcc: LLVM -O%d' % LLVM_OPT_LEVEL[opt_level] + shared.Building.llvm_opt(in_temp(target_basename + '.bc'), LLVM_OPT_LEVEL[opt_level]) + if DEBUG: save_intermediate('opt', 'bc') + # Do LTO in a separate pass to work around LLVM bug XXX (see failure e.g. in cubescript) + if not shared.Settings.BUILD_AS_SHARED_LIB and not shared.Settings.LINKABLE: + if DEBUG: print >> sys.stderr, 'emcc: LLVM LTO' + shared.Building.llvm_opt(in_temp(target_basename + '.bc'), ['-disable-inlining', '-std-link-opts']) + if DEBUG: save_intermediate('lto', 'bc') else: # If possible, remove dead functions etc., this potentially saves a lot in the size of the generated code (and the time to compile it) if not LEAVE_INPUTS_RAW and not shared.Settings.BUILD_AS_SHARED_LIB and not shared.Settings.LINKABLE: if DEBUG: print >> sys.stderr, 'emcc: LLVM dead globals elimination' shared.Building.llvm_opt(in_temp(target_basename + '.bc'), ['-internalize', '-globaldce']) + if DEBUG: save_intermediate('dce', 'bc') - # Emscripten + # Prepare .ll for Emscripten try: if shared.Settings.RELOOP: print >> sys.stderr, 'emcc: warning: The relooper optimization can be very slow.' except: pass - if DEBUG: - print >> sys.stderr, 'emcc: saving intermediate processing steps to %s' % shared.EMSCRIPTEN_TEMP_DIR - - intermediate_counter = 0 - def save_intermediate(name=None, suffix='js'): - global intermediate_counter - shutil.copyfile(final, os.path.join(shared.EMSCRIPTEN_TEMP_DIR, 'emcc-%d%s.%s' % (intermediate_counter, '' if name is None else '-' + name, suffix))) - intermediate_counter += 1 - if not LEAVE_INPUTS_RAW: - final = in_temp(target_basename + '.bc') - if DEBUG: save_intermediate('bc', 'bc') final = shared.Building.llvm_dis(final, final + '.ll') else: assert len(input_files) == 1 - final = input_files[0] if DEBUG: save_intermediate('ll', 'll') + if AUTODEBUG: + Popen(['python', shared.AUTODEBUGGER, final, final + '.ad.ll']).communicate()[0] + final += '.ad.ll' + if DEBUG: save_intermediate('autodebug', 'll') + + # Emscripten if DEBUG: print >> sys.stderr, 'emcc: LLVM => JS' final = shared.Building.emscripten(final, append_ext=False) if DEBUG: save_intermediate('original') diff --git a/src/analyzer.js b/src/analyzer.js index 7412be6d..a4e7d52d 100644 --- a/src/analyzer.js +++ b/src/analyzer.js @@ -67,7 +67,7 @@ function analyzer(data, sidePass) { if (subItem.intertype == 'function') { item.functions.push(subItem); subItem.endLineNum = null; - subItem.lines = []; + subItem.lines = []; // We will fill in the function lines after the legalizer, since it can modify them subItem.labels = []; // no explicit 'entry' label in clang on LLVM 2.8 - most of the time, but not all the time! - so we add one if necessary @@ -87,7 +87,6 @@ function analyzer(data, sidePass) { } else if (item.functions.length > 0 && item.functions.slice(-1)[0].endLineNum === null) { // Internal line if (!currLabelFinished) { - item.functions.slice(-1)[0].lines.push(subItem); item.functions.slice(-1)[0].labels.slice(-1)[0].lines.push(subItem); // If this line fails, perhaps missing a label? LLVM_STYLE related? if (subItem.intertype === 'branch') { currLabelFinished = true; @@ -100,7 +99,352 @@ function analyzer(data, sidePass) { } } delete item.items; - this.forwardItem(item, 'Typevestigator'); + this.forwardItem(item, 'Legalizer'); + } + }); + + // Legalize LLVM unrealistic types into realistic types. + // + // With full LLVM optimizations, it can generate types like i888 which do not exist in + // any actual hardware implementation, but are useful during optimization. LLVM then + // legalizes these types into real ones during code generation. Sadly, there is no LLVM + // IR pass to legalize them, which would have been useful and nice from a design perspective. + // The LLVM community is also not interested in receiving patches to implement that + // functionality, since it would duplicate existing code from the code generation + // component. Therefore, we implement legalization here in Emscripten. + // + // Currently we just legalize completely unrealistic types into bundles of i32s, and just + // the most common instructions that can be involved with such types: load, store, shifts, + // trunc and zext. + // + // TODO: Expand this also into legalization of i64 into i32,i32, which can then + // replace our i64 mode 1 implementation. Legalizing i64s is harder though + // as they can appear in function arguments and we would also need to implement + // an unfolder (to uninline inline LLVM function calls, so that each LLVM line + // has a single LLVM instruction). + substrate.addActor('Legalizer', { + processItem: function(data) { + // Legalization + if (USE_TYPED_ARRAYS == 2) { + function isIllegalType(type) { + return getBits(type) > 64; + } + function getLegalVars(base, bits) { + if (isNumber(base)) { + return getLegalLiterals(base, bits); + } + var ret = new Array(Math.ceil(bits/32)); + var i = 0; + while (bits > 0) { + ret[i] = { ident: base + '$' + i, bits: Math.min(32, bits) }; + bits -= 32; + i++; + } + return ret; + } + function getLegalLiterals(text, bits) { + var parsed = parseArbitraryInt(text, bits); + var ret = new Array(Math.ceil(bits/32)); + var i = 0; + while (bits > 0) { + ret[i] = { ident: parsed[i].toString(), bits: Math.min(32, bits) }; + bits -= 32; + i++; + } + return ret; + } + // Unfolds internal inline llvmfunc calls, for example x = load (bitcast y) + // will become temp = y \n x = load temp + // @return The index of the original line, after the unfolding. In the example + // above, the index returned will be the new index of the line with `load', + // that is, i+1. + function unfold(lines, i, item, slot) { + if (item[slot].intertype == 'value') return i; + // TODO: unfold multiple slots at once + var tempIdent = '$$emscripten$temp$' + i; + lines.splice(i, 0, { + intertype: 'assign', + ident: tempIdent, + value: item[slot], + lineNum: lines[i].lineNum - 0.5 + }); + item[slot] = { intertype: 'value', ident: tempIdent, type: item[slot].type }; + return i+1; + } + data.functions.forEach(function(func) { + func.labels.forEach(function(label) { + var i = 0, bits; + while (i < label.lines.length) { + var item = label.lines[i]; + if (item.intertype == 'store') { + if (isIllegalType(item.valueType)) { + dprint('legalizer', 'Legalizing store at line ' + item.lineNum); + i = unfold(label.lines, i, item, 'value'); + label.lines.splice(i, 1); + bits = getBits(item.valueType); + var elements; + elements = getLegalVars(item.value.ident, bits); + var j = 0; + elements.forEach(function(element) { + var tempVar = '$st$' + i + '$' + j; + label.lines.splice(i+j*2, 0, { + intertype: 'assign', + ident: tempVar, + value: { + intertype: 'getelementptr', + ident: item.pointer.ident, + type: '[0 x i32]*', + params: [ + { intertype: 'value', ident: item.pointer.ident, type: '[0 x i32]*' }, // technically a bitcase is needed in llvm, but not for us + { intertype: 'value', ident: '0', type: 'i32' }, + { intertype: 'value', ident: j.toString(), type: 'i32' } + ], + }, + lineNum: item.lineNum + (j/100) + }); + var actualSizeType = 'i' + element.bits; // The last one may be smaller than 32 bits + label.lines.splice(i+j*2+1, 0, { + intertype: 'store', + valueType: actualSizeType, + value: { intertype: 'value', ident: element.ident, type: actualSizeType }, + pointer: { intertype: 'value', ident: tempVar, type: actualSizeType + '*' }, + ident: tempVar, + pointerType: actualSizeType + '*', + align: item.align, + lineNum: item.lineNum + ((j+0.5)/100) + }); + j++; + }); + Types.needAnalysis['[0 x i32]'] = 0; + i += j*2; + continue; + } + } else if (item.intertype == 'assign') { + var value = item.value; + switch (value.intertype) { + case 'load': { + if (isIllegalType(value.valueType)) { + dprint('legalizer', 'Legalizing load at line ' + item.lineNum); + i = unfold(label.lines, i, value, 'pointer'); + label.lines.splice(i, 1); + bits = getBits(value.valueType); +// assert(value.pointer.intertype == 'value', 'TODO: unfolding'); + var elements = getLegalVars(item.ident, bits); + var j = 0; + elements.forEach(function(element) { + var tempVar = '$st$' + i + '$' + j; + label.lines.splice(i+j*2, 0, { + intertype: 'assign', + ident: tempVar, + value: { + intertype: 'getelementptr', + ident: value.pointer.ident, + type: '[0 x i32]*', + params: [ + { intertype: 'value', ident: value.pointer.ident, type: '[0 x i32]*' }, // technically bitcast is needed in llvm, but not for us + { intertype: 'value', ident: '0', type: 'i32' }, + { intertype: 'value', ident: j.toString(), type: 'i32' } + ], + }, + lineNum: item.lineNum + (j/100) + }); + var actualSizeType = 'i' + element.bits; // The last one may be smaller than 32 bits + label.lines.splice(i+j*2+1, 0, { + intertype: 'assign', + ident: element.ident, + value: { + intertype: 'load', + pointerType: actualSizeType + '*', + valueType: actualSizeType, + type: actualSizeType, // XXX why is this missing from intertyper? + pointer: { intertype: 'value', ident: tempVar, type: actualSizeType + '*' }, + ident: tempVar, + pointerType: actualSizeType + '*', + align: value.align, + }, + lineNum: item.lineNum + ((j+0.5)/100) + }); + j++; + }); + Types.needAnalysis['[0 x i32]'] = 0; + i += j*2; + continue; + } + } + case 'mathop': { + if (isIllegalType(value.type)) { + dprint('legalizer', 'Legalizing mathop at line ' + item.lineNum); + label.lines.splice(i, 1); + var toAdd = []; + assert(value.param1.intertype == 'value', 'TODO: unfolding'); + var sourceBits = getBits(value.param1.type); + var sourceElements; + if (sourceBits <= 64) { + // The input is a legal type + if (sourceBits <= 32) { + sourceElements = [{ ident: value.param1.ident, bits: sourceBits }]; + } else if (sourceBits == 64 && I64_MODE == 1) { + sourceElements = [{ ident: value.param1.ident + '[0]', bits: 32 }, + { ident: value.param1.ident + '[1]', bits: 32 }]; + // Add the source element as a param so that it is not eliminated as unneeded (the idents are not a simple ident here) + toAdd.push({ + intertype: 'value', ident: ';', type: 'rawJS', + params: [{ intertype: 'value', ident: value.param1.ident, type: 'i32' }] + }); + } else { + throw 'Invalid legal type as source of legalization ' + sourceBits; + } + } else { + sourceElements = getLegalVars(value.param1.ident, sourceBits); + } + // All mathops can be parametrized by how many shifts we do, and how big the source is + var shifts = 0; + var targetBits; + var processor = null; + switch (value.op) { + case 'lshr': { + assert(value.param2.intertype == 'value', 'TODO: unfolding'); + shifts = parseInt(value.param2.ident); + targetBits = sourceBits; + break; + } + case 'shl': { + assert(value.param2.intertype == 'value', 'TODO: unfolding'); + shifts = -parseInt(value.param2.ident); + targetBits = sourceBits; + break; + } + case 'trunc': case 'zext': { + assert(value.param2.intertype == 'type' || value.param2.intertype == 'value', 'TODO: unfolding'); + targetBits = getBits(value.param2.ident); + break; + } + case 'or': case 'and': case 'xor': { + targetBits = sourceBits; + var otherElements = getLegalVars(value.param2.ident, sourceBits); + processor = function(result, j) { + return { + intertype: 'mathop', + op: value.op, + type: 'i' + otherElements[j].bits, + param1: result, + param2: { intertype: 'value', ident: otherElements[j].ident, type: 'i' + otherElements[j].bits } + }; + }; + break; + } + default: throw 'Invalid mathop for legalization: ' + [value.op, item.lineNum, dump(item)]; + } + // Do the legalization + assert(isNumber(shifts), 'TODO: handle nonconstant shifts'); + var targetElements = getLegalVars(item.ident, targetBits); + var sign = shifts >= 0 ? 1 : -1; + var shiftOp = shifts >= 0 ? 'shl' : 'lshr'; + var shiftOpReverse = shifts >= 0 ? 'lshr' : 'shl'; + var whole = shifts >= 0 ? Math.floor(shifts/32) : Math.ceil(shifts/32); + var fraction = Math.abs(shifts % 32); + for (var j = 0; j < targetElements.length; j++) { + var result = { + intertype: 'value', + ident: (j + whole >= 0 && j + whole < sourceElements.length) ? sourceElements[j + whole].ident : '0', + type: 'i32', + }; + if (fraction != 0) { + var other = { + intertype: 'value', + ident: (j + sign + whole >= 0 && j + sign + whole < sourceElements.length) ? sourceElements[j + sign + whole].ident : '0', + type: 'i32', + }; + other = { + intertype: 'mathop', + op: shiftOp, + type: 'i32', + param1: other, + param2: { intertype: 'value', ident: (32 - fraction).toString(), type: 'i32' } + }; + result = { + intertype: 'mathop', + op: shiftOpReverse, + type: 'i32', + param1: result, + param2: { intertype: 'value', ident: fraction.toString(), type: 'i32' } + }; + result = { + intertype: 'mathop', + op: 'or', + type: 'i32', + param1: result, + param2: other + } + } + if (targetElements[j].bits < 32 && shifts < 0) { + // truncate bits that fall off the end. This is not needed in most cases, can probably be optimized out + result = { + intertype: 'mathop', + op: 'and', + type: 'i32', + param1: result, + param2: { intertype: 'value', ident: (Math.pow(2, targetElements[j].bits)-1).toString(), type: 'i32' } + } + } + if (processor) { + result = processor(result, j); + } + toAdd.push({ + intertype: 'assign', + ident: targetElements[j].ident, + value: result, + lineNum: item.lineNum + (j/100) + }); + } + if (targetBits <= 64) { + // We are generating a normal legal type here + var legalValue; + if (targetBits == 64 && I64_MODE == 1) { + // Generate an i64-1 [low,high]. This will be unnecessary when we legalize i64s + legalValue = { + intertype: 'value', + ident: '[' + targetElements[0].ident + ',' + targetElements[1].ident + ']', + type: 'rawJS', + // Add the target elements as params so that they are not eliminated as unneeded (the ident is not a simple ident here) + params: targetElements.map(function(element) { + return { intertype: 'value', ident: element.ident, type: 'i32' }; + }) + }; + } else if (targetBits <= 32) { + legalValue = { intertype: 'value', ident: targetElements[0].ident, type: 'rawJS' }; + // truncation to smaller than 32 bits has already been done, if necessary + } else { + throw 'Invalid legal type as target of legalization ' + targetBits; + } + toAdd.push({ + intertype: 'assign', + ident: item.ident, + value: legalValue, + lineNum: item.lineNum + ((j+1)/100) + }); + } + Array.prototype.splice.apply(label.lines, [i, 0].concat(toAdd)); + i += toAdd.length; + continue; + } + } + } + } + i++; + continue; + } + }); + }); + } + + // Add function lines to func.lines, after our modifications to the label lines + data.functions.forEach(function(func) { + func.labels.forEach(function(label) { + func.lines = func.lines.concat(label.lines); + }); + }); + this.forwardItem(data, 'Typevestigator'); } }); @@ -463,6 +807,12 @@ function analyzer(data, sidePass) { item.functions.forEach(function(func) { func.lines.forEach(function(line, i) { if (line.intertype === 'assign' && line.value.intertype === 'load') { + // Floats have no concept of signedness. Mark them as 'signed', which is the default, for which we do nothing + if (line.value.type in Runtime.FLOAT_TYPES) { + line.value.unsigned = false; + return; + } + // Booleans are always unsigned var data = func.variables[line.ident]; |