25 files changed, 876 insertions, 216 deletions
diff --git a/emcc b/emcc
index cb8e41ae..8d4a3205 100755
--- a/emcc
+++ b/emcc
@@ -77,6 +77,8 @@ import os, sys, shutil, tempfile
 from subprocess import Popen, PIPE, STDOUT
 from tools import shared
 
+MAX_LLVM_OPT_LEVEL = 3
+
 DEBUG = os.environ.get('EMCC_DEBUG')
 TEMP_DIR = os.environ.get('EMCC_TEMP_DIR')
 LEAVE_INPUTS_RAW = os.environ.get('EMCC_LEAVE_INPUTS_RAW') # Do not compile .ll files into .bc, just compile them with emscripten directly
@@ -84,10 +86,16 @@ LEAVE_INPUTS_RAW = os.environ.get('EMCC_LEAVE_INPUTS_RAW') # Do not compile .ll
                                                            # specific need.
                                                            # One major limitation with this mode is that dlmalloc and libc++ cannot be
                                                            # added in. Also, LLVM optimizations will not be done, nor dead code elimination
+AUTODEBUG = os.environ.get('EMCC_AUTODEBUG') # If set to 1, we will run the autodebugger (the automatic debugging tool, see tools/autodebugger).
+                                             # Note that this will disable inclusion of libraries. This is useful because including
+                                             # dlmalloc makes it hard to compare native and js builds
 
 if DEBUG: print >> sys.stderr, 'emcc: ', ' '.join(sys.argv)
 if DEBUG and LEAVE_INPUTS_RAW: print >> sys.stderr, 'emcc: leaving inputs raw'
 
+stdout = PIPE if not DEBUG else None # suppress output of child processes
+stderr = PIPE if not DEBUG else None # unless we are in DEBUG mode
+
 shared.check_sanity()
 
 # Handle some global flags
@@ -114,7 +122,7 @@ Most normal gcc/g++ options will work, for example:
 
 Options that are modified or new in %s include:
   -O0                      No optimizations (default)
-  -O1                      Simple optimizations, including safe LLVM
+  -O1                      Simple optimizations, including LLVM -O1
                            optimizations, and no runtime assertions
                            or C++ exception catching (to re-enable
                            C++ exception catching, use
@@ -123,7 +131,8 @@ Options that are modified or new in %s include:
                            compiling to JavaScript, not to intermediate
                            bitcode.
   -O2                      As -O1, plus the relooper (loop recreation),
-                           plus closure compiler advanced opts
+                           plus closure compiler advanced opts, plus
+                           LLVM -O2 optimizations
                            Warning: Compiling with this takes a long time!
   -O3                      As -O2, plus dangerous optimizations that may
                            break the generated code! If that happens, try
@@ -134,12 +143,8 @@ Options that are modified or new in %s include:
   --typed-arrays <mode>    0: No typed arrays
                            1: Parallel typed arrays
                            2: Shared (C-like) typed arrays (default)
-  --llvm-opts <level>      0: No LLVM optimizations (default in -O0)
-                           1: Safe/portable LLVM optimizations
-                              (default in -O1 and above)
-                           2: Full, unsafe/unportable LLVM optimizations;
-                              this will almost certainly break the
-                              generated code!
+  --llvm-opts <on>         0: No LLVM optimizations (default in -O0)
+                           1: LLVM optimizations (default in -O1 +)
   --closure <on>           0: No closure compiler (default in -O0, -O1)
                            1: Run closure compiler (default in -O2, -O3)
   --js-transform <cmd>     <cmd> will be called on the generated code
@@ -230,8 +235,6 @@ def unsuffixed(name):
 def unsuffixed_basename(name):
   return os.path.basename(unsuffixed(name))
 
-LLVM_INTERNAL_OPT_LEVEL = 2
-
 # ---------------- End configs -------------
 
 if len(sys.argv) == 1 or sys.argv[1] in ['x', 't']:
@@ -285,7 +288,7 @@ try:
   newargs = sys.argv[1:]
 
   opt_level = 0
-  llvm_opt_level = None
+  llvm_opts = None
   closure = None
   js_transform = None
   compress_whitespace = None
@@ -304,8 +307,7 @@ try:
       newargs[i] = ''
     elif newargs[i].startswith('--llvm-opts'):
       check_bad_eq(newargs[i])
-      llvm_opt_level = eval(newargs[i+1])
-      assert 0 <= llvm_opt_level <= 1, 'Only two levels of LLVM optimizations are supported so far, 0 (none) and 1 (safe)'
+      llvm_opts = eval(newargs[i+1])
       newargs[i] = ''
       newargs[i+1] = ''
     elif newargs[i].startswith('--closure'):
@@ -336,7 +338,7 @@ try:
       newargs[i+1] = ''
   newargs = [ arg for arg in newargs if arg is not '' ]
 
-  if llvm_opt_level is None: llvm_opt_level = 1 if opt_level >= 1 else 0
+  if llvm_opts is None: llvm_opts = 1 if opt_level >= 1 else 0
   if closure is None: closure = 1 if opt_level >= 2 else 0
   if compress_whitespace is None:
     compress_whitespace = closure # if closure is run, compress whitespace
@@ -434,7 +436,7 @@ try:
 
   # If we were just asked to generate bitcode, stop there
   if final_suffix not in ['js', 'html']:
-    if llvm_opt_level > 0:
+    if llvm_opts > 0:
       print >> sys.stderr, 'emcc: warning: -Ox flags ignored, since not generating JavaScript'
     if not specified_target:
       for input_file in input_files:
@@ -459,7 +461,7 @@ try:
 
   extra_files_to_link = []
 
-  if not LEAVE_INPUTS_RAW:
+  if not LEAVE_INPUTS_RAW and not AUTODEBUG:
     # Check if we need to include some libraries that we compile. (We implement libc ourselves in js, but
     # compile a malloc implementation and stdlibc++.)
     # Note that we assume a single symbol is enough to know if we have/do not have dlmalloc etc. If you
@@ -468,9 +470,9 @@ try:
     # dlmalloc
     def create_dlmalloc():
       if DEBUG: print >> sys.stderr, 'emcc: building dlmalloc for cache'
-      Popen([shared.EMCC, shared.path_from_root('system', 'lib', 'dlmalloc.c'), '-g', '-o', in_temp('dlmalloc.o')], stdout=PIPE, stderr=PIPE).communicate()
+      Popen([shared.EMCC, shared.path_from_root('system', 'lib', 'dlmalloc.c'), '-g', '-o', in_temp('dlmalloc.o')], stdout=stdout, stderr=stderr).communicate()
       # we include the libc++ new stuff here, so that the common case of using just new/delete is quick to link
-      Popen([shared.EMXX, shared.path_from_root('system', 'lib', 'libcxx', 'new.cpp'), '-g', '-o', in_temp('new.o')], stdout=PIPE, stderr=PIPE).communicate()
+      Popen([shared.EMXX, shared.path_from_root('system', 'lib', 'libcxx', 'new.cpp'), '-g', '-o', in_temp('new.o')], stdout=stdout, stderr=stderr).communicate()
       shared.Building.link([in_temp('dlmalloc.o'), in_temp('new.o')], in_temp('dlmalloc_full.o'))
       return in_temp('dlmalloc_full.o')
     def fix_dlmalloc():
@@ -535,16 +537,16 @@ try:
       shutil.move(in_temp(unsuffixed_basename(input_files[0]) + '.o'), in_temp(target_basename + '.bc'))
 
   # Optimize, if asked to
-  if llvm_opt_level > 0 and not LEAVE_INPUTS_RAW:
+  if llvm_opts > 0 and opt_level > 0 and not LEAVE_INPUTS_RAW:
     if DEBUG: print >> sys.stderr, 'emcc: LLVM opts'
-    shared.Building.llvm_opt(in_temp(target_basename + '.bc'), LLVM_INTERNAL_OPT_LEVEL, safe=llvm_opt_level < 2)
+    shared.Building.llvm_opt(in_temp(target_basename + '.bc'), min(opt_level, MAX_LLVM_OPT_LEVEL))
   else:
     # If possible, remove dead functions etc., this potentially saves a lot in the size of the generated code (and the time to compile it)
     if not LEAVE_INPUTS_RAW and not shared.Settings.BUILD_AS_SHARED_LIB and not shared.Settings.LINKABLE:
       if DEBUG: print >> sys.stderr, 'emcc: LLVM dead globals elimination'
       shared.Building.llvm_opt(in_temp(target_basename + '.bc'), ['-internalize', '-globaldce'])
 
-  # Emscripten
+  # Prepare .ll for Emscripten
   try:
     if shared.Settings.RELOOP:
       print >> sys.stderr, 'emcc: warning: The relooper optimization can be very slow.'
@@ -569,6 +571,12 @@ try:
     final = input_files[0]
   if DEBUG: save_intermediate('ll', 'll')
 
+  if AUTODEBUG: # EMCC_AUTODEBUG is set: pass the .ll through the autodebugger script before running Emscripten on it
+    Popen(['python', shared.AUTODEBUGGER, final, final + '.ad.ll']).communicate()[0] # blocks until the script exits; no pipes are requested, so the [0] result is unused
+    final += '.ad.ll' # from here on, work on the autodebugger's output file instead of the original .ll
+    if DEBUG: save_intermediate('autodebug', 'll')
+
+  # Emscripten
   if DEBUG: print >> sys.stderr, 'emcc: LLVM => JS'
   final = shared.Building.emscripten(final, append_ext=False)
   if DEBUG: save_intermediate('original')
diff --git a/src/analyzer.js b/src/analyzer.js
index 7412be6d..7eca1d5a 100644
--- a/src/analyzer.js
+++ b/src/analyzer.js
@@ -67,7 +67,7 @@ function analyzer(data, sidePass) {
         if (subItem.intertype == 'function') {
           item.functions.push(subItem);
           subItem.endLineNum = null;
-          subItem.lines = [];
+          subItem.lines = []; // We will fill in the function lines after the legalizer, since it can modify them
           subItem.labels = [];
 
           // no explicit 'entry' label in clang on LLVM 2.8 - most of the time, but not all the time! - so we add one if necessary
@@ -87,7 +87,6 @@ function analyzer(data, sidePass) {
         } else if (item.functions.length > 0 && item.functions.slice(-1)[0].endLineNum === null) {
           // Internal line
           if (!currLabelFinished) {
-            item.functions.slice(-1)[0].lines.push(subItem);
             item.functions.slice(-1)[0].labels.slice(-1)[0].lines.push(subItem); // If this line fails, perhaps missing a label? LLVM_STYLE related?
             if (subItem.intertype === 'branch') {
               currLabelFinished = true;
@@ -100,7 +99,316 @@ function analyzer(data, sidePass) {
         }
       }
       delete item.items;
-      this.forwardItem(item, 'Typevestigator');
+      this.forwardItem(item, 'Legalizer');
+    }
+  });
+
+  // Legalize LLVM unrealistic types into realistic types.
+  //
+  // With full LLVM optimizations, it can generate types like i888 which do not exist in
+  // any actual hardware implementation, but are useful during optimization. LLVM then
+  // legalizes these types into real ones during code generation. Sadly, there is no LLVM
+  // IR pass to legalize them, which would have been useful and nice from a design perspective.
+  // The LLVM community is also not interested in receiving patches to implement that
+  // functionality, since it would duplicate existing code from the code generation
+  // component. Therefore, we implement legalization here in Emscripten.
+  //
+  // Currently we just legalize completely unrealistic types into bundles of i32s, and just
+  // the most common instructions that can be involved with such types: load, store, shifts,
+  // trunc and zext.
+  //
+  // TODO: Expand this also into legalization of i64 into i32,i32, which can then
+  //       replace our i64 mode 1 implementation. Legalizing i64s is harder though
+  //       as they can appear in function arguments and we would also need to implement
+  //       an unfolder (to uninline inline LLVM function calls, so that each LLVM line
+  //       has a single LLVM instruction).
+  substrate.addActor('Legalizer', { // Rewrites label lines that use integer types wider than 64 bits, fills func.lines, then forwards to 'Typevestigator'
+    processItem: function(data) { // data.functions[].labels[].lines are edited in place
+      // Legalization
+      if (USE_TYPED_ARRAYS == 2) { // the rewriting below is only done in typed-arrays mode 2
+        function isIllegalType(type) { // "illegal" = an integer type wider than 64 bits
+          return getBits(type) > 64;
+        }
+        function getLegalVars(base, bits) { // chunk descriptors for a variable: idents base$0, base$1, ...
+          var ret = new Array(Math.ceil(bits/32));
+          var i = 0;
+          while (bits > 0) {
+            ret[i] = { ident: base + '$' + i, bits: Math.min(32, bits) }; // the last chunk may be narrower than 32 bits
+            bits -= 32;
+            i++;
+          }
+          return ret;
+        }
+        function getLegalLiterals(text, bits) { // same chunking for a constant; chunk i takes its ident from parseArbitraryInt(text, bits)[i]
+          var parsed = parseArbitraryInt(text, bits);
+          var ret = new Array(Math.ceil(bits/32));
+          var i = 0;
+          while (bits > 0) {
+            ret[i] = { ident: parsed[i], bits: Math.min(32, bits) };
+            bits -= 32;
+            i++;
+          }
+          return ret;
+        }
+        data.functions.forEach(function(func) {
+          func.labels.forEach(function(label) {
+            var i = 0, bits;
+            while (i < label.lines.length) { // manual index, since the splices below change label.lines.length
+              var item = label.lines[i];
+              if (item.intertype == 'store') {
+                if (isIllegalType(item.valueType)) {
+                  dprint('legalizer', 'Legalizing store at line ' + item.lineNum);
+                  bits = getBits(item.valueType);
+                  assert(item.value.intertype == 'value', 'TODO: unfolding');
+                  var elements;
+                  if (isNumber(item.value.ident)) {
+                    elements = getLegalLiterals(item.value.ident, bits);
+                  } else {
+                    elements = getLegalVars(item.value.ident, bits);
+                  }
+                  label.lines.splice(i, 1); // remove the wide store; its replacements are inserted below
+                  var j = 0;
+                  elements.forEach(function(element) {
+                    var tempVar = '$st$' + i + '$' + j; // receives the address of chunk j. NOTE(review): name depends only on the line index within this label - confirm reuse across labels is harmless
+                    label.lines.splice(i+j*2, 0, { // each chunk contributes two lines: this address computation, then the store
+                      intertype: 'assign',
+                      ident: tempVar,
+                      value: {
+                        intertype: 'getelementptr',
+                        ident: item.pointer.ident,
+                        type: '[0 x i32]*',
+                        params: [
+                          { intertype: 'value', ident: item.pointer.ident, type: '[0 x i32]*' }, // technically a bitcast is needed in llvm, but not for us
+                          { intertype: 'value', ident: '0', type: 'i32' },
+                          { intertype: 'value', ident: j.toString(), type: 'i32' }
+                        ],
+                      },
+                      lineNum: item.lineNum + (j/100) // fractional offset from the original line number, increasing with j
+                    });
+                    var actualSizeType = 'i' + element.bits; // The last one may be smaller than 32 bits
+                    label.lines.splice(i+j*2+1, 0, {
+                      intertype: 'store',
+                      valueType: actualSizeType,
+                      value: { intertype: 'value', ident: element.ident, type: actualSizeType },
+                      pointer: { intertype: 'value', ident: tempVar, type: actualSizeType + '*' },
+                      ident: tempVar,
+                      pointerType: actualSizeType + '*',
+                      align: item.align,
+                      lineNum: item.lineNum + ((j+0.5)/100)
+                    });
+                    j++;
+                  });
+                  Types.needAnalysis['[0 x i32]'] = 0; // presumably registers the synthetic array type for later type analysis - confirm
+                  i += j*2; // step past the 2*j lines just inserted
+                  continue;
+                }
+              } else if (item.intertype == 'assign') {
+                var value = item.value;
+                switch (value.intertype) {
+                  case 'load': {
+                    if (isIllegalType(value.valueType)) {
+                      dprint('legalizer', 'Legalizing load at line ' + item.lineNum);
+                      bits = getBits(value.valueType);
+                      assert(value.pointer.intertype == 'value', 'TODO: unfolding');
+                      var elements = getLegalVars(item.ident, bits);
+                      label.lines.splice(i, 1); // remove the wide load; its replacements are inserted below
+                      var j = 0;
+                      elements.forEach(function(element) {
+                        var tempVar = '$st$' + i + '$' + j; // same '$st$' naming scheme as the store path, even though this is a load
+                        label.lines.splice(i+j*2, 0, { // two lines per chunk again: address computation, then the load
+                          intertype: 'assign',
+                          ident: tempVar,
+                          value: {
+                            intertype: 'getelementptr',
+                            ident: value.pointer.ident,
+                            type: '[0 x i32]*',
+                            params: [
+                              { intertype: 'value', ident: value.pointer.ident, type: '[0 x i32]*' }, // technically bitcast is needed in llvm, but not for us
+                              { intertype: 'value', ident: '0', type: 'i32' },
+                              { intertype: 'value', ident: j.toString(), type: 'i32' }
+                            ],
+                          },
+                          lineNum: item.lineNum + (j/100)
+                        });
+                        var actualSizeType = 'i' + element.bits; // The last one may be smaller than 32 bits
+                        label.lines.splice(i+j*2+1, 0, {
+                          intertype: 'assign',
+                          ident: element.ident,
+                          value: {
+                            intertype: 'load',
+                            pointerType: actualSizeType + '*',
+                            valueType: actualSizeType,
+                            type: actualSizeType, // XXX why is this missing from intertyper?
+                            pointer: { intertype: 'value', ident: tempVar, type: actualSizeType + '*' },
+                            ident: tempVar,
+                            pointerType: actualSizeType + '*', // NOTE(review): duplicate of the pointerType key a few lines up (same value)
+                            align: value.align,
+                          },
+                          lineNum: item.lineNum + ((j+0.5)/100)
+                        });
+                        j++;
+                      });
+                      Types.needAnalysis['[0 x i32]'] = 0;
+                      i += j*2; // step past the 2*j lines just inserted
+                      continue;
+                    }
+                  } // NOTE(review): no break here, so a load of a legal type falls through into the 'mathop' case below - confirm this is intended
+                  case 'mathop': {
+                    if (isIllegalType(value.type)) {
+                      dprint('legalizer', 'Legalizing mathop at line ' + item.lineNum);
+                      label.lines.splice(i, 1);
+                      var toAdd = []; // replacement lines, inserted at position i once they are all built
+                      assert(value.param1.intertype == 'value', 'TODO: unfolding');
+                      var sourceBits = getBits(value.param1.type);
+                      var sourceElements;
+                      if (sourceBits <= 64) {
+                        // The input is a legal type
+                        if (sourceBits <= 32) {
+                          sourceElements = [{ ident: value.param1.ident, bits: sourceBits }];
+                        } else if (sourceBits == 64 && I64_MODE == 1) {
+                          sourceElements = [{ ident: value.param1.ident + '[0]', bits: 32 },
+                                            { ident: value.param1.ident + '[1]', bits: 32 }];
+                          // Add the source element as a param so that it is not eliminated as unneeded (the idents are not a simple ident here)
+                          toAdd.push({
+                            intertype: 'value', ident: ';', type: 'rawJS',
+                            params: [{ intertype: 'value', ident: value.param1.ident, type: 'i32' }]
+                          });
+                        } else {
+                          throw 'Invalid legal type as source of legalization ' + sourceBits;
+                        }
+                      } else {
+                        sourceElements = getLegalVars(value.param1.ident, sourceBits);
+                      }
+                      // All mathops can be parametrized by how many shifts we do, and how big the source is
+                      var shifts = 0; // positive for lshr, negative for shl, left at 0 for trunc/zext
+                      var targetBits;
+                      switch (value.op) {
+                        case 'lshr': {
+                          assert(value.param2.intertype == 'value', 'TODO: unfolding');
+                          shifts = parseInt(value.param2.ident);
+                          targetBits = sourceBits;
+                          break;
+                        }
+                        case 'shl': {
+                          assert(value.param2.intertype == 'value', 'TODO: unfolding');
+                          shifts = -parseInt(value.param2.ident);
+                          targetBits = sourceBits;
+                          break;
+                        }
+                        case 'trunc': case 'zext': {
+                          assert(value.param2.intertype == 'type' || value.param2.intertype == 'value', 'TODO: unfolding');
+                          targetBits = getBits(value.param2.ident); // the destination width is read from param2's ident
+                          break;
+                        }
+                        default: throw 'Invalid mathop for legalization: ' + [value.op, item.lineNum, dump(item)];
+                      }
+                      // Do the legalization
+                      assert(isNumber(shifts), 'TODO: handle nonconstant shifts');
+                      var targetElements = getLegalVars(item.ident, targetBits);
+                      var sign = shifts >= 0 ? 1 : -1; // which neighbouring chunk supplies the bits that cross a 32-bit boundary
+                      var shiftOp = shifts >= 0 ? 'shl' : 'lshr'; // applied to the neighbouring chunk ('other' below)
+                      var shiftOpReverse = shifts >= 0 ? 'lshr' : 'shl'; // applied to the primary chunk ('result' below)
+                      var whole = shifts >= 0 ? Math.floor(shifts/32) : Math.ceil(shifts/32); // shift distance in whole 32-bit chunks (signed)
+                      var fraction = Math.abs(shifts % 32); // remaining shift within a chunk, 0..31
+                      for (var j = 0; j < targetElements.length; j++) {
+                        var result = {
+                          intertype: 'value',
+                          ident: (j + whole >= 0 && j + whole < sourceElements.length) ? sourceElements[j + whole].ident : '0', // chunks outside the source read as the constant '0'
+                          type: 'i32',
+                        };
+                        if (fraction != 0) {
+                          var other = {
+                            intertype: 'value',
+                            ident: (j + sign + whole >= 0 && j + sign + whole < sourceElements.length) ? sourceElements[j + sign + whole].ident : '0',
+                            type: 'i32',
+                          };
+                          other = {
+                            intertype: 'mathop',
+                            op: shiftOp,
+                            type: 'i32',
+                            param1: other,
+                            param2: { intertype: 'value', ident: (32 - fraction).toString(), type: 'i32' }
+                          };
+                          result = {
+                            intertype: 'mathop',
+                            op: shiftOpReverse,
+                            type: 'i32',
+                            param1: result,
+                            param2: { intertype: 'value', ident: fraction.toString(), type: 'i32' }
+                          };
+                          result = {
+                            intertype: 'mathop',
+                            op: 'or',
+                            type: 'i32',
+                            param1: result,
+                            param2: other
+                          }
+                        }
+                        if (targetElements[j].bits < 32 && shifts < 0) {
+                          // truncate bits that fall off the end. This is not needed in most cases, can probably be optimized out
+                          result = {
+                            intertype: 'mathop',
+                            op: 'and',
+                            type: 'i32',
+                            param1: result,
+                            param2: { intertype: 'value', ident: (Math.pow(2, targetElements[j].bits)-1).toString(), type: 'i32' }
+                          }
+                        }
+                        toAdd.push({
+                          intertype: 'assign',
+                          ident: targetElements[j].ident,
+                          value: result,
+                          lineNum: item.lineNum + (j/100)
+                        });
+                      }
+                      if (targetBits <= 64) {
+                        // We are generating a normal legal type here
+                        var legalValue;
+                        if (targetBits == 64 && I64_MODE == 1) {
+                          // Generate an i64-1 [low,high]. This will be unnecessary when we legalize i64s
+                          legalValue = {
+                            intertype: 'value',
+                            ident: '[' + targetElements[0].ident + ',' + targetElements[1].ident + ']',
+                            type: 'rawJS',
+                            // Add the target elements as params so that they are not eliminated as unneeded (the ident is not a simple ident here)
+                            params: targetElements.map(function(element) {
+                              return { intertype: 'value', ident: element.ident, type: 'i32' };
+                            })
+                          };
+                        } else if (targetBits <= 32) {
+                          legalValue = { intertype: 'value', ident: targetElements[0].ident, type: 'rawJS' };
+                          // truncation to smaller than 32 bits has already been done, if necessary
+                        } else {
+                          throw 'Invalid legal type as target of legalization ' + targetBits;
+                        }
+                        toAdd.push({
+                          intertype: 'assign',
+                          ident: item.ident,
+                          value: legalValue,
+                          lineNum: item.lineNum + ((j+1)/100) // j equals targetElements.length here, after the loop above
+                        });
+                      }
+                      Array.prototype.splice.apply(label.lines, [i, 0].concat(toAdd)); // insert every replacement line at position i in one call
+                      i += toAdd.length; // step past the inserted lines
+                      continue;
+                    }
+                  }
+                }
+              }
+              i++; // nothing was rewritten on this line; move on to the next one
+              continue;
+            }
+          });
+        });
+      }
+
+      // Add function lines to func.lines, after our modifications to the label lines
+      data.functions.forEach(function(func) {
+        func.labels.forEach(function(label) {
+          func.lines = func.lines.concat(label.lines); // labels are appended in order, so func.lines is their concatenation
+        });
+      });
+      this.forwardItem(data, 'Typevestigator');
     }
   });
 
@@ -463,6 +771,12 @@ function analyzer(data, sidePass) {
       item.functions.forEach(function(func) {
         func.lines.forEach(function(line, i) {
           if (line.intertype === 'assign' && line.value.intertype === 'load') {
+            // Floats have no concept of signedness. Mark them as 'signed', which is the default, for which we do nothing
+            if (line.value.type in Runtime.FLOAT_TYPES) {
+              line.value.unsigned = false;
+              return;
+            }
+            // Booleans are always unsigned
             var data = func.variables[line.ident];
             if (data.type === 'i1') {
               line.value.unsigned = true;
diff --git a/src/intertyper.js b/src/intertyper.js
index ae9794b8..cf1d28ed 100644
--- a/src/intertyper.js
+++ b/src/intertyper.js
@@ -864,7 +864,7 @@ function intertyper(data, sidePass, baseLineNums) {
       var ret = {
         intertype: 'store',
         valueType: item.tokens[1].text,
-        value: parseLLVMSegment(segments[0]), // TODO: Make everything use this method, with finalizeLLVMParameter too
+        value: parseLLVMSegment(segments[0]),
         pointer: parseLLVMSegment(segments[1]),
         lineNum: item.lineNum
       };
diff --git a/src/jsifier.js b/src/jsifier.js
index 657a9673..62cab3d5 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -71,7 +71,7 @@ function JSify(data, functionsOnly, givenFunctions) {
         }
       }
     } else {
-      libFuncsToInclude = ['memset', 'malloc', 'free'];
+      libFuncsToInclude = ['memcpy', 'memset', 'malloc', 'free'];
     }
     libFuncsToInclude.forEach(function(ident) {
       data.functionStubs.push({
@@ -719,6 +719,9 @@ function JSify(data, functionsOnly, givenFunctions) {
       }
     });
   }
+  makeFuncLineActor('value', function(item) { // a function line whose intertype is 'value' is emitted as its ident text, unchanged
+    return item.ident;
+  });
   makeFuncLineActor('noop', function(item) {
     return ';';
   });
@@ -824,7 +827,7 @@ function JSify(data, functionsOnly, givenFunctions) {
     });
     labelSets.forEach(function(labelSet) {
       walkInterdata(labelSet.value, function mark(item) {
-        if (item.intertype == 'value' && item.ident in deps) {
+        if (item.intertype == 'value' && item.ident in deps && labelSet.ident != item.ident) {
           deps[labelSet.ident][item.ident] = true;
         }
       });
@@ -845,7 +848,7 @@ function JSify(data, functionsOnly, givenFunctions) {
         }
       }
       // If we got here, we have circular dependencies, and must break at least one.
-      pre = 'var ' + idents[0] + '$phi = ' + valueJSes[idents[i]] + ';' + pre;
+      pre = 'var ' + idents[0] + '$phi = ' + valueJSes[idents[0]] + ';' + pre;
       post += 'var ' + idents[0] + ' = ' + idents[0] + '$phi;';
       remove(idents[0]);
     }
diff --git a/src/library.js b/src/library.js
index 45c64bc7..47e15aae 100644
--- a/src/library.js
+++ b/src/library.js
@@ -2260,7 +2260,6 @@ LibraryManager.library = {
       } else if (type == 'i64') {
         ret = [{{{ makeGetValue('varargs', 'argIndex', 'i32', undefined, undefined, true) }}},
                {{{ makeGetValue('varargs', 'argIndex+4', 'i32', undefined, undefined, true) }}}];
-        ret = unSign(ret[0], 32) + unSign(ret[1], 32)*Math.pow(2, 32); // Unsigned in this notation. Signed later if needed. // XXX - loss of precision
 #else
       } else if (type == 'i64') {
         ret = {{{ makeGetValue('varargs', 'argIndex', 'i64', undefined, undefined, true) }}};
@@ -2270,7 +2269,7 @@ LibraryManager.library = {
         ret = {{{ makeGetValue('varargs', 'argIndex', 'i32', undefined, undefined, true) }}};
       }
       argIndex += Runtime.getNativeFieldSize(type);
-      return Number(ret);
+      return ret;
     }
 
     var ret = [];
@@ -2392,6 +2391,12 @@ LibraryManager.library = {
           var signed = next == 'd'.charCodeAt(0) || next == 'i'.charCodeAt(0);
           argSize = argSize || 4;
           var currArg = getNextArg('i' + (argSize * 8));
+#if I64_MODE == 1
+          // Flatten i64-1 [low, high] into a (slightly rounded) double
+          if (argSize == 8) {
+            currArg = Runtime.makeBigInt(currArg[0], currArg[1], next == 'u'.charCodeAt(0));
+          }
+#endif
           // Truncate to requested size.
           if (argSize <= 4) {
             var limit = Math.pow(256, argSize) - 1;
@@ -3444,7 +3449,7 @@ LibraryManager.library = {
   },
   strtoll__deps: ['_parseInt'],
   strtoll: function(str, endptr, base) {
-    return __parseInt(str, endptr, base, -9223372036854775808, 9223372036854775807, 64);  // LLONG_MIN, LLONG_MAX; imprecise.
+    return __parseInt(str, endptr, base, -9223372036854775200, 9223372036854775200, 64);  // LLONG_MIN, LLONG_MAX; imprecise.
   },
   strtol__deps: ['_parseInt'],
   strtol: function(str, endptr, base) {
diff --git a/src/parseTools.js b/src/parseTools.js
index ad6f2830..d4ef27eb 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -129,6 +129,11 @@ function isIntImplemented(type) {
   return type[0] == 'i' || isPointerType(type);
 }
 
+function getBits(type) { // Bit width of an LLVM integer type string such as 'i32'
+  if (!type || type[0] != 'i') return 0; // a missing type, or one not starting with 'i', reports 0 bits
+  return parseInt(type.substr(1)); // parses the digits after the 'i'; NOTE(review): no radix argument is passed
+}
+
 function isVoidType(type) {
   return type == 'void';
 }
@@ -558,22 +563,13 @@ function makeInlineCalculation(expression, value, tempVar) {
   return '(' + expression.replace(/VALUE/g, value) + ')';
 }
 
-// Given two 32-bit unsigned parts of an emulated 64-bit number, combine them into a JS number (double).
-// Rounding is inevitable if the number is large. This is a particular problem for small negative numbers
-// (-1 will be rounded!), so handle negatives separately and carefully
-function makeBigInt(low, high) {
-  // here VALUE will be the big part
-  return '(' + high + ' <= 2147483648 ? (' + makeSignOp(low, 'i32', 'un', 1, 1) + '+(' + makeSignOp(high, 'i32', 'un', 1, 1) + '*4294967296))' +
-                                    ' : (' + makeSignOp(low, 'i32', 're', 1, 1) + '+(1+' + makeSignOp(high, 'i32', 're', 1, 1) + ')*4294967296))';
-}
-
 // Makes a proper runtime value for a 64-bit value from low and high i32s. low and high are assumed to be unsigned.
 function makeI64(low, high) {
   high = high || '0';
   if (I64_MODE == 1) {
     return '[' + makeSignOp(low, 'i32', 'un', 1, 1) + ',' + makeSignOp(high, 'i32', 'un', 1, 1) + ']';
   } else {
-    if (high) return makeBigInt(low, high);
+    if (high) return RuntimeGenerator.makeBigInt(low, high);
     return low;
   }
 }
@@ -589,7 +585,7 @@ function splitI64(value) {
 }
 function mergeI64(value) {
   assert(I64_MODE == 1);
-  return makeInlineCalculation(makeBigInt('VALUE[0]', 'VALUE[1]'), value, 'tempI64');
+  return makeInlineCalculation(RuntimeGenerator.makeBigInt('VALUE[0]', 'VALUE[1]'), value, 'tempI64');
 }
 
 // Takes an i64 value and changes it into the [low, high] form used in i64 mode 1. In that
@@ -604,13 +600,12 @@ function makeCopyI64(value) {
   return value + '.slice(0)';
 }
 
-function parseI64Constant(str) {
-  assert(I64_MODE == 1);
+// Given a string representation of an integer of arbitrary size, return it
+// split up into 32-bit chunks
+function parseArbitraryInt(str, bits) {
+  // We parse the string into a vector of digits, base 10. This is convenient to work on.
 
-  if (!isNumber(str)) {
-    // This is a variable. Copy it, so we do not modify the original
-    return makeCopyI64(str);
-  }
+  assert(bits % 32 == 0 || ('i' + (bits % 32)) in Runtime.INT_TYPES, 'Arbitrary-sized ints must tails that are of legal size');
 
   function str2vec(s) { // index 0 is the highest value
     var ret = [];
@@ -668,6 +663,7 @@ function parseI64Constant(str) {
 
   if (str[0] == '-') {
     // twos-complement is needed
+    assert(bits == 64, "we only support 64-bit two's complement so far");
     str = str.substr(1);
     v = str2vec('18446744073709551616'); // 2^64
     subtract(v, str2vec(str));
@@ -675,23 +671,30 @@ function parseI64Constant(str) {
     v = str2vec(str);
   }
 
-  var bits = [];
+  var bitsv = [];
   while (!isZero(v)) {
-    bits.push((v[v.length-1] % 2 != 0)+0);
+    bitsv.push((v[v.length-1] % 2 != 0)+0);
     v[v.length-1] = v[v.length-1] & 0xfe;
     divide2(v);
   }
 
-  var low = 0, high = 0;
-  for (var i = 0; i < bits.length; i++) {
-    if (i <= 31) {
-      low += bits[i]*Math.pow(2, i);
-    } else {
-      high += bits[i]*Math.pow(2, i-32);
-    }
+  var ret = zeros(Math.ceil(bits/32));
+  for (var i = 0; i < bitsv.length; i++) {
+    ret[Math.floor(i/32)] += bitsv[i]*Math.pow(2, i % 32);
   }
+  return ret;
+}
 
-  return '[' + low + ',' + high + ']';
+function parseI64Constant(str) { // Returns source text for an i64 in [low, high] form; only valid in I64_MODE 1
+  assert(I64_MODE == 1);
+
+  if (!isNumber(str)) {
+    // This is a variable. Copy it, so we do not modify the original
+    return makeCopyI64(str);
+  }
+
+  var parsed = parseArbitraryInt(str, 64); // array of 32-bit chunks; index 0 holds the lowest 32 bits
+  return '[' + parsed[0] + ',' + parsed[1] + ']'; // a string of the form "[low,high]"
 }
 
 function parseNumerical(value, type) {
@@ -912,23 +915,32 @@ function makeGetValue(ptr, pos, type, noNeedFirst, unsigned, ignore, align, noSa
             'tempDoubleF64[0])';
   }
 
-  if (EMULATE_UNALIGNED_ACCESSES && USE_TYPED_ARRAYS == 2 && align && isIntImplemented(type)) { // TODO: support unaligned doubles and floats
+  if (USE_TYPED_ARRAYS == 2 && align) {
     // Alignment is important here. May need to split this up
     var bytes = Runtime.getNativeTypeSize(type);
     if (bytes > align) {
-      var ret = '/* unaligned */(';
-      if (bytes <= 4) {
-        for (var i = 0; i < bytes; i++) {
-          ret += 'tempInt' + (i == 0 ? '=' : (i < bytes-1 ? '+=((' : '+(('));
-          ret += makeSignOp(makeGetValue(ptr, getFastValue(pos, '+', i), 'i8', noNeedFirst, unsigned, ignore), 'i8', 'un', true);
-          if (i > 0) ret += ')<<' + (8*i) + ')';
-          if (i < bytes-1) ret += ',';
+      var ret = '(';
+      if (isIntImplemented(type)) {
+        if (bytes <= 4) {
+          for (var i = 0; i < bytes; i++) {
+            ret += 'tempInt' + (i == 0 ? '=' : '|=((');
+            ret += makeGetValue(ptr, getFastValue(pos, '+', i), 'i8', noNeedFirst, 1, ignore);
+            if (i > 0) ret += ')<<' + (8*i) + ')';
+            ret += ',';
+          }
+          ret += makeSignOp('tempInt', type, unsigned ? 'un' : 're', true);
+        } else {
+          assert(bytes == 8);
+          ret += 'tempBigInt=' + makeGetValue(ptr, pos, 'i32', noNeedFirst, true, ignore, align) + ',';
+          ret += 'tempBigInt2=' + makeGetValue(ptr, getFastValue(pos, '+', Runtime.getNativeTypeSize('i32')), 'i32', noNeedFirst, true, ignore, align) + ',';
+          ret += makeI64('tempBigInt', 'tempBigInt2');
         }
       } else {
-        assert(bytes == 8);
-        ret += 'tempBigInt=' + makeGetValue(ptr, pos, 'i32', noNeedFirst, true, ignore, align) + ',';
-        ret += 'tempBigInt2=' + makeGetValue(ptr, getFastValue(pos, '+', Runtime.getNativeTypeSize('i32')), 'i32', noNeedFirst, true, ignore, align) + ',';
-        ret += makeI64('tempBigInt', 'tempBigInt2');
+        if (type == 'float') {
+          ret += 'copyTempFloat(' + getFastValue(ptr, '+', pos) + '),tempDoubleF32[0]';
+        } else {
+          ret += 'copyTempDouble(' + getFastValue(ptr, '+', pos) + '),tempDoubleF64[0]';
+        }
       }
       ret += ')';
       return ret;
@@ -994,22 +1006,27 @@ function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe)
             makeSetValue(ptr, getFastValue(pos, '+', Runtime.getNativeTypeSize('i32')), 'tempDoubleI32[1]', 'i32', noNeedFirst, ignore, align) + ')';
   }
 
-  if (EMULATE_UNALIGNED_ACCESSES && USE_TYPED_ARRAYS == 2 && align && isIntImplemented(type)) { // TODO: support unaligned doubles and floats
+  if (USE_TYPED_ARRAYS == 2 && align) {
     // Alignment is important here. May need to split this up
     var bytes = Runtime.getNativeTypeSize(type);
     if (bytes > align) {
-      var ret = '/* unaligned */';
-      if (bytes <= 4) {
-        ret += 'tempBigInt=' + value + ';';
-        for (var i = 0; i < bytes; i++) {
-          ret += makeSetValue(ptr, getFastValue(pos, '+', i), 'tempBigInt&0xff', 'i8', noNeedFirst, ignore) + ';';
-          if (i < bytes-1) ret += 'tempBigInt>>=8;';
+      var ret = '';
+      if (isIntImplemented(type)) {
+        if (bytes <= 4) {
+          ret += 'tempBigInt=' + value + ';';
+          for (var i = 0; i < bytes; i++) {
+            ret += makeSetValue(ptr, getFastValue(pos, '+', i), 'tempBigInt&0xff', 'i8', noNeedFirst, ignore) + ';';
+            if (i < bytes-1) ret += 'tempBigInt>>=8;';
+          }
+        } else {
+          assert(bytes == 8);
+          ret += 'tempPair=' + ensureI64_1(value) + ';';
+          ret += makeSetValue(ptr, pos, 'tempPair[0]', 'i32', noNeedFirst, ignore, align) + ';';
+          ret += makeSetValue(ptr, getFastValue(pos, '+', Runtime.getNativeTypeSize('i32')), 'tempPair[1]', 'i32', noNeedFirst, ignore, align) + ';';
         }
       } else {
-        assert(bytes == 8);
-        ret += 'tempPair=' + ensureI64_1(value) + ';';
-        ret += makeSetValue(ptr, pos, 'tempPair[0]', 'i32', noNeedFirst, ignore, align) + ';';
-        ret += makeSetValue(ptr, getFastValue(pos, '+', Runtime.getNativeTypeSize('i32')), 'tempPair[1]', 'i32', noNeedFirst, ignore, align) + ';';
+        ret += makeSetValue('tempDoublePtr', 0, value, type, noNeedFirst, ignore, 8) + ';';
+        ret += makeCopyValues(getFastValue(ptr, '+', pos), 'tempDoublePtr', Runtime.getNativeTypeSize(type), type, null, align);
       }
       return ret;
     }
@@ -1447,6 +1464,8 @@ function finalizeLLVMParameter(param, noIndexizeFunctions) {
     return finalizeBlockAddress(param);
   } else if (param.intertype === 'type') {
     return param.ident; // we don't really want the type here
+  } else if (param.intertype == 'mathop') {
+    return processMathop(param);
   } else {
     throw 'invalid llvm parameter: ' + param.intertype;
   }
@@ -1465,7 +1484,7 @@ function makeSignOp(value, type, op, force, ignore) {
   var bits, full;
   if (type in Runtime.INT_TYPES) {
     bits = parseInt(type.substr(1));
-    full = op + 'Sign(' + value + ', ' + bits + ', ' + Math.floor(correctSpecificSign() && !PGO) + (
+    full = op + 'Sign(' + value + ', ' + bits + ', ' + Math.floor(ignore || (correctSpecificSign() && !PGO)) + (
       PGO ? ', "' + (ignore ? '' : Debugging.getIdentifier()) + '"' : ''
     ) + ')';
     // Always sign/unsign constants at compile time, regardless of CHECK/CORRECT
@@ -1473,7 +1492,7 @@ function makeSignOp(value, type, op, force, ignore) {
       return eval(full).toString();
     }
   }
-  if (!correctSigns() && !CHECK_SIGNS && !force) return value;
+  if ((ignore || !correctSigns()) && !CHECK_SIGNS && !force) return value;
   if (type in Runtime.INT_TYPES) {
     // shortcuts
     if (!CHECK_SIGNS || ignore) {
@@ -1589,17 +1608,43 @@ function processMathop(item) {
       case 'xor': {
         return '[' + ident1 + '[0] ^ ' + ident2 + '[0], ' + ident1 + '[1] ^ ' + ident2 + '[1]]';
       }
-      case 'shl': {
-        return '[' + ident1 + '[0] << ' + ident2 + ', ' +
-                 '('+ident1 + '[1] << ' + ident2 + ') | ((' + ident1 + '[0]&((Math.pow(2, ' + ident2 + ')-1)<<(32-' + ident2 + '))) >>> (32-' + ident2 + '))]';
-      }
-      case 'ashr': {
-        return '[('+ident1 + '[0] >>> ' + ident2 + ') | ((' + ident1 + '[1]&(Math.pow(2, ' + ident2 + ')-1))<<(32-' + ident2 + ')),' +
-                    ident1 + '[1] >>> ' + ident2 + ']';
-      }
+      case 'shl':
+      case 'ashr':
       case 'lshr': {
-        return '[('+ident1 + '[0] >>> ' + ident2 + ') | ((' + ident1 + '[1]&(Math.pow(2, ' + ident2 + ')-1))<<(32-' + ident2 + ')),' +
-                    ident1 + '[1] >>> ' + ident2 + ']';
+        assert(isNumber(ident2));
+        bits = parseInt(ident2);
+        var ander = Math.pow(2, bits)-1;
+        if (bits < 32) {
+          switch (op) {
+            case 'shl':
+              return '[' + ident1 + '[0] << ' + ident2 + ', ' +
+                       '('+ident1 + '[1] << ' + ident2 + ') | ((' + ident1 + '[0]&(' + ander + '<<' + (32 - bits) + ')) >>> (32-' + ident2 + '))]';
+            case 'ashr':
+              return '[((('+ident1 + '[0] >>> ' + ident2 + ') | ((' + ident1 + '[1]&' + ander + ')<<' + (32 - bits) + ')) >> 0) >>> 0,' +
+                          '(' + ident1 + '[1] >> ' + ident2 + ') >>> 0]';
+            case 'lshr':
+              return '[(('+ident1 + '[0] >>> ' + ident2 + ') | ((' + ident1 + '[1]&' + ander + ')<<' + (32 - bits) + ')) >>> 0,' +
+                          ident1 + '[1] >>> ' + ident2 + ']';
+          }
+        } else if (bits == 32) {
+          switch (op) {
+            case 'shl':
+              return '[0, ' + ident1 + '[0]]';
+            case 'ashr':
+              return '[' + ident1 + '[1], (' + ident1 + '[1]|0) < 0 ? ' + ander + ' : 0]';
+            case 'lshr':
+              return '[' + ident1 + '[1], 0]';
+          }
+        } else { // bits > 32
+          switch (op) {
+            case 'shl':
+              return '[0, ' + ident1 + '[0] << ' + (bits - 32) + ']';
+            case 'ashr':
+              return '[(' + ident1 + '[1] >> ' + (bits - 32) + ') >>> 0, (' + ident1 + '[1]|0) < 0 ? ' + ander + ' : 0]';
+            case 'lshr':
+              return '[' + ident1 + '[1] >>> ' + (bits - 32) + ', 0]';
+          }
+        }
       }
       case 'uitofp': case 'sitofp': return ident1 + '[0] + ' + ident1 + '[1]*4294967296';
       case 'fptoui': case 'fptosi': return splitI64(ident1);
diff --git a/src/preamble.js b/src/preamble.js
index 9370fa54..d0391af5 100644
--- a/src/preamble.js
+++ b/src/preamble.js
@@ -344,13 +344,6 @@ var tempValue, tempInt, tempBigInt, tempInt2, tempBigInt2, tempPair, tempBigIntI
 #if I64_MODE == 1
 var tempI64, tempI64b;
 #endif
-#if DOUBLE_MODE == 1
-#if USE_TYPED_ARRAYS == 2
-var tempDoubleBuffer = new ArrayBuffer(8);
-var tempDoubleI32 = new Int32Array(tempDoubleBuffer);
-var tempDoubleF64 = new Float64Array(tempDoubleBuffer);
-#endif
-#endif
 
 function abort(text) {
   print(text + ':\n' + (new Error).stack);
@@ -369,6 +362,7 @@ function assert(condition, text) {
 // makeSetValue is done at compile-time and generates the needed
 // code then, whereas this function picks the right code at
 // run-time.
+// Note that setValue and getValue only do *aligned* writes and reads!
 
 function setValue(ptr, value, type, noSafe) {
   type = type || 'i8';
@@ -648,6 +642,33 @@ Module['HEAPF32'] = HEAPF32;
 STACK_ROOT = STACKTOP = Runtime.alignMemory(STATICTOP);
 STACK_MAX = STACK_ROOT + TOTAL_STACK;
 
+#if DOUBLE_MODE == 1
+#if USE_TYPED_ARRAYS == 2
+var tempDoublePtr = Runtime.alignMemory(STACK_MAX, 8);
+var tempDoubleI8  = HEAP8.subarray(tempDoublePtr);
+var tempDoubleI32 = HEAP32.subarray(tempDoublePtr >> 2);
+var tempDoubleF32 = HEAPF32.subarray(tempDoublePtr >> 2);
+var tempDoubleF64 = new Float64Array(HEAP8.buffer).subarray(tempDoublePtr >> 3);
+function copyTempFloat(ptr) { // Copies the 4 bytes at ptr into tempDoubleI8[0..3]. A function, because inlining this code increases code size too much
+  tempDoubleI8[0] = HEAP8[ptr]; // byte-by-byte through HEAP8, so no multi-byte (aligned) access to ptr is made
+  tempDoubleI8[1] = HEAP8[ptr+1];
+  tempDoubleI8[2] = HEAP8[ptr+2];
+  tempDoubleI8[3] = HEAP8[ptr+3];
+}
+function copyTempDouble(ptr) { // Copies the 8 bytes at ptr into tempDoubleI8[0..7], one byte at a time
+  tempDoubleI8[0] = HEAP8[ptr]; // byte-by-byte through HEAP8, so no multi-byte (aligned) access to ptr is made
+  tempDoubleI8[1] = HEAP8[ptr+1];
+  tempDoubleI8[2] = HEAP8[ptr+2];
+  tempDoubleI8[3] = HEAP8[ptr+3];
+  tempDoubleI8[4] = HEAP8[ptr+4];
+  tempDoubleI8[5] = HEAP8[ptr+5];
+  tempDoubleI8[6] = HEAP8[ptr+6];
+  tempDoubleI8[7] = HEAP8[ptr+7];
+}
+STACK_MAX = tempDoublePtr + 8;
+#endif
+#endif
+
 STATICTOP = alignMemoryPage(STACK_MAX);
 
 function callRuntimeCallbacks(callbacks) {
diff --git a/src/runtime.js b/src/runtime.js
index 6439d0ed..6f17028a 100644
--- a/src/runtime.js
+++ b/src/runtime.js
@@ -73,6 +73,15 @@ var RuntimeGenerator = {
       quantum = '(quantum ? quantum : {{{ QUANTUM_SIZE }}})';
     }
     return target + ' = ' + Runtime.forceAlign(target, quantum);
+  },
+
+  // Given two 32-bit unsigned parts of an emulated 64-bit number, combine them into a JS number (double).
+  // Rounding is inevitable if the number is large. This is a particular problem for small negative numbers
+  // (-1 will be rounded!), so handle negatives separately and carefully
+  makeBigInt: function(low, high, unsigned) { // returns JS source text; 'unsigned' is spliced in verbatim as the condition of a ternary
+    return '(' + unsigned +
+           ' ? (' + makeSignOp(low, 'i32', 'un', 1, 1) + '+(' + makeSignOp(high, 'i32', 'un', 1, 1) + '*4294967296))' + // unsigned branch: both halves unsigned; 4294967296 is 2^32
+           ' : (' + makeSignOp(low, 'i32', 'un', 1, 1) + '+(' + makeSignOp(high, 'i32', 're', 1, 1) + '*4294967296)))'; // signed branch: only the high half is re-signed
   }
 };
 
@@ -260,6 +269,7 @@ var Runtime = {
 Runtime.stackAlloc = unInline('stackAlloc', ['size']);
 Runtime.staticAlloc = unInline('staticAlloc', ['size']);
 Runtime.alignMemory = unInline('alignMemory', ['size', 'quantum']);
+Runtime.makeBigInt = unInline('makeBigInt', ['low', 'high', 'unsigned']);
 
 function getRuntime() {
   var ret = 'var Runtime = {\n';
diff --git a/src/settings.js b/src/settings.js
index 7e900ea9..16a7b665 100644
--- a/src/settings.js
+++ b/src/settings.js
@@ -73,13 +73,6 @@ var DOUBLE_MODE = 1; // How to load and store 64-bit doubles. Without typed arra
                      // then load it aligned, and that load-store will make JS engines alter it if it is being
                      // stored to a typed array for security reasons. That will 'fix' the number from being a
                      // NaN or an infinite number.
-var EMULATE_UNALIGNED_ACCESSES = 0; // If set, the compiler will 'emulate' loads and stores that are not known to
-                                    // be sufficiently aligned, by working on individual bytes. This can be
-                                    // important in USE_TYPED_ARRAYS == 2, where unaligned accesses do not work,
-                                    // specifically in the case where unsafe LLVM optimizations have generated possibly
-                                    // unaligned code. (Without unsafe LLVM optimizations, there should be no
-                                    // need for this option.)
-                                    // Currently this only works for integers, not doubles and floats.
 
 var CLOSURE_ANNOTATIONS = 0; // If set, the generated code will be annotated for the closure
                              // compiler. This potentially lets closure optimize the code better.
@@ -225,6 +218,7 @@ var DEBUG_TAGS_SHOWING = [];
   //    relooping
   //    unparsedFunctions
   //    metadata
+  //    legalizer
 
 
 // A cached set of defines, generated from the header files. This
diff --git a/system/lib/debugging.cpp b/system/lib/debugging.cpp
new file mode 100644
index 00000000..ff9e0d68
--- /dev/null
+++ b/system/lib/debugging.cpp
@@ -0,0 +1,22 @@
+
+// Some stuff to patch up an emscripten-sdk build so it can be built natively (see nativize_llvm)
+
+#include <stdio.h>
+#include <stdlib.h>
+
+extern "C" {
+
+int *__errno() // returns the address of one function-local static int
+{
+  static int e = 0; // initialized to 0; the same object is returned on every call
+  return &e;
+}
+
+void __assert_func(const char *file, int line, const char *assertt, const char *cond) // prints its four arguments to stdout, then aborts
+{
+  printf("assert-func: %s : %d : %s : %s\n", file, line, assertt, cond);
+  abort(); // terminates the process, so this function does not return
+}
+
+}
+
diff --git a/tests/cases/legalizer_ta2.ll b/tests/cases/legalizer_ta2.ll
new file mode 100644
index 00000000..45cdf432
--- /dev/null
+++ b/tests/cases/legalizer_ta2.ll
@@ -0,0 +1,85 @@
+; ModuleID = 'tests/hello_world.bc'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+define i32 @main() {
+entry:
+  %buffer = alloca i8, i32 1000, align 4
+  %bundled = bitcast i8* %buffer to i104*
+  store i104 31079605376604435891501163880, i104* %bundled, align 4 ; hello world in there
+  call i32 (i8*)* @puts(i8* %buffer)
+
+  %loaded = load i104* %bundled, align 4 ; save for later
+
+  %backcast = bitcast i104* %bundled to i8*
+  call i32 (i8*)* @puts(i8* %backcast)
+
+  %temp.buffer = bitcast i8* %buffer to [0 x i8]*
+  %buffer1 = getelementptr [0 x i8]* %temp.buffer, i32 0, i32 1
+  %bundled1 = bitcast i8* %buffer1 to i104*
+  store i104 31079605376604435891501163880, i104* %bundled1, align 1 ; unaligned
+  call i32 (i8*)* @puts(i8* %buffer)
+
+; shifts
+  %shifted = lshr i104 %loaded, 16
+  store i104 %shifted, i104* %bundled, align 4
+  call i32 (i8*)* @puts(i8* %buffer)
+  %shifted2 = lshr i104 %loaded, 32
+  store i104 %shifted2, i104* %bundled, align 4
+  call i32 (i8*)* @puts(i8* %buffer)
+
+; store %loaded, make sure has not been modified
+  store i104 %loaded, i104* %bundled, align 4
+  call i32 (i8*)* @puts(i8* %buffer)
+
+  %shifted3 = shl i104 %loaded, 8
+  store i104 %shifted3, i104* %bundled, align 4
+  store i8 113, i8* %buffer ; remove initial 0 ; 'q'
+  call i32 (i8*)* @puts(i8* %buffer)
+
+; trunc
+  %shifted4 = shl i104 %loaded, 64
+  store i104 %shifted4, i104* %bundled, align 4
+  %nonzero64 = trunc i104 %loaded to i64 ; remove initial zeros
+  %bundled64 = bitcast i104* %bundled to i64*
+  store i64 %nonzero64, i64* %bundled64, align 4
+  call i32 (i8*)* @puts(i8* %buffer)
+
+  store i104 0, i104* %bundled, align 4 ; wipe it out
+  %small32 = trunc i104 %loaded to i32
+  %buffer32 = bitcast i8* %buffer to i32*
+  store i32 %small32, i32* %buffer32, align 4
+  call i32 (i8*)* @puts(i8* %buffer)
+
+  store i104 0, i104* %bundled, align 4 ; wipe it out
+  %small16 = trunc i104 %loaded to i16
+  %buffer16 = bitcast i8* %buffer to i16*
+  store i16 %small16, i16* %buffer16, align 4
+  call i32 (i8*)* @puts(i8* %buffer)
+
+  store i104 0, i104* %bundled, align 4 ; wipe it out
+  %small64 = trunc i104 %loaded to i64
+  %buffer64 = bitcast i8* %buffer to i64*
+  store i64 %small64, i64* %buffer64, align 4
+  call i32 (i8*)* @puts(i8* %buffer)
+
+; zext
+  store i104 0, i104* %bundled, align 4 ; wipe it out
+  %pre32 = or i32 6382179, 0
+  %big = zext i32 %pre32 to i104
+  store i104 %big, i104* %bundled, align 4
+  call i32 (i8*)* @puts(i8* %buffer)
+
+  store i104 0, i104* %bundled, align 4 ; wipe it out
+  %pre64 = zext i32 1684366951 to i64
+  %post64 = shl i64 %pre64, 32
+  %big64 = or i64 %pre64, %post64
+  %bigb = zext i64 %big64 to i104
+  store i104 %bigb, i104* %bundled, align 4
+  call i32 (i8*)* @puts(i8* %buffer)
+
+  ret i32 1
+}
+
+declare i32 @puts(i8*)
+
diff --git a/tests/cases/legalizer_ta2.txt b/tests/cases/legalizer_ta2.txt
new file mode 100644
index 00000000..ae9b4a47
--- /dev/null
+++ b/tests/cases/legalizer_ta2.txt
@@ -0,0 +1,13 @@
+hello, world
+hello, world
+hhello, world
+llo, world
+o, world
+hello, world
+qhello, world
+hello, whello
+hell
+he
+hello, w
+cba
+gfedgfed
diff --git a/tests/cases/phiself.ll b/tests/cases/phiself.ll
new file mode 100644
index 00000000..81249799
--- /dev/null
+++ b/tests/cases/phiself.ll
@@ -0,0 +1,69 @@
+; ModuleID = '/tmp/emscripten_temp/src.cpp.o'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+@.str = private unnamed_addr constant [7 x i8] c"cheez\0A\00", align 1
+@.str1 = private unnamed_addr constant [6 x i8] c"*%d*\0A\00", align 1
+
+define i32 @main() {
+entry:
+  %retval = alloca i32, align 4
+  %x = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 5, i32* %x, align 4, !dbg !15
+  store i32 0, i32* %i, align 4, !dbg !19
+  br label %for.cond, !dbg !19
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %xxx = phi i32 [ 43, %entry ], [add (i32 %xxx, i32 3), %for.inc ]        ; [#uses=1 type=i1]
+  %yyy = phi i32 [ 32, %entry ], [add (i32 %yyy, i32 2), %for.inc ]        ; [#uses=1 type=i1]
+  %zzz = phi i32 [ 21, %entry ], [add (i32 %zzz, i32 1), %for.inc ]        ; [#uses=1 type=i1]
+  %0 = load i32* %i, align 4, !dbg !19
+  %cmp = icmp slt i32 %0, 6, !dbg !19
+  br i1 %cmp, label %for.body, label %for.end, !dbg !19
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %x, align 4, !dbg !20
+  %2 = load i32* %i, align 4, !dbg !20
+  %mul = mul nsw i32 %1, %2, !dbg !20
+  %3 = load i32* %x, align 4, !dbg !20
+  %add = add nsw i32 %3, %mul, !dbg !20
+  store i32 %add, i32* %x, align 4, !dbg !20
+  %4 = load i32* %x, align 4, !dbg !22
+  %cmp1 = icmp sgt i32 %4, 1000, !dbg !22
+  br i1 %cmp1, label %if.then, label %if.end4, !dbg !22
+
+if.then:                                          ; preds = %for.body
+  %5 = load i32* %x, align 4, !dbg !23
+  %rem = srem i32 %5, 7, !dbg !23
+  %cmp2 = icmp eq i32 %rem, 0, !dbg !23
+  br i1 %cmp2, label %if.then3, label %if.end, !dbg !23
+
+if.then3:                                         ; preds = %if.then
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)), !dbg !25
+  br label %if.end, !dbg !25
+
+if.end:                                           ; preds = %if.then3, %if.then
+  %6 = load i32* %x, align 4, !dbg !26
+  %div = sdiv i32 %6, 2, !dbg !26
+  store i32 %div, i32* %x, align 4, !dbg !26
+  br label %for.end, !dbg !27
+
+if.end4:                                          ; preds = %for.body
+  br label %for.inc, !dbg !28
+
+for.inc:                                          ; preds = %if.end4
+  %7 = load i32* %i, align 4, !dbg !29
+  %inc = add nsw i32 %7, 1, !dbg !29
+  store i32 %inc, i32* %i, align 4, !dbg !29
+  br label %for.cond, !dbg !29
+
+for.end:                                          ; preds = %if.end, %for.cond
+  %8 = load i32* %x, align 4, !dbg !30
+  %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i32 %zzz), !dbg !30
+  ret i32 0, !dbg !31
+}
+
+declare i32 @printf(i8*, ...)
+
diff --git a/tests/cases/phiself.py b/tests/cases/phiself.py
new file mode 100644
index 00000000..14cf8e21
--- /dev/null
+++ b/tests/cases/phiself.py
@@ -0,0 +1,3 @@
+if Settings.MICRO_OPTS:
+  assert '$phi' not in generated, 'we should optimize out self-phis'
+
diff --git a/tests/cases/phiself.txt b/tests/cases/phiself.txt
new file mode 100644
index 00000000..0d4cb717
--- /dev/null
+++ b/tests/cases/phiself.txt
@@ -0,0 +1 @@
+*26*
diff --git a/tests/parseInt/output.txt b/tests/parseInt/output.txt
index e345e2ac..7ab00631 100644
--- a/tests/parseInt/output.txt
+++ b/tests/parseInt/output.txt
@@ -1,6 +1,6 @@
 strtol("-9223372036854775809") = -2147483648
 ERR 34
-strtoll("-9223372036854775809") = 9223372036854776000
+strtoll("-9223372036854775809") = -9223372036854775000
 ERR 34
 strtoul("-9223372036854775809") = 4294967295
 ERR 34
@@ -8,7 +8,7 @@ strtoull("-9223372036854775809") = 9223372036854774000
 
 strtol("-9223372036854775808") = -2147483648
 ERR 34
-strtoll("-9223372036854775808") = 9223372036854776000
+strtoll("-9223372036854775808") = -9223372036854775000
 ERR 34
 strtoul("-9223372036854775808") = 4294967295
 ERR 34
@@ -16,7 +16,7 @@ strtoull("-9223372036854775808") = 9223372036854774000
 
 strtol("-9223372036854775807") = -2147483648
 ERR 34
-strtoll("-9223372036854775807") = 9223372036854776000
+strtoll("-9223372036854775807") = -9223372036854775000
 ERR 34
 strtoul("-9223372036854775807") = 4294967295
 ERR 34
@@ -24,7 +24,7 @@ strtoull("-9223372036854775807") = 9223372036854774000
 
 strtol("-2147483649") = -2147483648
 ERR 34
-strtoll("-2147483649") = -2147483648
+strtoll("-2147483649") = -2147483649
 strtoul("-2147483649") = 2147483647
 strtoull("-2147483649") = 18446744071562068000
 
@@ -34,17 +34,17 @@ strtoul("-2147483648") = 2147483648
 strtoull("-2147483648") = 18446744071562068000
 
 strtol("-2147483647") = -2147483647
-strtoll("-2147483647") = -2147483648
+strtoll("-2147483647") = -2147483647
 strtoul("-2147483647") = 2147483649
 strtoull("-2147483647") = 18446744071562068000
 
 strtol("-5") = -5
-strtoll("-5") = 0
+strtoll("-5") = -5
 strtoul("-5") = 4294967291
 strtoull("-5") = 18446744069414584000
 
 strtol("-1") = -1
-strtoll("-1") = 0
+strtoll("-1") = -1
 strtoul("-1") = 4294967295
 strtoull("-1") = 18446744069414584000
 
@@ -100,7 +100,7 @@ strtoull("4294967296") = 4294967296
 
 strtol("18446744073709551614") = 2147483647
 ERR 34
-strtoll("18446744073709551614") = 9223372036854776000
+strtoll("18446744073709551614") = 9223372036854775000
 ERR 34
 strtoul("18446744073709551614") = 4294967295
 ERR 34
@@ -108,7 +108,7 @@ strtoull("18446744073709551614") = 18446744069414584000
 
 strtol("18446744073709551615") = 2147483647
 ERR 34
-strtoll("18446744073709551615") = 9223372036854776000
+strtoll("18446744073709551615") = 9223372036854775000
 ERR 34
 strtoul("18446744073709551615") = 4294967295
 ERR 34
@@ -116,7 +116,7 @@ strtoull("18446744073709551615") = 18446744069414584000
 
 strtol("18446744073709551616") = 2147483647
 ERR 34
-strtoll("18446744073709551616") = 9223372036854776000
+strtoll("18446744073709551616") = 9223372036854775000
 ERR 34
 strtoul("18446744073709551616") = 4294967295
 ERR 34
diff --git a/tests/parseInt/output_i64mode1.txt b/tests/parseInt/output_i64mode1.txt
index 649500b0..7ab00631 100644
--- a/tests/parseInt/output_i64mode1.txt
+++ b/tests/parseInt/output_i64mode1.txt
@@ -1,6 +1,6 @@
 strtol("-9223372036854775809") = -2147483648
 ERR 34
-strtoll("-9223372036854775809") = 9223372036854776000
+strtoll("-9223372036854775809") = -9223372036854775000
 ERR 34
 strtoul("-9223372036854775809") = 4294967295
 ERR 34
@@ -8,7 +8,7 @@ strtoull("-9223372036854775809") = 9223372036854774000
 
 strtol("-9223372036854775808") = -2147483648
 ERR 34
-strtoll("-9223372036854775808") = 9223372036854776000
+strtoll("-9223372036854775808") = -9223372036854775000
 ERR 34
 strtoul("-9223372036854775808") = 4294967295
 ERR 34
@@ -16,7 +16,7 @@ strtoull("-9223372036854775808") = 9223372036854774000
 
 strtol("-9223372036854775807") = -2147483648
 ERR 34
-strtoll("-9223372036854775807") = 9223372036854776000
+strtoll("-9223372036854775807") = -9223372036854775000
 ERR 34
 strtoul("-9223372036854775807") = 4294967295
 ERR 34
@@ -24,7 +24,7 @@ strtoull("-9223372036854775807") = 9223372036854774000
 
 strtol("-2147483649") = -2147483648
 ERR 34
-strtoll("-2147483649") = -2147483648
+strtoll("-2147483649") = -2147483649
 strtoul("-2147483649") = 2147483647
 strtoull("-2147483649") = 18446744071562068000
 
@@ -34,19 +34,19 @@ strtoul("-2147483648") = 2147483648
 strtoull("-2147483648") = 18446744071562068000
 
 strtol("-2147483647") = -2147483647
-strtoll("-2147483647") = -2147483648
+strtoll("-2147483647") = -2147483647
 strtoul("-2147483647") = 2147483649
 strtoull("-2147483647") = 18446744071562068000
 
 strtol("-5") = -5
-strtoll("-5") = 0
+strtoll("-5") = -5
 strtoul("-5") = 4294967291
-strtoull("-5") = 18446744073709552000
+strtoull("-5") = 18446744069414584000
 
 strtol("-1") = -1
-strtoll("-1") = 0
+strtoll("-1") = -1
 strtoul("-1") = 4294967295
-strtoull("-1") = 18446744073709552000
+strtoull("-1") = 18446744069414584000
 
 strtol("0") = 0
 strtoll("0") = 0
@@ -100,27 +100,27 @@ strtoull("4294967296") = 4294967296
 
 strtol("18446744073709551614") = 2147483647
 ERR 34
-strtoll("18446744073709551614") = 9223372036854776000
+strtoll("18446744073709551614") = 9223372036854775000
 ERR 34
 strtoul("18446744073709551614") = 4294967295
 ERR 34
-strtoull("18446744073709551614") = 18446744073709552000
+strtoull("18446744073709551614") = 18446744069414584000
 
 strtol("18446744073709551615") = 2147483647
 ERR 34
-strtoll("18446744073709551615") = 9223372036854776000
+strtoll("18446744073709551615") = 9223372036854775000
 ERR 34
 strtoul("18446744073709551615") = 4294967295
 ERR 34
-strtoull("18446744073709551615") = 18446744073709552000
+strtoull("18446744073709551615") = 18446744069414584000
 
 strtol("18446744073709551616") = 2147483647
 ERR 34
-strtoll("18446744073709551616") = 9223372036854776000
+strtoll("18446744073709551616") = 9223372036854775000
 ERR 34
 strtoul("18446744073709551616") = 4294967295
 ERR 34
-strtoull("18446744073709551616") = 18446744073709552000
+strtoull("18446744073709551616") = 18446744069414584000
 
 strtol("0x12", 0, 0) = 18
 strtol("0x12", 0, 10) = 0
diff --git a/tests/runner.py b/tests/runner.py
index f92793d4..cbff37eb 100755
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -126,7 +126,7 @@ process(sys.argv[1])
   # Build JavaScript code from source code
   def build(self, src, dirname, filename, output_processor=None, main_file=None, additional_files=[], libraries=[], includes=[], build_ll_hook=None, extra_emscripten_args=[], post_build=None):
 
-    Building.pick_llvm_opts(3, safe=Building.LLVM_OPTS != 2) # pick llvm opts here, so we include changes to Settings in the test case code
+    Building.pick_llvm_opts(3) # pick llvm opts here, so we include changes to Settings in the test case code
 
     # Copy over necessary files for compiling the source
     if main_file is None:
@@ -412,6 +412,17 @@ if 'benchmark' not in str(sys.argv) and 'sanity' not in str(sys.argv):
             #include <stdio.h>
             int main()
             {
+              long long a = 0x2b00505c10;
+              long long b = a >> 29;
+              long long c = a >> 32;
+              long long d = a >> 34;
+              printf("*%Ld,%Ld,%Ld,%Ld*\\n", a, b, c, d);
+              unsigned long long ua = 0x2b00505c10;
+              unsigned long long ub = ua >> 29;
+              unsigned long long uc = ua >> 32;
+              unsigned long long ud = ua >> 34;
+              printf("*%Ld,%Ld,%Ld,%Ld*\\n", ua, ub, uc, ud);
+
               long long x = 0x0000def123450789ULL; // any bigger than this, and we
               long long y = 0x00020ef123456089ULL; // start to run into the double precision limit!
               printf("*%Ld,%Ld,%Ld,%Ld,%Ld*\\n", x, y, x | y, x & y, x ^ y, x >> 2, y << 2);
@@ -428,7 +439,7 @@ if 'benchmark' not in str(sys.argv) and 'sanity' not in str(sys.argv):
               return 0;
             }
           '''
-          self.do_run(src, '*245127260211081,579378795077769,808077213656969,16428841631881,791648372025088*\n*13.00,6.00,3.00,*3*')
+          self.do_run(src, '*184688860176,344,43,10*\n*184688860176,344,43,10*\n*245127260211081,579378795077769,808077213656969,16428841631881,791648372025088*\n*13.00,6.00,3.00,*3*')
 
         if Settings.QUANTUM_SIZE == 1: return self.skip('TODO: i64 mode 1 for q1')
 
@@ -497,12 +508,26 @@ if 'benchmark' not in str(sys.argv) and 'sanity' not in str(sys.argv):
             // global structs with i64s
             printf("*%d,%Ld*\n*%d,%Ld*\n", iub[0].c, iub[0].d, iub[1].c, iub[1].d);
 
+            // Bitshifts
+            {
+              int64_t a = -1;
+              int64_t b = a >> 29;
+              int64_t c = a >> 32;
+              int64_t d = a >> 34;
+              printf("*%Ld,%Ld,%Ld,%Ld*\n", a, b, c, d);
+              uint64_t ua = -1;
+              int64_t ub = ua >> 29;
+              int64_t uc = ua >> 32;
+              int64_t ud = ua >> 34;
+              printf("*%Ld,%Ld,%Ld,%Ld*\n", ua, ub, uc, ud);
+            }
+
             // Math mixtures with doubles
             {
               uint64_t a = 5;
               double b = 6.8;
               uint64_t c = a * b;
-              printf("*prod:%llu*\n*%d,%d,%d*", c, (int)&a, (int)&b, (int)&c); // printing addresses prevents optimizations
+              printf("*prod:%llu*\n*%d,%d,%d*\n", c, (int)&a, (int)&b, (int)&c); // printing addresses prevents optimizations
             }
 
             // Basic (rounded, for now) math. Just check compilation.
@@ -515,11 +540,21 @@ if 'benchmark' not in str(sys.argv) and 'sanity' not in str(sys.argv):
             return 0;
           }
         '''
-        self.do_run(src, '*1311918518731868200\n0,0,0,1,1\n1,0,1,0,1*\n*245127260211081*\n*245127260209443*\n' +
-                         '*18446744073709552000*\n*576460752303423500*\n' +
-                         'm1: 127\n*123*\n*127*\n' +
-                         '*55,17179869201*\n*122,25769803837*\n' +
-                         '*prod:34*\n')
+        self.do_run(src, '*1311918518731868200\n' +
+                         '0,0,0,1,1\n' +
+                         '1,0,1,0,1*\n' +
+                         '*245127260211081*\n' +
+                         '*245127260209443*\n' +
+                         '*18446744073709552000*\n' +
+                         '*576460752303423500*\n' +
+                         'm1: 127\n' +
+                         '*123*\n' +
+                         '*127*\n' +
+                         '*55,17179869201*\n' +
+                         '*122,25769803837*\n' +
+                         '*-1,-1,-1,-1*\n' +
+                         '*-1,34359738367,4294967295,1073741823*\n' +
+                         '*prod:34*')
 
         Settings.CORRECT_SIGNS = 1
 
@@ -705,7 +740,7 @@ if 'benchmark' not in str(sys.argv) and 'sanity' not in str(sys.argv):
         self.do_run(src, '*255*\n*65535*\n*-1*\n*-1*\n*-1*')
 
     def test_bitfields(self):
-        Settings.SAFE_HEAP = 0 # bitfields do loads on invalid areas, by design
+        if self.emcc_args is None: Settings.SAFE_HEAP = 0 # bitfields do loads on invalid areas, by design
         src = '''
           #include <stdio.h>
           struct bitty {
@@ -3901,7 +3936,7 @@ def process(filename):
       if self.emcc_args is None:
         if Building.LLVM_OPTS: return self.skip('optimizing bitcode before emcc can confuse libcxx inclusion')
         self.emcc_args = [] # libc++ auto-inclusion is only done if we use emcc
-        Settings.SAFE_HEAP = 0 # Some spurious warnings from libc++ internals
+        if self.emcc_args is None: Settings.SAFE_HEAP = 0 # Some spurious warnings from libc++ internals
 
       src = '''
         #include <iostream>
@@ -4026,7 +4061,7 @@ def process(filename):
         ''', 'hello world');
         
     def test_static_variable(self):
-      Settings.SAFE_HEAP = 0 # LLVM mixes i64 and i8 in the guard check
+      if self.emcc_args is None: Settings.SAFE_HEAP = 0 # LLVM mixes i64 and i8 in the guard check
       src = '''
         #include <stdio.h>
 
@@ -4093,7 +4128,7 @@ def process(filename):
         self.emcc_args += ['--closure', '1'] # Use closure here for some additional coverage
 
       Building.COMPILER_TEST_OPTS = [] # remove -g, so we have one test without it by default
-      Settings.SAFE_HEAP = 0 # Has some actual loads of unwritten-to places, in the C++ code...
+      if self.emcc_args is None: Settings.SAFE_HEAP = 0 # Has some actual loads of unwritten-to places, in the C++ code...
 
       # Overflows happen in hash loop
       Settings.CORRECT_OVERFLOWS = 1
@@ -4124,7 +4159,7 @@ def process(filename):
         if Settings.QUANTUM_SIZE == 1: return self.skip('TODO: make this work')
 
         # Overflows in luaS_newlstr hash loop
-        Settings.SAFE_HEAP = 0 # Has various warnings, with copied HEAP_HISTORY values (fixed if we copy 'null' as the type)
+        if self.emcc_args is None: Settings.SAFE_HEAP = 0 # Has various warnings, with copied HEAP_HISTORY values (fixed if we copy 'null' as the type)
         Settings.CORRECT_OVERFLOWS = 1
         Settings.CHECK_OVERFLOWS = 0
         Settings.CORRECT_SIGNS = 1 # Not sure why, but needed
@@ -4183,7 +4218,7 @@ def process(filename):
       Settings.CORRECT_SIGNS_LINES = pgo_data['signs_lines']
       Settings.CORRECT_OVERFLOWS = 0
       Settings.CORRECT_ROUNDINGS = 0
-      Settings.SAFE_HEAP = 0 # uses time.h to set random bytes, other stuff
+      if self.emcc_args is None: Settings.SAFE_HEAP = 0 # uses time.h to set random bytes, other stuff
       Settings.DISABLE_EXCEPTION_CATCHING = 1
       Settings.FAST_MEMORY = 4*1024*1024
       Settings.EXPORTED_FUNCTIONS = ['_main', '_sqlite3_open', '_sqlite3_close', '_sqlite3_exec', '_sqlite3_free', '_callback'];
@@ -4228,7 +4263,7 @@ def process(filename):
                    force_c=True)
 
     def test_the_bullet(self): # Called thus so it runs late in the alphabetical cycle... it is long
-      if Building.LLVM_OPTS: Settings.SAFE_HEAP = 0 # Optimizations make it so we do not have debug info on the line we need to ignore
+      if Building.LLVM_OPTS and self.emcc_args is None: Settings.SAFE_HEAP = 0 # Optimizations make it so we do not have debug info on the line we need to ignore
 
       # Note: this is also a good test of per-file and per-line changes (since we have multiple files, and correct specific lines)
       if Settings.SAFE_HEAP:
@@ -4250,7 +4285,7 @@ def process(filename):
     def test_poppler(self):
       if not self.emcc_args == []: return self.skip('very slow, we only do this in default')
 
-      Settings.SAFE_HEAP = 0 # Has variable object
+      if self.emcc_args is None: Settings.SAFE_HEAP = 0 # Has variable object
 
       Settings.CORRECT_OVERFLOWS = 1
       Settings.CORRECT_SIGNS = 1
@@ -4395,7 +4430,7 @@ def process(filename):
       # Overflows in string_hash
       Settings.CORRECT_OVERFLOWS = 1
       Settings.CHECK_OVERFLOWS = 0
-      Settings.SAFE_HEAP = 0 # Has bitfields which are false positives. Also the PyFloat_Init tries to detect endianness.
+      if self.emcc_args is None: Settings.SAFE_HEAP = 0 # Has bitfields which are false positives. Also the PyFloat_Init tries to detect endianness.
       Settings.CORRECT_SIGNS = 1 # Not sure why, but needed
       Settings.EXPORTED_FUNCTIONS = ['_main', '_PyRun_SimpleStringFlags'] # for the demo
 
@@ -4418,6 +4453,9 @@ def process(filename):
         for name in glob.glob(path_from_root('tests', 'cases', '*.ll')):
           shortname = name.replace('.ll', '')
           if '' not in shortname: continue
+          if '_ta2' in shortname and not Settings.USE_TYPED_ARRAYS == 2:
+            print self.skip('case "%s" only relevant for ta2' % shortname)
+            continue
           print >> sys.stderr, "Testing case '%s'..." % shortname
           output_file = path_from_root('tests', 'cases', shortname + '.txt')
           if Settings.QUANTUM_SIZE == 1:
@@ -4523,58 +4561,6 @@ def process(filename):
       self.do_run(src, '''Profiling data:
 Block 0: ''', post_build=post1)
 
-      # Part 2: old JS version
-
-      Settings.PROFILE = 1
-      Settings.INVOKE_RUN = 0
-
-      src = '''
-          #include <stdio.h>
-
-          int inner1(int x) {
-            for (int i = 0; i < 20; i++)
-              x += x/3;
-            return x;
-          }
-          int inner2(int x) {
-            for (int i = 0; i < 10; i++)
-              x -= x/4;
-            return x;
-          }
-          int inner3(int x) {
-            for (int i = 0; i < 5; i++)
-              x += x/2;
-            x = inner1(x) - inner2(x);
-            for (int i = 0; i < 5; i++)
-              x -= x/2;
-            return x;
-          }
-
-          int main()
-          {
-            int total = 0;
-            for (int i = 0; i < 5000; i++)
-              total += inner1(i) - 4*inner3(i);
-            printf("*%d*\\n", total);
-            return 0;
-          }
-        '''
-
-      post = '''
-def process(filename):
-  src = open(filename, 'a')
-  src.write(\'\'\'
-    startProfiling();
-    run();
-    stopProfiling();
-    printProfiling();
-    print('*ok*');
-  \'\'\')
-  src.close()
-'''
-
-      self.do_run(src, ': __Z6inner1i (5000)\n', post_build=post)
-
     ### Integration tests
 
     def test_scriptaclass(self):
@@ -4813,6 +4799,8 @@ Child2:9
 ''', post_build=post2)
 
     def test_typeinfo(self):
+      if self.emcc_args is not None and self.emcc_args != []: return self.skip('full LLVM opts optimize out all the code that uses the type')
+
       Settings.RUNTIME_TYPE_INFO = 1
       if Settings.QUANTUM_SIZE != 4: return self.skip('We assume normal sizes in the output here')
 
@@ -5024,6 +5012,8 @@ def process(filename):
         assert 'Assertion failed' in str(e), str(e)
 
     def test_linespecific(self):
+      if self.emcc_args: self.emcc_args += ['--llvm-opts', '0'] # llvm full opts make the expected failures here not happen
+
       Settings.CHECK_SIGNS = 0
       Settings.CHECK_OVERFLOWS = 0
 
@@ -5201,8 +5191,9 @@ def process(filename):
 
       def check(output):
         # TODO: check the line #
-        assert 'Overflow|src.cpp:6 : 60 hits, %20 failures' in output, 'no indication of Overflow corrections: ' + output
-        assert 'UnSign|src.cpp:13 : 6 hits, %17 failures' in output, 'no indication of Sign corrections: ' + output
+        if self.emcc_args is None or self.emcc_args == []: # LLVM full opts optimize out some corrections
+          assert 'Overflow|src.cpp:6 : 60 hits, %20 failures' in output, 'no indication of Overflow corrections: ' + output
+          assert 'UnSign|src.cpp:13 : 6 hits, %17 failures' in output, 'no indication of Sign corrections: ' + output
         return output
 
       self.do_run(src, '*186854335,63*\n', output_nicerizer=check)
@@ -5297,7 +5288,7 @@ class %s(T):
     else:
       Settings.I64_MODE = 0
 
-    Building.pick_llvm_opts(3, safe=Building.LLVM_OPTS != 2)
+    Building.pick_llvm_opts(3)
 
 TT = %s
 ''' % (fullname, fullname, fullname, compiler, str(emcc_args), llvm_opts, embetter, quantum_size, typed_arrays, fullname))
@@ -5656,6 +5647,14 @@ f.close()
         output = Popen([NODE_JS, JS_OPTIMIZER, input] + passes, stdin=PIPE, stdout=PIPE).communicate()[0]
         self.assertIdentical(expected, output.replace('\n\n', '\n'))
 
+    def test_reminder(self): # deliberately failing to-do list; the items are joined because a chain of asserts would only ever report the first one
+      assert False, '\n'.join(['Optimize makeGet/SetValue to do 16-bit reads/writes when possible, not just 8',
+                               'Make sure unaligned loads are not done unnecessarily (add some comments and inspect the source)',
+                               'Why is libcxx/ created in e.g. test_python, with EMCC_DEBUG, when it does not need libcxx?',
+                               'Make sure Poppler builds with llvm full opts',
+                               'Check if we should use -Ox instead of -std-compile-opts',
+                               'Make it easy to disable full llvm opts and use just normal ones'])
+
 elif 'benchmark' in str(sys.argv):
   # Benchmarks. Run them with argument |benchmark|. To run a specific test, do
   # |benchmark.test_X|.
diff --git a/tools/eliminator/eliminator-test-output.js b/tools/eliminator/eliminator-test-output.js
index 594508d0..da9be5cc 100644
--- a/tools/eliminator/eliminator-test-output.js
+++ b/tools/eliminator/eliminator-test-output.js
@@ -105,4 +105,10 @@ function t() {
     __label__ = 4;
   }
 }
-// EMSCRIPTEN_GENERATED_FUNCTIONS: ["f", "g", "h", "py", "r", "t"]
+function f2() {
+  var $_pre = HEAPU32[($vla + ($storemerge312 << 2) | 0) >> 2];
+  var $storemerge312 = $storemerge312 + 1 | 0;
+  var $8 = $_pre;
+  c($8);
+}
+// EMSCRIPTEN_GENERATED_FUNCTIONS: ["f", "g", "h", "py", "r", "t", "f2"]
diff --git a/tools/eliminator/eliminator-test.js b/tools/eliminator/eliminator-test.js
index 02ac0c08..3410499d 100644
--- a/tools/eliminator/eliminator-test.js
+++ b/tools/eliminator/eliminator-test.js
@@ -114,5 +114,13 @@ function t() {
   var $cmp3=($12) < ($13);
   if (!($cmp3)) { __label__ = 4; }
 }
-// EMSCRIPTEN_GENERATED_FUNCTIONS: ["f", "g", "h", "py", "r", "t"]
+function f2() {
+  var $arrayidx64_phi_trans_insert = $vla + ($storemerge312 << 2) | 0;
+  var $_pre = HEAPU32[$arrayidx64_phi_trans_insert >> 2];
+  var $phitmp = $storemerge312 + 1 | 0;
+  var $storemerge312 = $phitmp;
+  var $8 = $_pre;
+  c($8);
+}
+// EMSCRIPTEN_GENERATED_FUNCTIONS: ["f", "g", "h", "py", "r", "t", "f2"]
 
diff --git a/tools/eliminator/eliminator.coffee b/tools/eliminator/eliminator.coffee
index 8b99338a..84e544b2 100644
--- a/tools/eliminator/eliminator.coffee
+++ b/tools/eliminator/eliminator.coffee
@@ -262,8 +262,14 @@ class Eliminator
       else if type is 'var'
         for [varName, varValue] in node[1]
           if varValue? then traverse varValue, checkForMutations
+          # Mark the variable as live
           if @isSingleDef[varName]
             isLive[varName] = true
+          # Mark variables that depend on it as no longer live
+          if @dependsOn[varName]?
+            for varNameDep of @dependsOn[varName]
+              if isLive[varNameDep]
+                isLive[varNameDep] = false
         return node
       else
         checkForMutations node, type
diff --git a/tools/exec_llvm.py b/tools/exec_llvm.py
index 1b1bba1b..5cf55e46 100755
--- a/tools/exec_llvm.py
+++ b/tools/exec_llvm.py
@@ -26,11 +26,8 @@ it runs
   python $(EMSCRIPTEN_TOOLS)/exec_llvm.py THE_FILE PARAMS
 
 An alternative solution to this problem is to compile
-the .ll into native code. This can be done as follows:
-
- * Use llc to generate x86 asm
- * Use as to generate an object file
- * Use g++ to link it to an executable
+the .ll into native code, see nativize_llvm.py. That is
+useful when this fails.
 '''
 
 import os, sys
diff --git a/tools/find_bigis.py b/tools/find_bigis.py
new file mode 100644
index 00000000..d11c1a81
--- /dev/null
+++ b/tools/find_bigis.py
@@ -0,0 +1,18 @@
+'''
+Simple tool to find big i types in an .ll file. Anything over i64 is of interest.
+Usage: python find_bigis.py FILE.ll - prints the sorted list of distinct bit widths above 64.
+'''
+
+import os, sys, re
+
+filename = sys.argv[1]
+with open(filename) as f: # close the file deterministically instead of leaking the handle
+  data = f.read()
+
+# Capture only the bit width. The leading [^%] rejects tokens written as %iN and the trailing
+# [^=] rejects matches followed by '='; neither neighbouring character is part of the result.
+# Collecting ints (not the matched strings) reports each width once, whatever surrounds it.
+sizes = set(int(width) for width in re.findall(r'[^%]i(\d+) [^=]', data))
+bigs = sorted(size for size in sizes if size > 64)
+print bigs
+
diff --git a/tools/nativize_llvm.py b/tools/nativize_llvm.py
new file mode 100644
index 00000000..de78dce2
--- /dev/null
+++ b/tools/nativize_llvm.py
@@ -0,0 +1,31 @@
+#!/usr/bin/python
+
+'''
+Small utility to build some llvm bitcode into native code. Useful when lli (called
+from exec_llvm) fails for some reason.
+
+ * Use llc to generate x86 asm
+ * Use as to generate an object file
+ * Use g++ to link it to an executable
+'''
+
+import os, sys
+from subprocess import Popen, PIPE, STDOUT
+
+__rootpath__ = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
+def path_from_root(*pathelems):
+  return os.path.join(__rootpath__, *pathelems)
+exec(open(path_from_root('tools', 'shared.py'), 'r').read())
+
+filename = sys.argv[1]
+libs = sys.argv[2:] # e.g.: dl for dlopen/dlclose, util for openpty/forkpty
+
+# Runs one build step and aborts on failure, so later steps never consume missing or stale output
+def run_step(description, command):
+  print description
+  if Popen(command).wait() != 0: sys.exit("nativize_llvm: step '%s' failed" % description)
+run_step('bc => clean bc', [LLVM_OPT, filename, '-strip-debug', '-o=' + filename + '.clean.bc'])
+run_step('bc => s', [LLVM_COMPILER, filename + '.clean.bc', '-o=' + filename + '.s'])
+run_step('s => o', ['as', filename + '.s', '-o', filename + '.o'])
+run_step('o => runnable', ['g++', path_from_root('system', 'lib', 'debugging.cpp'), filename + '.o', '-o', filename + '.run'] + ['-l' + lib for lib in libs])
+
diff --git a/tools/shared.py b/tools/shared.py
index f20fc75c..81c7fcf8 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -440,9 +440,9 @@ class Building:
   # @param opt Either an integer, in which case it is the optimization level (-O1, -O2, etc.), or a list of raw
   #            optimization passes passed to llvm opt
   @staticmethod
-  def llvm_opt(filename, opts, safe=True):
+  def llvm_opt(filename, opts):
     if type(opts) is int:
-      opts = Building.pick_llvm_opts(opts, safe)
+      opts = Building.pick_llvm_opts(opts)
     output = Popen([LLVM_OPT, filename] + opts + ['-o=' + filename + '.opt.bc'], stdout=PIPE).communicate()[0]
     assert os.path.exists(filename + '.opt.bc'), 'Failed to run llvm optimizations: ' + output
     shutil.move(filename + '.opt.bc', filename)
@@ -528,7 +528,7 @@ class Building:
     return filename + '.o.js'
 
   @staticmethod
-  def pick_llvm_opts(optimization_level, safe=True):
+  def pick_llvm_opts(optimization_level):
     '''
       It may be safe to use nonportable optimizations (like -OX) if we remove the platform info from the .ll
       (which we do in do_ll_opts) - but even there we have issues (even in TA2) with instruction combining
@@ -539,14 +539,16 @@ class Building:
 
         llvm-as < /dev/null | opt -std-compile-opts -disable-output -debug-pass=Arguments
     '''
+    safe = Settings.USE_TYPED_ARRAYS != 2 or Settings.BUILD_AS_SHARED_LIB or Settings.LINKABLE
+    print 'LLVM opts, safe?', safe
     opts = []
     if optimization_level > 0:
-      #opts.append('-disable-inlining') # we prefer to let closure compiler do our inlining
       if not safe:
+        opts.append('-disable-inlining') # we prefer to let closure compiler do our inlining, to avoid overly aggressive inlining
         #opts.append('-O%d' % optimization_level)
         opts.append('-std-compile-opts')
         opts.append('-std-link-opts')
-        print 'Unsafe:', opts,
+        print 'Unsafe:', opts
       else:
         allow_nonportable = not safe
         optimize_size = True