15 files changed, 313 insertions, 192 deletions
diff --git a/emcc b/emcc
index b1feeb59..42200428 100755
--- a/emcc
+++ b/emcc
@@ -77,6 +77,8 @@ import os, sys, shutil, tempfile
 from subprocess import Popen, PIPE, STDOUT
 from tools import shared
 
+MAX_LLVM_OPT_LEVEL = 3
+
 DEBUG = os.environ.get('EMCC_DEBUG')
 TEMP_DIR = os.environ.get('EMCC_TEMP_DIR')
 LEAVE_INPUTS_RAW = os.environ.get('EMCC_LEAVE_INPUTS_RAW') # Do not compile .ll files into .bc, just compile them with emscripten directly
@@ -84,10 +86,16 @@ LEAVE_INPUTS_RAW = os.environ.get('EMCC_LEAVE_INPUTS_RAW') # Do not compile .ll
                                                            # specific need.
                                                            # One major limitation with this mode is that dlmalloc and libc++ cannot be
                                                            # added in. Also, LLVM optimizations will not be done, nor dead code elimination
+AUTODEBUG = os.environ.get('EMCC_AUTODEBUG') # If set to 1, we will run the autodebugger (the automatic debugging tool, see tools/autodebugger).
+                                             # Note that this will disable inclusion of libraries. This is useful because including
+                                             # dlmalloc makes it hard to compare native and js builds
 
 if DEBUG: print >> sys.stderr, 'emcc: ', ' '.join(sys.argv)
 if DEBUG and LEAVE_INPUTS_RAW: print >> sys.stderr, 'emcc: leaving inputs raw'
 
+stdout = PIPE if not DEBUG else None # suppress output of child processes
+stderr = PIPE if not DEBUG else None # unless we are in DEBUG mode
+
 shared.check_sanity()
 
 # Handle some global flags
@@ -114,7 +122,7 @@ Most normal gcc/g++ options will work, for example:
 
 Options that are modified or new in %s include:
   -O0                      No optimizations (default)
-  -O1                      Simple optimizations, including safe LLVM
+  -O1                      Simple optimizations, including LLVM -O1
                            optimizations, and no runtime assertions
                            or C++ exception catching (to re-enable
                            C++ exception catching, use
@@ -123,7 +131,8 @@ Options that are modified or new in %s include:
                            compiling to JavaScript, not to intermediate
                            bitcode.
   -O2                      As -O1, plus the relooper (loop recreation),
-                           plus closure compiler advanced opts
+                           plus closure compiler advanced opts, plus
+                           LLVM -O2 optimizations
                            Warning: Compiling with this takes a long time!
   -O3                      As -O2, plus dangerous optimizations that may
                            break the generated code! If that happens, try
@@ -134,12 +143,8 @@ Options that are modified or new in %s include:
   --typed-arrays <mode>    0: No typed arrays
                            1: Parallel typed arrays
                            2: Shared (C-like) typed arrays (default)
-  --llvm-opts <level>      0: No LLVM optimizations (default in -O0)
-                           1: Safe/portable LLVM optimizations
-                              (default in -O1 and above)
-                           2: Full, unsafe/unportable LLVM optimizations;
-                              this will almost certainly break the
-                              generated code!
+  --llvm-opts <on>         0: No LLVM optimizations (default in -O0)
+                           1: LLVM optimizations (default in -O1 +)
   --closure <on>           0: No closure compiler (default in -O0, -O1)
                            1: Run closure compiler (default in -O2, -O3)
   --js-transform <cmd>     <cmd> will be called on the generated code
@@ -223,8 +228,6 @@ def unsuffixed(name):
 def unsuffixed_basename(name):
   return os.path.basename(unsuffixed(name))
 
-LLVM_INTERNAL_OPT_LEVEL = 2
-
 # ---------------- End configs -------------
 
 if len(sys.argv) == 1 or sys.argv[1] in ['x', 't']:
@@ -278,7 +281,7 @@ try:
   newargs = sys.argv[1:]
 
   opt_level = 0
-  llvm_opt_level = None
+  llvm_opts = None
   closure = None
   js_transform = None
   compress_whitespace = None
@@ -296,8 +299,7 @@ try:
       newargs[i] = ''
     elif newargs[i].startswith('--llvm-opts'):
       check_bad_eq(newargs[i])
-      llvm_opt_level = eval(newargs[i+1])
-      assert 0 <= llvm_opt_level <= 1, 'Only two levels of LLVM optimizations are supported so far, 0 (none) and 1 (safe)'
+      llvm_opts = eval(newargs[i+1])
       newargs[i] = ''
       newargs[i+1] = ''
     elif newargs[i].startswith('--closure'):
@@ -323,7 +325,7 @@ try:
       newargs[i+1] = ''
   newargs = [ arg for arg in newargs if arg is not '' ]
 
-  if llvm_opt_level is None: llvm_opt_level = 1 if opt_level >= 1 else 0
+  if llvm_opts is None: llvm_opts = 1 if opt_level >= 1 else 0
   if closure is None: closure = 1 if opt_level >= 2 else 0
   if compress_whitespace is None:
     compress_whitespace = closure # if closure is run, compress whitespace
@@ -421,7 +423,7 @@ try:
 
   # If we were just asked to generate bitcode, stop there
   if final_suffix not in ['js', 'html']:
-    if llvm_opt_level > 0:
+    if llvm_opts > 0:
       print >> sys.stderr, 'emcc: warning: -Ox flags ignored, since not generating JavaScript'
     if not specified_target:
       for input_file in input_files:
@@ -446,7 +448,7 @@ try:
 
   extra_files_to_link = []
 
-  if not LEAVE_INPUTS_RAW:
+  if not LEAVE_INPUTS_RAW and not AUTODEBUG:
     # Check if we need to include some libraries that we compile. (We implement libc ourselves in js, but
     # compile a malloc implementation and stdlibc++.)
     # Note that we assume a single symbol is enough to know if we have/do not have dlmalloc etc. If you
@@ -455,9 +457,9 @@ try:
     # dlmalloc
     def create_dlmalloc():
       if DEBUG: print >> sys.stderr, 'emcc: building dlmalloc for cache'
-      Popen([shared.EMCC, shared.path_from_root('system', 'lib', 'dlmalloc.c'), '-g', '-o', in_temp('dlmalloc.o')], stdout=PIPE, stderr=PIPE).communicate()
+      Popen([shared.EMCC, shared.path_from_root('system', 'lib', 'dlmalloc.c'), '-g', '-o', in_temp('dlmalloc.o')], stdout=stdout, stderr=stderr).communicate()
       # we include the libc++ new stuff here, so that the common case of using just new/delete is quick to link
-      Popen([shared.EMXX, shared.path_from_root('system', 'lib', 'libcxx', 'new.cpp'), '-g', '-o', in_temp('new.o')], stdout=PIPE, stderr=PIPE).communicate()
+      Popen([shared.EMXX, shared.path_from_root('system', 'lib', 'libcxx', 'new.cpp'), '-g', '-o', in_temp('new.o')], stdout=stdout, stderr=stderr).communicate()
       shared.Building.link([in_temp('dlmalloc.o'), in_temp('new.o')], in_temp('dlmalloc_full.o'))
       return in_temp('dlmalloc_full.o')
     def fix_dlmalloc():
@@ -522,16 +524,16 @@ try:
       shutil.move(in_temp(unsuffixed_basename(input_files[0]) + '.o'), in_temp(target_basename + '.bc'))
 
   # Optimize, if asked to
-  if llvm_opt_level > 0 and not LEAVE_INPUTS_RAW:
+  if llvm_opts > 0 and opt_level > 0 and not LEAVE_INPUTS_RAW:
     if DEBUG: print >> sys.stderr, 'emcc: LLVM opts'
-    shared.Building.llvm_opt(in_temp(target_basename + '.bc'), LLVM_INTERNAL_OPT_LEVEL, safe=llvm_opt_level < 2)
+    shared.Building.llvm_opt(in_temp(target_basename + '.bc'), min(opt_level, MAX_LLVM_OPT_LEVEL))
   else:
     # If possible, remove dead functions etc., this potentially saves a lot in the size of the generated code (and the time to compile it)
     if not LEAVE_INPUTS_RAW and not shared.Settings.BUILD_AS_SHARED_LIB and not shared.Settings.LINKABLE:
       if DEBUG: print >> sys.stderr, 'emcc: LLVM dead globals elimination'
       shared.Building.llvm_opt(in_temp(target_basename + '.bc'), ['-internalize', '-globaldce'])
 
-  # Emscripten
+  # Prepare .ll for Emscripten
   try:
     if shared.Settings.RELOOP:
       print >> sys.stderr, 'emcc: warning: The relooper optimization can be very slow.'
@@ -556,6 +558,12 @@ try:
     final = input_files[0]
   if DEBUG: save_intermediate('ll', 'll')
 
+  if AUTODEBUG:
+    Popen(['python', shared.AUTODEBUGGER, final, final + '.ad.ll']).communicate()[0]
+    final += '.ad.ll'
+    if DEBUG: save_intermediate('autodebug', 'll')
+
+  # Emscripten
   if DEBUG: print >> sys.stderr, 'emcc: LLVM => JS'
   final = shared.Building.emscripten(final, append_ext=False)
   if DEBUG: save_intermediate('original')
diff --git a/src/analyzer.js b/src/analyzer.js
index 7412be6d..a724d229 100644
--- a/src/analyzer.js
+++ b/src/analyzer.js
@@ -463,6 +463,12 @@ function analyzer(data, sidePass) {
       item.functions.forEach(function(func) {
         func.lines.forEach(function(line, i) {
           if (line.intertype === 'assign' && line.value.intertype === 'load') {
+            // Floats have no concept of signedness. Mark them as 'signed', which is the default, for which we do nothing
+            if (line.value.type in Runtime.FLOAT_TYPES) {
+              line.value.unsigned = false;
+              return;
+            }
+            // Booleans are always unsigned
             var data = func.variables[line.ident];
             if (data.type === 'i1') {
               line.value.unsigned = true;
diff --git a/src/jsifier.js b/src/jsifier.js
index 657a9673..7bff588c 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -71,7 +71,7 @@ function JSify(data, functionsOnly, givenFunctions) {
         }
       }
     } else {
-      libFuncsToInclude = ['memset', 'malloc', 'free'];
+      libFuncsToInclude = ['memcpy', 'memset', 'malloc', 'free'];
     }
     libFuncsToInclude.forEach(function(ident) {
       data.functionStubs.push({
diff --git a/src/library.js b/src/library.js
index 5a429131..ad1ff696 100644
--- a/src/library.js
+++ b/src/library.js
@@ -2260,7 +2260,6 @@ LibraryManager.library = {
       } else if (type == 'i64') {
         ret = [{{{ makeGetValue('varargs', 'argIndex', 'i32', undefined, undefined, true) }}},
                {{{ makeGetValue('varargs', 'argIndex+4', 'i32', undefined, undefined, true) }}}];
-        ret = unSign(ret[0], 32) + unSign(ret[1], 32)*Math.pow(2, 32); // Unsigned in this notation. Signed later if needed. // XXX - loss of precision
 #else
       } else if (type == 'i64') {
         ret = {{{ makeGetValue('varargs', 'argIndex', 'i64', undefined, undefined, true) }}};
@@ -2270,7 +2269,7 @@ LibraryManager.library = {
         ret = {{{ makeGetValue('varargs', 'argIndex', 'i32', undefined, undefined, true) }}};
       }
       argIndex += Runtime.getNativeFieldSize(type);
-      return Number(ret);
+      return ret;
     }
 
     var ret = [];
@@ -2392,6 +2391,12 @@ LibraryManager.library = {
           var signed = next == 'd'.charCodeAt(0) || next == 'i'.charCodeAt(0);
           argSize = argSize || 4;
           var currArg = getNextArg('i' + (argSize * 8));
+#if I64_MODE == 1
+          // Flatten i64-1 [low, high] into a (slightly rounded) double
+          if (argSize == 8) {
+            currArg = Runtime.makeBigInt(currArg[0], currArg[1], next == 'u'.charCodeAt(0));
+          }
+#endif
           // Truncate to requested size.
           if (argSize <= 4) {
             var limit = Math.pow(256, argSize) - 1;
@@ -3444,7 +3449,7 @@ LibraryManager.library = {
   },
   strtoll__deps: ['_parseInt'],
   strtoll: function(str, endptr, base) {
-    return __parseInt(str, endptr, base, -9223372036854775808, 9223372036854775807, 64);  // LLONG_MIN, LLONG_MAX; imprecise.
+    return __parseInt(str, endptr, base, -9223372036854775200, 9223372036854775200, 64);  // LLONG_MIN, LLONG_MAX; imprecise.
   },
   strtol__deps: ['_parseInt'],
   strtol: function(str, endptr, base) {
diff --git a/src/parseTools.js b/src/parseTools.js
index ad6f2830..49e5b411 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -558,22 +558,13 @@ function makeInlineCalculation(expression, value, tempVar) {
   return '(' + expression.replace(/VALUE/g, value) + ')';
 }
 
-// Given two 32-bit unsigned parts of an emulated 64-bit number, combine them into a JS number (double).
-// Rounding is inevitable if the number is large. This is a particular problem for small negative numbers
-// (-1 will be rounded!), so handle negatives separately and carefully
-function makeBigInt(low, high) {
-  // here VALUE will be the big part
-  return '(' + high + ' <= 2147483648 ? (' + makeSignOp(low, 'i32', 'un', 1, 1) + '+(' + makeSignOp(high, 'i32', 'un', 1, 1) + '*4294967296))' +
-                                    ' : (' + makeSignOp(low, 'i32', 're', 1, 1) + '+(1+' + makeSignOp(high, 'i32', 're', 1, 1) + ')*4294967296))';
-}
-
 // Makes a proper runtime value for a 64-bit value from low and high i32s. low and high are assumed to be unsigned.
 function makeI64(low, high) {
   high = high || '0';
   if (I64_MODE == 1) {
     return '[' + makeSignOp(low, 'i32', 'un', 1, 1) + ',' + makeSignOp(high, 'i32', 'un', 1, 1) + ']';
   } else {
-    if (high) return makeBigInt(low, high);
+    if (high) return RuntimeGenerator.makeBigInt(low, high);
     return low;
   }
 }
@@ -589,7 +580,7 @@ function splitI64(value) {
 }
 function mergeI64(value) {
   assert(I64_MODE == 1);
-  return makeInlineCalculation(makeBigInt('VALUE[0]', 'VALUE[1]'), value, 'tempI64');
+  return makeInlineCalculation(RuntimeGenerator.makeBigInt('VALUE[0]', 'VALUE[1]'), value, 'tempI64');
 }
 
 // Takes an i64 value and changes it into the [low, high] form used in i64 mode 1. In that
@@ -912,23 +903,32 @@ function makeGetValue(ptr, pos, type, noNeedFirst, unsigned, ignore, align, noSa
             'tempDoubleF64[0])';
   }
 
-  if (EMULATE_UNALIGNED_ACCESSES && USE_TYPED_ARRAYS == 2 && align && isIntImplemented(type)) { // TODO: support unaligned doubles and floats
+  if (USE_TYPED_ARRAYS == 2 && align) {
     // Alignment is important here. May need to split this up
     var bytes = Runtime.getNativeTypeSize(type);
     if (bytes > align) {
-      var ret = '/* unaligned */(';
-      if (bytes <= 4) {
-        for (var i = 0; i < bytes; i++) {
-          ret += 'tempInt' + (i == 0 ? '=' : (i < bytes-1 ? '+=((' : '+(('));
-          ret += makeSignOp(makeGetValue(ptr, getFastValue(pos, '+', i), 'i8', noNeedFirst, unsigned, ignore), 'i8', 'un', true);
-          if (i > 0) ret += ')<<' + (8*i) + ')';
-          if (i < bytes-1) ret += ',';
+      var ret = '(';
+      if (isIntImplemented(type)) {
+        if (bytes <= 4) {
+          for (var i = 0; i < bytes; i++) {
+            ret += 'tempInt' + (i == 0 ? '=' : '|=((');
+            ret += makeGetValue(ptr, getFastValue(pos, '+', i), 'i8', noNeedFirst, 1, ignore);
+            if (i > 0) ret += ')<<' + (8*i) + ')';
+            ret += ',';
+          }
+          ret += makeSignOp('tempInt', type, unsigned ? 'un' : 're', true);
+        } else {
+          assert(bytes == 8);
+          ret += 'tempBigInt=' + makeGetValue(ptr, pos, 'i32', noNeedFirst, true, ignore, align) + ',';
+          ret += 'tempBigInt2=' + makeGetValue(ptr, getFastValue(pos, '+', Runtime.getNativeTypeSize('i32')), 'i32', noNeedFirst, true, ignore, align) + ',';
+          ret += makeI64('tempBigInt', 'tempBigInt2');
         }
       } else {
-        assert(bytes == 8);
-        ret += 'tempBigInt=' + makeGetValue(ptr, pos, 'i32', noNeedFirst, true, ignore, align) + ',';
-        ret += 'tempBigInt2=' + makeGetValue(ptr, getFastValue(pos, '+', Runtime.getNativeTypeSize('i32')), 'i32', noNeedFirst, true, ignore, align) + ',';
-        ret += makeI64('tempBigInt', 'tempBigInt2');
+        if (type == 'float') {
+          ret += 'copyTempFloat(' + getFastValue(ptr, '+', pos) + '),tempDoubleF32[0]';
+        } else {
+          ret += 'copyTempDouble(' + getFastValue(ptr, '+', pos) + '),tempDoubleF64[0]';
+        }
       }
       ret += ')';
       return ret;
@@ -994,22 +994,27 @@ function makeSetValue(ptr, pos, value, type, noNeedFirst, ignore, align, noSafe)
             makeSetValue(ptr, getFastValue(pos, '+', Runtime.getNativeTypeSize('i32')), 'tempDoubleI32[1]', 'i32', noNeedFirst, ignore, align) + ')';
   }
 
-  if (EMULATE_UNALIGNED_ACCESSES && USE_TYPED_ARRAYS == 2 && align && isIntImplemented(type)) { // TODO: support unaligned doubles and floats
+  if (USE_TYPED_ARRAYS == 2 && align) {
     // Alignment is important here. May need to split this up
     var bytes = Runtime.getNativeTypeSize(type);
     if (bytes > align) {
-      var ret = '/* unaligned */';
-      if (bytes <= 4) {
-        ret += 'tempBigInt=' + value + ';';
-        for (var i = 0; i < bytes; i++) {
-          ret += makeSetValue(ptr, getFastValue(pos, '+', i), 'tempBigInt&0xff', 'i8', noNeedFirst, ignore) + ';';
-          if (i < bytes-1) ret += 'tempBigInt>>=8;';
+      var ret = '';
+      if (isIntImplemented(type)) {
+        if (bytes <= 4) {
+          ret += 'tempBigInt=' + value + ';';
+          for (var i = 0; i < bytes; i++) {
+            ret += makeSetValue(ptr, getFastValue(pos, '+', i), 'tempBigInt&0xff', 'i8', noNeedFirst, ignore) + ';';
+            if (i < bytes-1) ret += 'tempBigInt>>=8;';
+          }
+        } else {
+          assert(bytes == 8);
+          ret += 'tempPair=' + ensureI64_1(value) + ';';
+          ret += makeSetValue(ptr, pos, 'tempPair[0]', 'i32', noNeedFirst, ignore, align) + ';';
+          ret += makeSetValue(ptr, getFastValue(pos, '+', Runtime.getNativeTypeSize('i32')), 'tempPair[1]', 'i32', noNeedFirst, ignore, align) + ';';
         }
       } else {
-        assert(bytes == 8);
-        ret += 'tempPair=' + ensureI64_1(value) + ';';
-        ret += makeSetValue(ptr, pos, 'tempPair[0]', 'i32', noNeedFirst, ignore, align) + ';';
-        ret += makeSetValue(ptr, getFastValue(pos, '+', Runtime.getNativeTypeSize('i32')), 'tempPair[1]', 'i32', noNeedFirst, ignore, align) + ';';
+        ret += makeSetValue('tempDoublePtr', 0, value, type, noNeedFirst, ignore, 8) + ';';
+        ret += makeCopyValues(getFastValue(ptr, '+', pos), 'tempDoublePtr', Runtime.getNativeTypeSize(type), type, null, align);
       }
       return ret;
     }
@@ -1465,7 +1470,7 @@ function makeSignOp(value, type, op, force, ignore) {
   var bits, full;
   if (type in Runtime.INT_TYPES) {
     bits = parseInt(type.substr(1));
-    full = op + 'Sign(' + value + ', ' + bits + ', ' + Math.floor(correctSpecificSign() && !PGO) + (
+    full = op + 'Sign(' + value + ', ' + bits + ', ' + Math.floor(ignore || (correctSpecificSign() && !PGO)) + (
       PGO ? ', "' + (ignore ? '' : Debugging.getIdentifier()) + '"' : ''
     ) + ')';
     // Always sign/unsign constants at compile time, regardless of CHECK/CORRECT
@@ -1473,7 +1478,7 @@ function makeSignOp(value, type, op, force, ignore) {
       return eval(full).toString();
     }
   }
-  if (!correctSigns() && !CHECK_SIGNS && !force) return value;
+  if ((ignore || !correctSigns()) && !CHECK_SIGNS && !force) return value;
   if (type in Runtime.INT_TYPES) {
     // shortcuts
     if (!CHECK_SIGNS || ignore) {
@@ -1589,17 +1594,43 @@ function processMathop(item) {
       case 'xor': {
         return '[' + ident1 + '[0] ^ ' + ident2 + '[0], ' + ident1 + '[1] ^ ' + ident2 + '[1]]';
       }
-      case 'shl': {
-        return '[' + ident1 + '[0] << ' + ident2 + ', ' +
-                 '('+ident1 + '[1] << ' + ident2 + ') | ((' + ident1 + '[0]&((Math.pow(2, ' + ident2 + ')-1)<<(32-' + ident2 + '))) >>> (32-' + ident2 + '))]';
-      }
-      case 'ashr': {
-        return '[('+ident1 + '[0] >>> ' + ident2 + ') | ((' + ident1 + '[1]&(Math.pow(2, ' + ident2 + ')-1))<<(32-' + ident2 + ')),' +
-                    ident1 + '[1] >>> ' + ident2 + ']';
-      }
+      case 'shl':
+      case 'ashr':
       case 'lshr': {
-        return '[('+ident1 + '[0] >>> ' + ident2 + ') | ((' + ident1 + '[1]&(Math.pow(2, ' + ident2 + ')-1))<<(32-' + ident2 + ')),' +
-                    ident1 + '[1] >>> ' + ident2 + ']';
+        assert(isNumber(ident2));
+        bits = parseInt(ident2);
+        var ander = Math.pow(2, bits)-1;
+        if (bits < 32) {
+          switch (op) {
+            case 'shl':
+              return '[' + ident1 + '[0] << ' + ident2 + ', ' +
+                       '('+ident1 + '[1] << ' + ident2 + ') | ((' + ident1 + '[0]&(' + ander + '<<' + (32 - bits) + ')) >>> (32-' + ident2 + '))]';
+            case 'ashr':
+              return '[((('+ident1 + '[0] >>> ' + ident2 + ') | ((' + ident1 + '[1]&' + ander + ')<<' + (32 - bits) + ')) >> 0) >>> 0,' +
+                          '(' + ident1 + '[1] >> ' + ident2 + ') >>> 0]';
+            case 'lshr':
+              return '[(('+ident1 + '[0] >>> ' + ident2 + ') | ((' + ident1 + '[1]&' + ander + ')<<' + (32 - bits) + ')) >>> 0,' +
+                          ident1 + '[1] >>> ' + ident2 + ']';
+          }
+        } else if (bits == 32) {
+          switch (op) {
+            case 'shl':
+              return '[0, ' + ident1 + '[0]]';
+            case 'ashr':
+              return '[' + ident1 + '[1], (' + ident1 + '[1]|0) < 0 ? ' + ander + ' : 0]';
+            case 'lshr':
+              return '[' + ident1 + '[1], 0]';
+          }
+        } else { // bits > 32: every result bit comes from a single input word, shifted by (bits - 32)
+          switch (op) {
+            case 'shl': // low word becomes 0; high word is the old low word shifted left
+              return '[0, ' + ident1 + '[0] << ' + (bits - 32) + ']';
+            case 'ashr': // low word is the old high word shifted with sign; high word is pure sign fill
+              return '[(' + ident1 + '[1] >> ' + (bits - 32) + ') >>> 0, (' + ident1 + '[1]|0) < 0 ? 4294967295 : 0]'; // fill must be a 32-bit all-ones word; ander (2^bits-1) would exceed 32 bits here
+            case 'lshr': // low word is the old high word shifted without sign; high word is 0
+              return '[' + ident1 + '[1] >>> ' + (bits - 32) + ', 0]';
+          }
+        }
       }
       case 'uitofp': case 'sitofp': return ident1 + '[0] + ' + ident1 + '[1]*4294967296';
       case 'fptoui': case 'fptosi': return splitI64(ident1);
diff --git a/src/preamble.js b/src/preamble.js
index b9da766f..94add7f4 100644
--- a/src/preamble.js
+++ b/src/preamble.js
@@ -344,13 +344,6 @@ var tempValue, tempInt, tempBigInt, tempInt2, tempBigInt2, tempPair, tempBigIntI
 #if I64_MODE == 1
 var tempI64, tempI64b;
 #endif
-#if DOUBLE_MODE == 1
-#if USE_TYPED_ARRAYS == 2
-var tempDoubleBuffer = new ArrayBuffer(8);
-var tempDoubleI32 = new Int32Array(tempDoubleBuffer);
-var tempDoubleF64 = new Float64Array(tempDoubleBuffer);
-#endif
-#endif
 
 function abort(text) {
   print(text + ':\n' + (new Error).stack);
@@ -369,6 +362,7 @@ function assert(condition, text) {
 // makeSetValue is done at compile-time and generates the needed
 // code then, whereas this function picks the right code at
 // run-time.
+// Note that setValue and getValue only do *aligned* writes and reads!
 
 function setValue(ptr, value, type, noSafe) {
   type = type || 'i8';
@@ -646,6 +640,33 @@ Module['HEAPF32'] = HEAPF32;
 STACK_ROOT = STACKTOP = Runtime.alignMemory(STATICTOP);
 STACK_MAX = STACK_ROOT + TOTAL_STACK;
 
+#if DOUBLE_MODE == 1
+#if USE_TYPED_ARRAYS == 2
+var tempDoublePtr = Runtime.alignMemory(STACK_MAX, 8);
+var tempDoubleI8  = HEAP8.subarray(tempDoublePtr);
+var tempDoubleI32 = HEAP32.subarray(tempDoublePtr >> 2);
+var tempDoubleF32 = HEAPF32.subarray(tempDoublePtr >> 2);
+var tempDoubleF64 = new Float64Array(HEAP8.buffer).subarray(tempDoublePtr >> 3);
+function copyTempFloat(ptr) { // Copies the 4 bytes at ptr into the temp area one byte at a time, so generated code can then read tempDoubleF32[0]. A function, because inlining this code increases code size too much
+  tempDoubleI8[0] = HEAP8[ptr];
+  tempDoubleI8[1] = HEAP8[ptr+1];
+  tempDoubleI8[2] = HEAP8[ptr+2];
+  tempDoubleI8[3] = HEAP8[ptr+3];
+}
+function copyTempDouble(ptr) { // Copies the 8 bytes at ptr into the temp area one byte at a time, so generated code can then read tempDoubleF64[0]
+  tempDoubleI8[0] = HEAP8[ptr];
+  tempDoubleI8[1] = HEAP8[ptr+1];
+  tempDoubleI8[2] = HEAP8[ptr+2];
+  tempDoubleI8[3] = HEAP8[ptr+3];
+  tempDoubleI8[4] = HEAP8[ptr+4];
+  tempDoubleI8[5] = HEAP8[ptr+5];
+  tempDoubleI8[6] = HEAP8[ptr+6];
+  tempDoubleI8[7] = HEAP8[ptr+7];
+}
+STACK_MAX = tempDoublePtr + 8;
+#endif
+#endif
+
 STATICTOP = alignMemoryPage(STACK_MAX);
 
 function callRuntimeCallbacks(callbacks) {
diff --git a/src/runtime.js b/src/runtime.js
index 6439d0ed..6f17028a 100644
--- a/src/runtime.js
+++ b/src/runtime.js
@@ -73,6 +73,15 @@ var RuntimeGenerator = {
       quantum = '(quantum ? quantum : {{{ QUANTUM_SIZE }}})';
     }
     return target + ' = ' + Runtime.forceAlign(target, quantum);
+  },
+
+  // Given the two 32-bit parts of an emulated 64-bit number, generate a JS expression that combines them into a JS number (double).
+  // Rounding is inevitable if the number is large. The emitted code tests |unsigned| at runtime: if truthy, both parts are un-signed;
+  // otherwise the high part is re-signed, so small negatives such as -1 come out exact. Low is un-signed in both branches.
+  makeBigInt: function(low, high, unsigned) { // if |unsigned| is omitted, the text 'undefined' is emitted, which selects the signed branch
+    return '(' + unsigned +
+           ' ? (' + makeSignOp(low, 'i32', 'un', 1, 1) + '+(' + makeSignOp(high, 'i32', 'un', 1, 1) + '*4294967296))' +
+           ' : (' + makeSignOp(low, 'i32', 'un', 1, 1) + '+(' + makeSignOp(high, 'i32', 're', 1, 1) + '*4294967296)))';
   }
 };
 
@@ -260,6 +269,7 @@ var Runtime = {
 Runtime.stackAlloc = unInline('stackAlloc', ['size']);
 Runtime.staticAlloc = unInline('staticAlloc', ['size']);
 Runtime.alignMemory = unInline('alignMemory', ['size', 'quantum']);
+Runtime.makeBigInt = unInline('makeBigInt', ['low', 'high', 'unsigned']);
 
 function getRuntime() {
   var ret = 'var Runtime = {\n';
diff --git a/src/settings.js b/src/settings.js
index 7e900ea9..ae07b1f4 100644
--- a/src/settings.js
+++ b/src/settings.js
@@ -73,13 +73,6 @@ var DOUBLE_MODE = 1; // How to load and store 64-bit doubles. Without typed arra
                      // then load it aligned, and that load-store will make JS engines alter it if it is being
                      // stored to a typed array for security reasons. That will 'fix' the number from being a
                      // NaN or an infinite number.
-var EMULATE_UNALIGNED_ACCESSES = 0; // If set, the compiler will 'emulate' loads and stores that are not known to
-                                    // be sufficiently aligned, by working on individual bytes. This can be
-                                    // important in USE_TYPED_ARRAYS == 2, where unaligned accesses do not work,
-                                    // specifically in the case where unsafe LLVM optimizations have generated possibly
-                                    // unaligned code. (Without unsafe LLVM optimizations, there should be no
-                                    // need for this option.)
-                                    // Currently this only works for integers, not doubles and floats.
 
 var CLOSURE_ANNOTATIONS = 0; // If set, the generated code will be annotated for the closure
                              // compiler. This potentially lets closure optimize the code better.
diff --git a/system/lib/debugging.cpp b/system/lib/debugging.cpp
new file mode 100644
index 00000000..ff9e0d68
--- /dev/null
+++ b/system/lib/debugging.cpp
@@ -0,0 +1,22 @@
+
+// Some stuff to patch up an emscripten-sdk build so it can be built natively (see nativize_llvm)
+
+#include <stdio.h>
+#include <stdlib.h>
+
+extern "C" {
+
+int *__errno() // Returns the address of one function-local static int, giving native builds a storage slot for errno
+{
+  static int e = 0; // single shared slot, zero-initialized
+  return &e;
+}
+
+void __assert_func(const char *file, int line, const char *assertt, const char *cond) // Prints the four assertion details to stdout, then aborts the process
+{
+  printf("assert-func: %s : %d : %s : %s\n", file, line, assertt, cond); // NOTE(review): stdout may still be buffered when abort() runs, so this message could be lost when piped - confirm
+  abort();
+}
+
+}
+
diff --git a/tests/parseInt/output.txt b/tests/parseInt/output.txt
index e345e2ac..7ab00631 100644
--- a/tests/parseInt/output.txt
+++ b/tests/parseInt/output.txt
@@ -1,6 +1,6 @@
 strtol("-9223372036854775809") = -2147483648
 ERR 34
-strtoll("-9223372036854775809") = 9223372036854776000
+strtoll("-9223372036854775809") = -9223372036854775000
 ERR 34
 strtoul("-9223372036854775809") = 4294967295
 ERR 34
@@ -8,7 +8,7 @@ strtoull("-9223372036854775809") = 9223372036854774000
 
 strtol("-9223372036854775808") = -2147483648
 ERR 34
-strtoll("-9223372036854775808") = 9223372036854776000
+strtoll("-9223372036854775808") = -9223372036854775000
 ERR 34
 strtoul("-9223372036854775808") = 4294967295
 ERR 34
@@ -16,7 +16,7 @@ strtoull("-9223372036854775808") = 9223372036854774000
 
 strtol("-9223372036854775807") = -2147483648
 ERR 34
-strtoll("-9223372036854775807") = 9223372036854776000
+strtoll("-9223372036854775807") = -9223372036854775000
 ERR 34
 strtoul("-9223372036854775807") = 4294967295
 ERR 34
@@ -24,7 +24,7 @@ strtoull("-9223372036854775807") = 9223372036854774000
 
 strtol("-2147483649") = -2147483648
 ERR 34
-strtoll("-2147483649") = -2147483648
+strtoll("-2147483649") = -2147483649
 strtoul("-2147483649") = 2147483647
 strtoull("-2147483649") = 18446744071562068000
 
@@ -34,17 +34,17 @@ strtoul("-2147483648") = 2147483648
 strtoull("-2147483648") = 18446744071562068000
 
 strtol("-2147483647") = -2147483647
-strtoll("-2147483647") = -2147483648
+strtoll("-2147483647") = -2147483647
 strtoul("-2147483647") = 2147483649
 strtoull("-2147483647") = 18446744071562068000
 
 strtol("-5") = -5
-strtoll("-5") = 0
+strtoll("-5") = -5
 strtoul("-5") = 4294967291
 strtoull("-5") = 18446744069414584000
 
 strtol("-1") = -1
-strtoll("-1") = 0
+strtoll("-1") = -1
 strtoul("-1") = 4294967295
 strtoull("-1") = 18446744069414584000
 
@@ -100,7 +100,7 @@ strtoull("4294967296") = 4294967296
 
 strtol("18446744073709551614") = 2147483647
 ERR 34
-strtoll("18446744073709551614") = 9223372036854776000
+strtoll("18446744073709551614") = 9223372036854775000
 ERR 34
 strtoul("18446744073709551614") = 4294967295
 ERR 34
@@ -108,7 +108,7 @@ strtoull("18446744073709551614") = 18446744069414584000
 
 strtol("18446744073709551615") = 2147483647
 ERR 34
-strtoll("18446744073709551615") = 9223372036854776000
+strtoll("18446744073709551615") = 9223372036854775000
 ERR 34
 strtoul("18446744073709551615") = 4294967295
 ERR 34
@@ -116,7 +116,7 @@ strtoull("18446744073709551615") = 18446744069414584000
 
 strtol("18446744073709551616") = 2147483647
 ERR 34
-strtoll("18446744073709551616") = 9223372036854776000
+strtoll("18446744073709551616") = 9223372036854775000
 ERR 34
 strtoul("18446744073709551616") = 4294967295
 ERR 34
diff --git a/tests/parseInt/output_i64mode1.txt b/tests/parseInt/output_i64mode1.txt
index 649500b0..7ab00631 100644
--- a/tests/parseInt/output_i64mode1.txt
+++ b/tests/parseInt/output_i64mode1.txt
@@ -1,6 +1,6 @@
 strtol("-9223372036854775809") = -2147483648
 ERR 34
-strtoll("-9223372036854775809") = 9223372036854776000
+strtoll("-9223372036854775809") = -9223372036854775000
 ERR 34
 strtoul("-9223372036854775809") = 4294967295
 ERR 34
@@ -8,7 +8,7 @@ strtoull("-9223372036854775809") = 9223372036854774000
 
 strtol("-9223372036854775808") = -2147483648
 ERR 34
-strtoll("-9223372036854775808") = 9223372036854776000
+strtoll("-9223372036854775808") = -9223372036854775000
 ERR 34
 strtoul("-9223372036854775808") = 4294967295
 ERR 34
@@ -16,7 +16,7 @@ strtoull("-9223372036854775808") = 9223372036854774000
 
 strtol("-9223372036854775807") = -2147483648
 ERR 34
-strtoll("-9223372036854775807") = 9223372036854776000
+strtoll("-9223372036854775807") = -9223372036854775000
 ERR 34
 strtoul("-9223372036854775807") = 4294967295
 ERR 34
@@ -24,7 +24,7 @@ strtoull("-9223372036854775807") = 9223372036854774000
 
 strtol("-2147483649") = -2147483648
 ERR 34
-strtoll("-2147483649") = -2147483648
+strtoll("-2147483649") = -2147483649
 strtoul("-2147483649") = 2147483647
 strtoull("-2147483649") = 18446744071562068000
 
@@ -34,19 +34,19 @@ strtoul("-2147483648") = 2147483648
 strtoull("-2147483648") = 18446744071562068000
 
 strtol("-2147483647") = -2147483647
-strtoll("-2147483647") = -2147483648
+strtoll("-2147483647") = -2147483647
 strtoul("-2147483647") = 2147483649
 strtoull("-2147483647") = 18446744071562068000
 
 strtol("-5") = -5
-strtoll("-5") = 0
+strtoll("-5") = -5
 strtoul("-5") = 4294967291
-strtoull("-5") = 18446744073709552000
+strtoull("-5") = 18446744069414584000
 
 strtol("-1") = -1
-strtoll("-1") = 0
+strtoll("-1") = -1
 strtoul("-1") = 4294967295
-strtoull("-1") = 18446744073709552000
+strtoull("-1") = 18446744069414584000
 
 strtol("0") = 0
 strtoll("0") = 0
@@ -100,27 +100,27 @@ strtoull("4294967296") = 4294967296
 
 strtol("18446744073709551614") = 2147483647
 ERR 34
-strtoll("18446744073709551614") = 9223372036854776000
+strtoll("18446744073709551614") = 9223372036854775000
 ERR 34
 strtoul("18446744073709551614") = 4294967295
 ERR 34
-strtoull("18446744073709551614") = 18446744073709552000
+strtoull("18446744073709551614") = 18446744069414584000
 
 strtol("18446744073709551615") = 2147483647
 ERR 34
-strtoll("18446744073709551615") = 9223372036854776000
+strtoll("18446744073709551615") = 9223372036854775000
 ERR 34
 strtoul("18446744073709551615") = 4294967295
 ERR 34
-strtoull("18446744073709551615") = 18446744073709552000
+strtoull("18446744073709551615") = 18446744069414584000
 
 strtol("18446744073709551616") = 2147483647
 ERR 34
-strtoll("18446744073709551616") = 9223372036854776000
+strtoll("18446744073709551616") = 9223372036854775000
 ERR 34
 strtoul("18446744073709551616") = 4294967295
 ERR 34
-strtoull("18446744073709551616") = 18446744073709552000
+strtoull("18446744073709551616") = 18446744069414584000
 
 strtol("0x12", 0, 0) = 18
 strtol("0x12", 0, 10) = 0
diff --git a/tests/runner.py b/tests/runner.py
index 126071a0..eb96160e 100755
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -126,7 +126,7 @@ process(sys.argv[1])
   # Build JavaScript code from source code
   def build(self, src, dirname, filename, output_processor=None, main_file=None, additional_files=[], libraries=[], includes=[], build_ll_hook=None, extra_emscripten_args=[], post_build=None):
 
-    Building.pick_llvm_opts(3, safe=Building.LLVM_OPTS != 2) # pick llvm opts here, so we include changes to Settings in the test case code
+    Building.pick_llvm_opts(3) # pick llvm opts here, so we include changes to Settings in the test case code
 
     # Copy over necessary files for compiling the source
     if main_file is None:
@@ -420,6 +420,17 @@ if 'benchmark' not in str(sys.argv) and 'sanity' not in str(sys.argv):
             #include <stdio.h>
             int main()
             {
+              long long a = 0x2b00505c10;
+              long long b = a >> 29;
+              long long c = a >> 32;
+              long long d = a >> 34;
+              printf("*%Ld,%Ld,%Ld,%Ld*\\n", a, b, c, d);
+              unsigned long long ua = 0x2b00505c10;
+              unsigned long long ub = ua >> 29;
+              unsigned long long uc = ua >> 32;
+              unsigned long long ud = ua >> 34;
+              printf("*%Ld,%Ld,%Ld,%Ld*\\n", ua, ub, uc, ud);
+</