24 files changed, 346 insertions, 172 deletions
diff --git a/AUTHORS b/AUTHORS
index 413b9de1..d71eea47 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -112,3 +112,4 @@ a license to everyone to use it as detailed in LICENSE.)
 * Lu Wang <coolwanglu@gmail.com>
 * Heidi Pan <heidi.pan@intel.com> (copyright owned by Intel)
 * Vasilis Kalintiris <ehostunreach@gmail.com>
+* Adam C. Clifton <adam@hulkamaniac.com>
diff --git a/emscripten.py b/emscripten.py
index 7514b3ca..111c2df4 100755
--- a/emscripten.py
+++ b/emscripten.py
@@ -726,7 +726,6 @@ def emscript_fast(infile, settings, outfile, libraries=[], compiler_engine=None,
   """
 
   assert(settings['ASM_JS']) # TODO: apply ASM_JS even in -O0 for fastcomp
-  assert(settings['RUNNING_JS_OPTS'])
 
   # Overview:
   #   * Run LLVM backend to emit JS. JS includes function bodies, memory initializer,
@@ -741,9 +740,14 @@ def emscript_fast(infile, settings, outfile, libraries=[], compiler_engine=None,
 
   if DEBUG: shutil.copyfile(infile, os.path.join(shared.CANONICAL_TEMP_DIR, 'temp0.ll'))
 
+  extra_opt_args = []
+  #if DEBUG: extra_opt_args.append('-time-passes')
+
+  if DEBUG: t = time.time()
+
   if DEBUG: logging.debug('  ..1..')
   temp1 = temp_files.get('.1.bc').name
-  shared.jsrun.timeout_run(subprocess.Popen([os.path.join(shared.LLVM_ROOT, 'opt'), infile, '-pnacl-abi-simplify-preopt', '-o', temp1]))
+  shared.jsrun.timeout_run(subprocess.Popen([os.path.join(shared.LLVM_ROOT, 'opt'), infile, '-pnacl-abi-simplify-preopt', '-o', temp1] + extra_opt_args))
   assert os.path.exists(temp1)
   if DEBUG:
     shutil.copyfile(temp1, os.path.join(shared.CANONICAL_TEMP_DIR, 'temp1.bc'))
@@ -760,19 +764,27 @@ def emscript_fast(infile, settings, outfile, libraries=[], compiler_engine=None,
 
   if DEBUG: logging.debug('  ..3..')
   temp3 = temp_files.get('.3.bc').name
-  shared.jsrun.timeout_run(subprocess.Popen([os.path.join(shared.LLVM_ROOT, 'opt'), temp2, '-pnacl-abi-simplify-postopt', '-o', temp3]))
+  shared.jsrun.timeout_run(subprocess.Popen([os.path.join(shared.LLVM_ROOT, 'opt'), temp2, '-pnacl-abi-simplify-postopt', '-o', temp3] + extra_opt_args))
   #'-print-after-all'
   assert os.path.exists(temp3)
   if DEBUG:
     shutil.copyfile(temp3, os.path.join(shared.CANONICAL_TEMP_DIR, 'temp3.bc'))
     shared.jsrun.timeout_run(subprocess.Popen([os.path.join(shared.LLVM_ROOT, 'llvm-dis'), 'temp3.bc', '-o', 'temp3.ll']))
 
+  if DEBUG:
+    logging.debug('  emscript: ir simplification took %s seconds' % (time.time() - t))
+    t = time.time()
+
   if DEBUG: logging.debug('  ..4..')
   temp4 = temp_files.get('.4.js').name
   backend_compiler = os.path.join(shared.LLVM_ROOT, 'llc')
   shared.jsrun.timeout_run(subprocess.Popen([backend_compiler, temp3, '-march=js', '-filetype=asm', '-o', temp4], stdout=subprocess.PIPE))
   if DEBUG: shutil.copyfile(temp4, os.path.join(shared.CANONICAL_TEMP_DIR, 'temp4.js'))
 
+  if DEBUG:
+    logging.debug('  emscript: llvm backend took %s seconds' % (time.time() - t))
+    t = time.time()
+
   # Split up output
   backend_output = open(temp4).read()
   #if DEBUG: print >> sys.stderr, backend_output
@@ -801,6 +813,20 @@ def emscript_fast(infile, settings, outfile, libraries=[], compiler_engine=None,
     table_sizes[k] = str(v.count(',')) # undercounts by one, but that is what we want
   funcs = re.sub(r"#FM_(\w+)#", lambda m: table_sizes[m.groups(0)[0]], funcs)
 
+  # fix +float into float.0, if not running js opts
+  if not settings['RUNNING_JS_OPTS']:
+    def fix_dot_zero(m): # rewrite one '+<number>' match as a float literal, dropping the unary plus (e.g. '=+5' -> '=5.0')
+      num = m.group(3)
+      # TODO: handle 0x floats? (a hex digit 'e', as in 0xe5, is currently mistaken for an exponent marker)
+      if num.find('.') < 0:
+        e = num.lower().find('e') # the pattern accepts 'E' as well as 'e', so look for the exponent case-insensitively
+        if e < 0:
+          num += '.0'
+        else:
+          num = num[:e] + '.0' + num[e:] # keep the exponent after the inserted fraction: 1e5 -> 1.0e5
+      return m.group(1) + m.group(2) + num
+    funcs = re.sub(r'([(=,+\-*/%] *)\+(-?)((0x)?[0-9a-f]*\.?[0-9]+([eE][-+]?[0-9]+)?)', lambda m: fix_dot_zero(m), funcs)
+
   # js compiler
 
   if DEBUG: logging.debug('emscript: js compiler glue')
@@ -836,7 +862,9 @@ def emscript_fast(infile, settings, outfile, libraries=[], compiler_engine=None,
   assert '//FORWARDED_DATA:' in out, 'Did not receive forwarded data in pre output - process failed?'
   glue, forwarded_data = out.split('//FORWARDED_DATA:')
 
-  #print >> sys.stderr, out
+  if DEBUG:
+    logging.debug('  emscript: glue took %s seconds' % (time.time() - t))
+    t = time.time()
 
   last_forwarded_json = forwarded_json = json.loads(forwarded_data)
 
@@ -932,7 +960,10 @@ def emscript_fast(infile, settings, outfile, libraries=[], compiler_engine=None,
         if item == '0': return bad if not newline else (bad + '\n')
         if item not in metadata['implementedFunctions']:
           # this is imported into asm, we must wrap it
-          code = item + '(' + coerced_params + ')'
+          call_ident = item
+          if call_ident in metadata['redirects']: call_ident = metadata['redirects'][call_ident]
+          if not call_ident.startswith('_') and not call_ident.startswith('Math_'): call_ident = '_' + call_ident
+          code = call_ident + '(' + coerced_params + ')'
           if sig[0] != 'v':
             code = 'return ' + shared.JS.make_coercion(code, sig[0], settings)
           code += ';'
@@ -1203,6 +1234,8 @@ Runtime.stackRestore = function(top) { asm['stackRestore'](top) };
 
   outfile.close()
 
+  if DEBUG: logging.debug('  emscript: final python processing took %s seconds' % (time.time() - t))
+
 if os.environ.get('EMCC_FAST_COMPILER'):
   emscript = emscript_fast
 
diff --git a/src/jsifier.js b/src/jsifier.js
index b5502741..6b831b04 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -1859,7 +1859,7 @@ function JSify(data, functionsOnly, givenFunctions) {
         // first row are utilities called from generated code, second are needed from fastLong
         ['i64Add', 'i64Subtract', 'bitshift64Shl', 'bitshift64Lshr', 'bitshift64Ashr',
          'llvm_ctlz_i32', 'llvm_cttz_i32'].forEach(function(func) {
-          if (!Functions.libraryFunctions[func] || (phase == 'glue' && func[0] === 'l')) { // TODO: one-by-one in fastcomp glue mode
+          if (!Functions.libraryFunctions[func] || (phase == 'glue' && func[0] === 'l' && !addedLibraryItems[func])) { // TODO: one-by-one in fastcomp glue mode
             print(processLibraryFunction(LibraryManager.library[func], func)); // must be first to be close to generated code
             Functions.implementedFunctions['_' + func] = LibraryManager.library[func + '__sig'];
             Functions.libraryFunctions[func] = phase == 'glue' ? 2 : 1; // XXX
diff --git a/src/library_fs.js b/src/library_fs.js
index 5412185f..1e7856aa 100644
--- a/src/library_fs.js
+++ b/src/library_fs.js
@@ -961,7 +961,7 @@ mergeInto(LibraryManager.library, {
         throw new FS.ErrnoError(ERRNO_CODES.EACCES);
       }
       if (!stream.stream_ops.mmap) {
-        throw new FS.errnoError(ERRNO_CODES.ENODEV);
+        throw new FS.ErrnoError(ERRNO_CODES.ENODEV);
       }
       return stream.stream_ops.mmap(stream, buffer, offset, length, position, prot, flags);
     },
diff --git a/src/library_glfw.js b/src/library_glfw.js
index 647d4bb6..17e8956a 100644
--- a/src/library_glfw.js
+++ b/src/library_glfw.js
@@ -120,7 +120,6 @@ var LibraryGLFW = {
       if (event.charCode) {
         var char = GLFW.getUnicodeChar(event.charCode);
         if (char !== null && GLFW.charFunc) {
-          event.preventDefault();
           Runtime.dynCall('vii', GLFW.charFunc, [event.charCode, 1]);
         }
       }
@@ -130,13 +129,18 @@ var LibraryGLFW = {
       var key = GLFW.DOMToGLFWKeyCode(event.keyCode);
       if (key && GLFW.keyFunc) {
         GLFW.keys[key] = status;
-        event.preventDefault();
         Runtime.dynCall('vii', GLFW.keyFunc, [key, status]);
       }
     },
 
     onKeydown: function(event) {
       GLFW.onKeyChanged(event, 1);//GLFW_PRESS
+      // This logic comes directly from the sdl implementation. We cannot
+      // call preventDefault on all keydown events otherwise onKeyPress will
+      // not get called
+      if (event.keyCode === 8 /* backspace */ || event.keyCode === 9 /* tab */) {
+        event.preventDefault();
+      }
     },
 
     onKeyup: function(event) {
diff --git a/src/library_sdl.js b/src/library_sdl.js
index 40e5e3ab..2efc1271 100644
--- a/src/library_sdl.js
+++ b/src/library_sdl.js
@@ -1247,10 +1247,7 @@ var LibrarySDL = {
     return 0;
   },
 
-  SDL_LowerBlit__deps: ['SDL_UpperBlit'],
-  SDL_LowerBlit: function(src, srcrect, dst, dstrect) {
-    return _SDL_UpperBlit(src, srcrect, dst, dstrect);
-  },
+  SDL_LowerBlit: 'SDL_UpperBlit',
 
   SDL_FillRect: function(surf, rect, color) {
     var surfData = SDL.surfaces[surf];
diff --git a/src/modules.js b/src/modules.js
index b9b8ab5e..3e7405f8 100644
--- a/src/modules.js
+++ b/src/modules.js
@@ -429,6 +429,26 @@ var LibraryManager = {
       eval(processMacros(preprocess(read(libraries[i]))));
     }
 
+    /*
+    // export code for CallHandlers.h
+    printErr('============================');
+    for (var x in this.library) {
+      var y = this.library[x];
+      if (typeof y === 'string' && x.indexOf('__sig') < 0 && x.indexOf('__postset') < 0 && y.indexOf(' ') < 0) {
+        printErr('DEF_REDIRECT_HANDLER(' + x + ', ' + y + ');');
+      }
+    }
+    printErr('============================');
+    for (var x in this.library) {
+      var y = this.library[x];
+      if (typeof y === 'string' && x.indexOf('__sig') < 0 && x.indexOf('__postset') < 0 && y.indexOf(' ') < 0) {
+        printErr('  SETUP_CALL_HANDLER(' + x + ');');
+      }
+    }
+    printErr('============================');
+    // end export code for CallHandlers.h
+    */
+
     this.loaded = true;
   },
 
diff --git a/tests/cases/caall.ll b/tests/cases/caall.ll
index 5b8f7f29..2cc231ec 100644
--- a/tests/cases/caall.ll
+++ b/tests/cases/caall.ll
@@ -1,6 +1,6 @@
 ; ModuleID = 'tests/hello_world.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-pc-linux-gnu"
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
 
 @.str = private unnamed_addr constant [15 x i8] c"hello, world!\0A\00", align 1 ; [#uses=1 type=[15 x i8]*]
 
@@ -11,14 +11,14 @@ entry:
   store i32 0, i32* %retval
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0)) ; [#uses=0 type=i32]
   %call12 = call void (i32*)** @_ZNSt3__13mapINS_12basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPFvP6ObjectENS_4lessIS6_EENS4_INS_4pairIKS6_SA_EEEEEixERSE_(i32 10)
-  %26 = load void (%class.Object*)** %call12
+  %l26 = load void (i32*)** %call12
   ret i32 1
 }
 
-define (i32*)** @_ZNSt3__13mapINS_12basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPFvP6ObjectENS_4lessIS6_EENS4_INS_4pairIKS6_SA_EEEEEixERSE_(i32 %x) {
+define void (i32*)** @_ZNSt3__13mapINS_12basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPFvP6ObjectENS_4lessIS6_EENS4_INS_4pairIKS6_SA_EEEEEixERSE_(i32 %x) {
 entry:
-  %ret = inttoptr i32 0 to (i32*)**
-  ret (i32*)** %ret
+  %ret = inttoptr i32 0 to void (i32*)**
+  ret void (i32*)** %ret
 }
 
 ; [#uses=1]
diff --git a/tests/cases/i24_mem_ta2.ll b/tests/cases/i24_mem_ta2.ll
index e50014ca..550389fe 100644
--- a/tests/cases/i24_mem_ta2.ll
+++ b/tests/cases/i24_mem_ta2.ll
@@ -1,8 +1,8 @@
 ; ModuleID = 'tests/hello_world.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-pc-linux-gnu"
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
 
-@.str = private unnamed_addr constant [15 x i8] c".%x.\0A\00", align 1 ; [#uses=1 type=[5 x i8]*]
+@.str = private unnamed_addr constant [6 x i8] c".%x.\0A\00", align 1 ; [#uses=1 type=[6 x i8]*]
 
 define i32 @main() {
 entry:
@@ -11,11 +11,11 @@ entry:
   %i24 = bitcast i32* %mem to i24*
   %load = load i24* %i24, align 4
   %load32 = zext i24 %load to i32
-  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %load32)
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i32 %load32)
   %val_24 = trunc i32 4041265344 to i24
   store i24 %val_24, i24* %i24, align 4
   %load32b = load i32* %mem, align 4
-  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %load32b)
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i32 %load32b)
   ret i32 1
 }
 
diff --git a/tests/cases/i64toi8star.ll b/tests/cases/i64toi8star.ll
index d4a39340..b2307449 100644
--- a/tests/cases/i64toi8star.ll
+++ b/tests/cases/i64toi8star.ll
@@ -25,8 +25,8 @@ entry:
   %retval = alloca i32                            ; [#uses=2]
   %0 = alloca i32                                 ; [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; [#uses=0]
-  %5 = call i32 @PyLong_FromVoidPtr(i8* null) nounwind ; [#uses=0]
-  %13 = call i32 @PyLong_FromVoidPtr(i8* inttoptr (i64 1 to i8*)) nounwind ; [#uses=0]
-  %1 = call i32 bitcast (i32 (i8*)* @puts to i32 (i32*)*)(i8* getelementptr inbounds ([14 x i8]* @.str, i32 0, i32 0)) ; [#uses=0]
+  %a5 = call i32 @PyLong_FromVoidPtr(i8* null) nounwind ; [#uses=0]
+  %a13 = call i32 @PyLong_FromVoidPtr(i8* inttoptr (i64 1 to i8*)) nounwind ; [#uses=0]
+  %a1 = call i32 @puts(i8* getelementptr inbounds ([14 x i8]* @.str, i32 0, i32 0)) ; [#uses=0]
   ret i32 0
 }
diff --git a/tests/cases/inttoptr.ll b/tests/cases/inttoptr.ll
index b0711672..c1b40a74 100644
--- a/tests/cases/inttoptr.ll
+++ b/tests/cases/inttoptr.ll
@@ -1,6 +1,6 @@
 ; ModuleID = '/tmp/emscripten/tmp/src.cpp.o'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-pc-linux-gnu"
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
 
 @.str = private constant [14 x i8] c"hello, world!\00", align 1 ; [#uses=1]
 
@@ -14,7 +14,7 @@ entry:
   %0 = alloca i32                                 ; [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; [#uses=0]
   %sz.i7 = inttoptr i32 64 to i32*          ; [#uses=1 type=i32*]
-  store i32 184, i32* %sz.i7, align 8, !tbaa !1610
-  %1 = call i32 bitcast (i32 (i8*)* @puts to i32 (i32*)*)(i8* getelementptr inbounds ([14 x i8]* @.str, i32 0, i32 0)) ; [#uses=0]
+  store i32 184, i32* %sz.i7, align 8
+  %1 = call i32 @puts(i8* getelementptr inbounds ([14 x i8]* @.str, i32 0, i32 0)) ; [#uses=0]
   ret i32 0
 }
diff --git a/tests/cases/invokebitcast.ll b/tests/cases/invokebitcast.ll
index ffb5803f..ec090b0d 100644
--- a/tests/cases/invokebitcast.ll
+++ b/tests/cases/invokebitcast.ll
@@ -1,7 +1,7 @@
 ; ModuleID = '/dev/shm/tmp/src.cpp.o'
 ; Just test for compilation here
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-f128:128:128-n8:16:32"
-target triple = "i386-pc-linux-gnu"
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
 
 %struct.CPU_Regs = type { [8 x %union.GenReg32] }
 %union.GenReg32 = type { [1 x i32] }
@@ -16,7 +16,8 @@ entry:
   %0 = alloca i32                                 ; [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; [#uses=0]
   %1 = load i32* bitcast (i32* getelementptr inbounds (%struct.CPU_Regs* @cpu_regs, i32 0, i32 0, i32 1, i32 0, i32 0) to i32*), align 2 ; [#uses=1]
-  store i16 %1, i16* bitcast (%struct.CPU_Regs* @cpu_regs to i16*), align 2
+  %s = trunc i32 %1 to i16
+  store i16 %s, i16* bitcast (%struct.CPU_Regs* @cpu_regs to i16*), align 2
   %2 = call i32 @puts(i8* getelementptr inbounds ([14 x i8]* @.str, i32 0, i32 0)) ; [#uses=0]
   store i32 0, i32* %0, align 4
   %3 = load i32* %0, align 4                      ; [#uses=1]
diff --git a/tests/cases/phicubed.ll b/tests/cases/phicubed.ll
index a0799997..5fc3208b 100644
--- a/tests/cases/phicubed.ll
+++ b/tests/cases/phicubed.ll
@@ -1,4 +1,6 @@
 ; ModuleID = '/dev/shm/tmp/src.cpp.o'
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
 
 %struct.worker_args = type { i32, %struct.worker_args* }
 
diff --git a/tests/cases/phientryimplicit.ll b/tests/cases/phientryimplicit.ll
index 8a510f43..b7b17add 100644
--- a/tests/cases/phientryimplicit.ll
+++ b/tests/cases/phientryimplicit.ll
@@ -1,6 +1,6 @@
 ; ModuleID = 'tests/hello_world.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-pc-linux-gnu"
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
 
 ; Phi nodes can refer to the entry. And the entry might be unnamed, and doesn't even have a consistent implicit name!
 
@@ -9,35 +9,35 @@ target triple = "i386-pc-linux-gnu"
 ; [#uses=0]
 define i32 @main() {
   %retval = alloca i32, align 4                   ; [#uses=1 type=i32*]
-  %16 = trunc i32 1 to i1
-  br i1 %16, label %17, label %26, !dbg !1269853  ; [debug line = 3920:5]
+  %a16 = trunc i32 1 to i1
+  br i1 %a16, label %L17, label %L26, !dbg !1269853  ; [debug line = 3920:5]
 
-; <label>:17                                      ; preds = %1
-  %25 = trunc i32 1 to i1
-  br label %26
+L17:
+  %a25 = trunc i32 1 to i1
+  br label %L26
 
-; <label>:26                                      ; preds = %17, %1
-  %27 = phi i1 [ false, %1 ], [ %25, %17 ]        ; [#uses=1 type=i1]
+L26:
+  %a27 = phi i1 [ false, %1 ], [ %a25, %L17 ]        ; [#uses=1 type=i1]
   store i32 0, i32* %retval
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0)) ; [#uses=0 type=i32]
-  %cal2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0), i32 %27) ; make sure %27 is used
+  %cal2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0), i1 %a27) ; make sure %a27 is used
   ret i32 1
 }
 
 define i32 @main0() {
   %retval = alloca i32, align 4                   ; [#uses=1 type=i32*]
-  %16 = trunc i32 1 to i1
-  br i1 %16, label %17, label %26, !dbg !1269853  ; [debug line = 3920:5]
+  %a16 = trunc i32 1 to i1
+  br i1 %a16, label %L17, label %L26, !dbg !1269853  ; [debug line = 3920:5]
 
-; <label>:17                                      ; preds = %1
-  %25 = trunc i32 1 to i1
-  br label %26
+L17:
+  %a25 = trunc i32 1 to i1
+  br label %L26
 
-; <label>:26                                      ; preds = %17, %1
-  %27 = phi i1 [ false, %0 ], [ %25, %17 ]        ; [#uses=1 type=i1]
+L26:
+  %a27 = phi i1 [ false, %0 ], [ %a25, %L17 ]        ; [#uses=1 type=i1]
   store i32 0, i32* %retval
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0)) ; [#uses=0 type=i32]
-  %cal2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0), i32 %27) ; make sure %27 is used
+  %cal2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0), i1 %a27) ; make sure %a27 is used
   ret i32 1
 }
 
diff --git a/tests/cases/phiself.ll b/tests/cases/phiself.ll
index 81249799..0a06fcca 100644
--- a/tests/cases/phiself.ll
+++ b/tests/cases/phiself.ll
@@ -1,6 +1,6 @@
 ; ModuleID = '/tmp/emscripten_temp/src.cpp.o'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-pc-linux-gnu"
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
 
 @.str = private unnamed_addr constant [7 x i8] c"cheez\0A\00", align 1
 @.str1 = private unnamed_addr constant [6 x i8] c"*%d*\0A\00", align 1
diff --git a/tests/cases/ptrtoi64.ll b/tests/cases/ptrtoi64.ll
index 01e466fe..5898f529 100644
--- a/tests/cases/ptrtoi64.ll
+++ b/tests/cases/ptrtoi64.ll
@@ -1,8 +1,8 @@
 ; pointer to i64, then to i32
 
 ; ModuleID = '/tmp/emscripten/tmp/src.cpp.o'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-pc-linux-gnu"
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
 
 @.str2 = private constant [9 x i8] c"*%d,%d*\0A\00", align 1 ; [#uses=1]
 
@@ -18,10 +18,10 @@ entry:
   %0 = alloca i32                                 ; [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; [#uses=0]
   %sz.i7 = inttoptr i32 400 to i32*          ; [#uses=1 type=i32*]
-  %10 = ptrtoint i32* %sz.i7 to i64, !dbg !8557        ; [#uses=1 type=i64] [debug line = 99:3]
-  %conv5 = trunc i64 %10 to i32, !dbg !8557       ; [#uses=1 type=i32] [debug line = 99:3]
-  %11 = ptrtoint i32* %sz.i7 to i8, !dbg !8557        ; [#uses=1 type=i64] [debug line = 99:3]
-  %conv6 = zext i8 %11 to i32, !dbg !8557       ; [#uses=1 type=i32] [debug line = 99:3]
-  %55 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str2, i32 0, i32 0), i32 %conv5, i32 %conv6) ; [#uses=0]
+  %a10 = ptrtoint i32* %sz.i7 to i64
+  %conv5 = trunc i64 %a10 to i32
+  %a11 = ptrtoint i32* %sz.i7 to i8
+  %conv6 = zext i8 %a11 to i32
+  %a55 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str2, i32 0, i32 0), i32 %conv5, i32 %conv6)
   ret i32 0
 }
diff --git a/tests/cases/sillybitcast.ll b/tests/cases/sillybitcast.ll
index c5ca4f9a..50a54da9 100644
--- a/tests/cases/sillybitcast.ll
+++ b/tests/cases/sillybitcast.ll
@@ -1,6 +1,6 @@
 ; ModuleID = '/tmp/emscripten/tmp/src.cpp.o'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-pc-linux-gnu"
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
 
 @.str = private constant [14 x i8] c"hello, world!\00", align 1 ; [#uses=1]
 
diff --git a/tests/cases/sillybitcast2.ll b/tests/cases/sillybitcast2.ll
new file mode 100644
index 00000000..02cf8615
--- /dev/null
+++ b/tests/cases/sillybitcast2.ll
@@ -0,0 +1,35 @@
+; ModuleID = '/tmp/emscripten/tmp/src.cpp.o'
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
+
+@.str = private constant [14 x i8] c"hello, world!\00", align 1 ; [#uses=1]
+
+; [#uses=2]
+define void @"_Z5hellov"() {
+entry:
+  %0 = call i32 bitcast (i32 (i32*)* @puts to i32 (i8*)*)(i8* getelementptr inbounds ([14 x i8]* @.str, i32 0, i32 0)) ; [#uses=0]
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+; [#uses=1]
+declare i32 @puts(i32*)
+
+; [#uses=0]
+define i32 @main() {
+entry:
+  %retval = alloca i32                            ; [#uses=2]
+  %0 = alloca i32                                 ; [#uses=2]
+  %"alloca point" = bitcast i32 0 to i32          ; [#uses=0]
+  call void @"_Z5hellov"()
+  store i32 0, i32* %0, align 4
+  %1 = load i32* %0, align 4                      ; [#uses=1]
+  store i32 %1, i32* %retval, align 4
+  br label %return
+
+return:                                           ; preds = %entry
+  %retval1 = load i32* %retval                    ; [#uses=1]
+  ret i32 %retval1
+}
diff --git a/tests/cases/unaligneddouble.ll b/tests/cases/unaligneddouble.ll
index 22b92741..e4067831 100644
--- a/tests/cases/unaligneddouble.ll
+++ b/tests/cases/unaligneddouble.ll
@@ -10,7 +10,7 @@ entry:
   %retval = alloca i32, align 4                   ; [#uses=1 type=i32*]
   %doub = alloca double, align 4
   store i32 0, i32* %retval
-  %0 = bitcast double* %doub to i32
+  %0 = ptrtoint double* %doub to i32
   %1 = uitofp i32 %0 to double
   store double %1, double* %doub, align 1
   store double %1, double* %doub, align 2
diff --git a/tests/runner.py b/tests/runner.py
index 8a5e1129..37e307e9 100755
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -328,7 +328,10 @@ process(sys.argv[1])
       os.makedirs(ret)
     return ret
 
-  def get_library(self, name, generated_libs, configure=['sh', './configure'], configure_args=[], make=['make'], make_args=['-j', '2'], cache=True, env_init={}, cache_name_extra='', native=False):
+  def get_library(self, name, generated_libs, configure=['sh', './configure'], configure_args=[], make=['make'], make_args='help', cache=True, env_init={}, cache_name_extra='', native=False):
+    if make_args == 'help':
+      make_args = ['-j', str(multiprocessing.cpu_count())]
+
     build_dir = self.get_build_dir()
     output_dir = self.get_dir()
 
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index 63e0041f..2f4d26fd 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -14,6 +14,109 @@ DEFAULT_ARG = '4'
 
 TEST_REPS = 2
 
+CORE_BENCHMARKS = True # core benchmarks vs full regression suite
+
+class Benchmarker: # base class: subclasses provide run(args); bench() collects times and display() prints statistics
+  def __init__(self, name):
+    self.name = name
+
+  def bench(self, args, output_parser=None): # do TEST_REPS runs; output_parser, if given, derives the time from the run's output
+    self.times = []
+    for i in range(TEST_REPS):
+      start = time.time()
+      output = self.run(args)
+      if not output_parser:
+        curr = time.time()-start # no parser: use the wall-clock duration of the run
+      else:
+        curr = output_parser(output)
+      self.times.append(curr)
+
+  def display(self, baseline=None): # print mean/std/median/range of self.times, plus the ratio to baseline's mean if given
+    if baseline == self: baseline = None # comparing a benchmarker against itself is meaningless
+    mean = sum(self.times)/len(self.times)
+    squared_times = map(lambda x: x*x, self.times)
+    mean_of_squared = sum(squared_times)/len(self.times)
+    std = math.sqrt(max(0.0, mean_of_squared - mean*mean)) # rounding can push the variance slightly below zero
+    sorted_times = sorted(self.times)
+    mid = len(sorted_times)//2
+    median = sorted_times[mid] if len(sorted_times) % 2 else (sorted_times[mid - 1] + sorted_times[mid])/2.0 # middle value for odd counts, mean of the two middle values for even
+
+    print '   %10s: mean: %4.3f (+-%4.3f) secs  median: %4.3f  range: %4.3f-%4.3f  (noise: %4.3f%%)  (%d runs)' % (self.name, mean, std, median, min(self.times), max(self.times), 100*std/mean, TEST_REPS),
+
+    if baseline:
+      mean_baseline = sum(baseline.times)/len(baseline.times)
+      final = mean / mean_baseline
+      print '  Relative: %.2f X slower' % final
+    else:
+      print
+
+class NativeBenchmarker(Benchmarker): # builds the benchmark with a native C/C++ compiler and runs the resulting executable
+  def __init__(self, name, cc, cxx):
+    self.name = name
+    self.cc = cc
+    self.cxx = cxx
+
+  def build(self, parent, filename, args, shared_args, emcc_args, native_args, native_exec): # produce filename + '.native'
+    self.parent = parent
+    if not native_exec:
+      compiler = self.cxx if filename.endswith('cpp') else self.cc
+      process = Popen([compiler, '-O2', '-fno-math-errno', filename, '-o', filename+'.native'] + shared_args + native_args, stdout=PIPE, stderr=parent.stderr_redirect)
+      output = process.communicate()
+      if process.returncode != 0: # compare by value; 'is not 0' tests object identity
+        print >> sys.stderr, "Building native executable with command '%s' failed with a return code %d!" % (' '.join([compiler, '-O2', filename, '-o', filename+'.native']), process.returncode)
+        print "Output: " + output[0]
+    else:
+      print '(using clang)'
+      shutil.copyfile(native_exec, filename + '.native') # reuse the supplied executable instead of compiling
+      shutil.copymode(native_exec, filename + '.native')
+    self.filename = filename
+
+  def run(self, args): # run the native executable and return its stdout
+    process = Popen([self.filename+'.native'] + args, stdout=PIPE, stderr=PIPE)
+    return process.communicate()[0]
+
+class JSBenchmarker(Benchmarker): # builds the benchmark to JS with emcc and runs it in the given JS engine
+  def __init__(self, name, engine, extra_args=[]): # extra_args is only read (concatenated in build), never mutated
+    self.name = name
+    self.engine = engine
+    self.extra_args = extra_args
+
+  def build(self, parent, filename, args, shared_args, emcc_args, native_args, native_exec): # produce filename + '.js'
+    self.filename = filename
+
+    open('hardcode.py', 'w').write('''
+def process(filename):
+  js = open(filename).read()
+  replaced = js.replace("run();", "run(%s.concat(Module[\\"arguments\\"]));")
+  assert js != replaced
+  open(filename, 'w').write(replaced)
+import sys
+process(sys.argv[1])
+''' % str(args[:-1]) # do not hardcode in the last argument, the default arg
+)
+
+    try_delete(filename + '.js')
+    output = Popen([PYTHON, EMCC, filename, #'-O3',
+                    '-O2', '-s', 'DOUBLE_MODE=0', '-s', 'PRECISE_I64_MATH=0',
+                    '--memory-init-file', '0', '--js-transform', 'python hardcode.py',
+                    '-s', 'TOTAL_MEMORY=128*1024*1024',
+                    #'--closure', '1',
+                    #'-g',
+                    '-o', filename + '.js'] + shared_args + emcc_args + self.extra_args, stdout=PIPE, stderr=PIPE).communicate()
+    assert os.path.exists(filename + '.js'), 'Failed to compile file: ' + output[0] + output[1] # include stderr, which is captured above and was otherwise dropped
+
+  def run(self, args): # run the generated JS in self.engine and return the run_js output
+    return run_js(self.filename + '.js', engine=self.engine, args=args, stderr=PIPE, full_output=True)
+
+# Benchmarkers: two native toolchains (clang, gcc) followed by three JS-engine configurations
+benchmarkers = [
+  NativeBenchmarker('clang', CLANG_CC, CLANG),
+  NativeBenchmarker('gcc', 'gcc', 'g++'),
+  JSBenchmarker('sm-f32', SPIDERMONKEY_ENGINE, ['-s', 'PRECISE_F32=2']),
+  JSBenchmarker('sm',     SPIDERMONKEY_ENGINE),
+  JSBenchmarker('v8',     V8_ENGINE)
+]
+
 class benchmark(RunnerCore):
   save_dir = True
 
@@ -54,41 +157,6 @@ class benchmark(RunnerCore):
     JS_ENGINE = Building.JS_ENGINE_OVERRIDE if Building.JS_ENGINE_OVERRIDE is not None else JS_ENGINES[0]
     print 'Benchmarking JS engine: %s' % JS_ENGINE
 
-  def print_stats(self, times, native_times, last=False, reps=TEST_REPS):
-    if reps == 0:
-      print '(no reps)'
-      return
-    mean = sum(times)/len(times)
-    squared_times = map(lambda x: x*x, times)
-    mean_of_squared = sum(squared_times)/len(times)
-    std = math.sqrt(mean_of_squared - mean*mean)
-    sorted_times = times[:]
-    sorted_times.sort()
-    median = sum(sorted_times[len(sorted_times)/2 - 1:len(sorted_times)/2 + 1])/2
-
-    mean_native = sum(native_times)/len(native_times)