29 files changed, 534 insertions, 309 deletions
diff --git a/AUTHORS b/AUTHORS
index 27206740..229a9cad 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -85,3 +85,4 @@ a license to everyone to use it as detailed in LICENSE.)
 * Ludovic Perrine <jazzzz@gmail.com>
 * David Barksdale <david.barksdale@adcedosolutions.com>
 * Manfred Manik Nerurkar <nerurkar*at*made-apps.biz> (copyright owned by MADE, GmbH)
+* Joseph Gentle <me@josephg.com>
diff --git a/emcc b/emcc
index fb73ef6b..75750e1e 100755
--- a/emcc
+++ b/emcc
@@ -127,11 +127,6 @@ Options that are modified or new in %s include:
                            (For details on the affects of different
                            opt levels, see apply_opt_level() in
                            tools/shared.py and also src/settings.js.)
-                           Note: Optimizations are only done when
-                           compiling to JavaScript, not to intermediate
-                           bitcode, *unless* you build with
-                           EMCC_OPTIMIZE_NORMALLY=1 (not recommended
-                           unless you know what you are doing!)
   -O2                      As -O1, plus the relooper (loop recreation),
                            LLVM -O2 optimizations, and
 
@@ -144,7 +139,7 @@ Options that are modified or new in %s include:
                               -s DOUBLE_MODE=0
                               -s PRECISE_I64_MATH=0
                               --closure 1
-                              --llvm-lto 1
+                              --llvm-lto 3
 
                            This is not recommended at all. A better idea
                            is to try each of these separately on top of
@@ -220,10 +215,15 @@ Options that are modified or new in %s include:
                            2: -O2 LLVM optimizations
                            3: -O3 LLVM optimizations (default in -O2+)
 
-  --llvm-lto <level>       0: No LLVM LTO (default in -O2 and below)
-                           1: LLVM LTO (default in -O3)
+  --llvm-lto <level>       0: No LLVM LTO (default in -O2 and below)
+                           1: LLVM LTO is performed
+                           2: We combine all the bitcode and run LLVM opt -O3
+                              on that (which optimizes across modules, but is
+                              not the same as normal LTO), but do not do normal
+                              LTO
+                           3: We do both 2 and then 1 (default in -O3)
                            Note: If LLVM optimizations are not run
-                           (see --llvm-opts), setting this to 1 has no
+                           (see --llvm-opts), setting this has no
                            effect.
 
   --closure <on>           0: No closure compiler (default in -O2 and below)
@@ -884,10 +884,14 @@ try:
     newargs = newargs + [default_cxx_std]
 
   if llvm_opts is None: llvm_opts = LLVM_OPT_LEVEL[opt_level]
-  if llvm_lto is None: llvm_lto = opt_level >= 3
+  if llvm_lto is None and opt_level >= 3: llvm_lto = 3
   if opt_level == 0: debug_level = 4
   if closure is None and opt_level == 3: closure = True
 
+  if llvm_lto is None and bind:
+    logging.debug('running lto for embind') # XXX this is a workaround for a pointer issue
+    llvm_lto = 1
+
   # TODO: support source maps with js_transform
   if js_transform and debug_level >= 4:
     logging.warning('disabling source maps because a js transform is being done')
@@ -1058,6 +1062,10 @@ try:
   else:
     raise Exception('unknown llvm target: ' + str(shared.LLVM_TARGET))
 
+  if shared.Settings.USE_TYPED_ARRAYS != 2 and llvm_opts > 0:
+    logging.warning('disabling LLVM optimizations, need typed arrays mode 2 for them')
+    llvm_opts = 0
+
   ## Compile source code to bitcode
 
   logging.debug('compiling to bitcode')
@@ -1097,20 +1105,20 @@ try:
           shared.Building.llvm_as(input_file, temp_file)
           temp_files.append(temp_file)
 
-  if not LEAVE_INPUTS_RAW: assert len(temp_files) == len(input_files)
+  if not LEAVE_INPUTS_RAW:
+    assert len(temp_files) == len(input_files)
+
+    # Optimize source files
+    if llvm_opts > 0:
+      for i in range(len(input_files)):
+        input_file = input_files[i]
+        if input_files[i].endswith(SOURCE_SUFFIXES):
+          temp_file = temp_files[i]
+          logging.debug('optimizing %s with -O%d' % (input_file, llvm_opts))
+          shared.Building.llvm_opt(temp_file, llvm_opts)
 
   # If we were just asked to generate bitcode, stop there
   if final_suffix not in JS_CONTAINING_SUFFIXES:
-    if llvm_opts > 0:
-      if not os.environ.get('EMCC_OPTIMIZE_NORMALLY'):
-        logging.warning('-Ox flags ignored, since not generating JavaScript')
-      else:
-        for input_file in input_files:
-          if input_file.endswith(SOURCE_SUFFIXES):
-            logging.debug('optimizing %s with -O%d since EMCC_OPTIMIZE_NORMALLY defined' % (input_file, llvm_opts))
-            shared.Building.llvm_opt(in_temp(unsuffixed(uniquename(input_file)) + '.o'), llvm_opts)
-          else:
-            logging.debug('not optimizing %s despite EMCC_OPTIMIZE_NORMALLY since not source code' % (input_file))
     if not specified_target:
       for input_file in input_files:
         shutil.move(in_temp(unsuffixed(uniquename(input_file)) + '.o'), unsuffixed_basename(input_file) + '.' + final_suffix)
@@ -1142,6 +1150,8 @@ try:
         symbols = filter(lambda symbol: symbol not in exclude, symbols)
       return set(symbols)
 
+    lib_opts = ['-O2']
+
     # XXX We also need to add libc symbols that use malloc, for example strdup. It's very rare to use just them and not
     #     a normal malloc symbol (like free, after calling strdup), so we haven't hit this yet, but it is possible.
     libc_symbols = read_symbols(shared.path_from_root('system', 'lib', 'libc.symbols'))
@@ -1150,7 +1160,7 @@ try:
     libcxx_symbols = read_symbols(shared.path_from_root('system', 'lib', 'libcxx', 'symbols'), exclude=libc_symbols)
     libcxxabi_symbols = read_symbols(shared.path_from_root('system', 'lib', 'libcxxabi', 'symbols'), exclude=libc_symbols)
 
-    # XXX we should disable EMCC_DEBUG (and EMCC_OPTIMIZE_NORMALLY?) when building libs, just like in the relooper
+    # XXX we should disable EMCC_DEBUG when building libs, just like in the relooper
 
     def build_libc(lib_filename, files):
       o_s = []
@@ -1159,7 +1169,7 @@ try:
       musl_internal_includes = shared.path_from_root('system', 'lib', 'libc', 'musl', 'src', 'internal')
       for src in files:
         o = in_temp(os.path.basename(src) + '.o')
-        execute([shared.PYTHON, shared.EMCC, shared.path_from_root('system', 'lib', src), '-o', o, '-I', musl_internal_includes], stdout=stdout, stderr=stderr)
+        execute([shared.PYTHON, shared.EMCC, shared.path_from_root('system', 'lib', src), '-o', o, '-I', musl_internal_includes] + lib_opts, stdout=stdout, stderr=stderr)
         o_s.append(o)
       if prev_cxx: os.environ['EMMAKEN_CXX'] = prev_cxx
       shared.Building.link(o_s, in_temp(lib_filename))
@@ -1170,7 +1180,7 @@ try:
       for src in files:
         o = in_temp(src + '.o')
         srcfile = shared.path_from_root(src_dirname, src)
-        execute([shared.PYTHON, shared.EMXX, srcfile, '-o', o, '-std=c++11'], stdout=stdout, stderr=stderr)
+        execute([shared.PYTHON, shared.EMXX, srcfile, '-o', o, '-std=c++11'] + lib_opts, stdout=stdout, stderr=stderr)
         o_s.append(o)
       shared.Building.link(o_s, in_temp(lib_filename))
       return in_temp(lib_filename)
@@ -1418,16 +1428,14 @@ try:
   if not LEAVE_INPUTS_RAW:
     link_opts = [] if debug_level >= 4 else ['-strip-debug'] # remove LLVM debug if we are not asked for it
 
-    if llvm_opts > 0:
-      if not os.environ.get('EMCC_OPTIMIZE_NORMALLY'):
-        shared.Building.llvm_opt(in_temp(target_basename + '.bc'), llvm_opts)
-        if DEBUG: save_intermediate('opt', 'bc')
-        # Do LTO in a separate pass to work around LLVM bug XXX (see failure e.g. in cubescript)
-      else:
-        logging.debug('not running opt because EMCC_OPTIMIZE_NORMALLY was specified, opt should have been run before')
+    if llvm_lto >= 2:
+      logging.debug('running LLVM opt -O3 as pre-LTO')
+      shared.Building.llvm_opt(in_temp(target_basename + '.bc'), ['-O3'])
+      if DEBUG: save_intermediate('opt', 'bc')
+
     if shared.Building.can_build_standalone():
       # If we can LTO, do it before dce, since it opens up dce opportunities
-      if llvm_lto and shared.Building.can_use_unsafe_opts():
+      if llvm_lto and llvm_lto != 2 and shared.Building.can_use_unsafe_opts():
         if not shared.Building.can_inline(): link_opts.append('-disable-inlining')
         # do not internalize in std-link-opts - it ignores internalize-public-api-list - and add a manual internalize
         link_opts += ['-disable-internalize'] + shared.Building.get_safe_internalize() + ['-std-link-opts']
@@ -1631,7 +1639,7 @@ try:
         match = re.match('.*?<script[^>]*>{{{ SCRIPT_CODE }}}</script>', shell,
             re.DOTALL)
         if match is None:
-          raise RuntimeError('Could not find script insertion point')
+          raise RuntimeError('''Could not find script insertion point - make sure you have   <script type='text/javascript'>{{{ SCRIPT_CODE }}}</script>   in your HTML file (with no newlines)''')
         generate_source_map(target, match.group().count('\n'))
       html.write(shell.replace('{{{ SCRIPT_CODE }}}', open(final).read()))
     else:
diff --git a/src/compiler.js b/src/compiler.js
index 94e77e26..365ff32f 100644
--- a/src/compiler.js
+++ b/src/compiler.js
@@ -176,6 +176,8 @@ DEAD_FUNCTIONS = numberedSet(DEAD_FUNCTIONS);
 
 RUNTIME_DEBUG = LIBRARY_DEBUG || GL_DEBUG;
 
+if (SAFE_HEAP) USE_BSS = 0; // must initialize heap for safe heap
+
 // Settings sanity checks
 
 assert(!(USE_TYPED_ARRAYS === 2 && QUANTUM_SIZE !== 4), 'For USE_TYPED_ARRAYS == 2, must have normal QUANTUM_SIZE of 4');
diff --git a/src/jsifier.js b/src/jsifier.js
index b13d39a3..885fbc30 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -285,42 +285,60 @@ function JSify(data, functionsOnly, givenFunctions) {
         index = makeGlobalUse(item.ident); // index !== null indicates we are indexing this
         allocator = 'ALLOC_NONE';
       }
-      if (item.external) {
-        if (Runtime.isNumberType(item.type) || isPointerType(item.type)) {
-          constant = zeros(Runtime.getNativeFieldSize(item.type));
-        } else {
-          constant = makeEmptyStruct(item.type);
+
+      if (isBSS(item)) {
+        var length = calcAllocatedSize(item.type);
+        length = Runtime.alignMemory(length);
+
+        // If using indexed globals, go ahead and early out (no need to explicitly
+        // initialize).
+        if (!NAMED_GLOBALS) {
+          return ret;
+        }
+        // If using named globals, we can at least shorten the call to allocate by
+        // passing an integer representing the size of memory to alloc instead of
+        // an array of 0s of size length.
+        else {
+          constant = length;
         }
       } else {
-        constant = parseConst(item.value, item.type, item.ident);
-      }
-      assert(typeof constant === 'object');//, [typeof constant, JSON.stringify(constant), item.external]);
-
-      // This is a flattened object. We need to find its idents, so they can be assigned to later
-      constant.forEach(function(value, i) {
-        if (needsPostSet(value)) { // ident, or expression containing an ident
-          ret.push({
-            intertype: 'GlobalVariablePostSet',
-            JS: makeSetValue(makeGlobalUse(item.ident), i, value, 'i32', false, true) + ';' // ignore=true, since e.g. rtti and statics cause lots of safe_heap errors
-          });
-          constant[i] = '0';
+        if (item.external) {
+          if (Runtime.isNumberType(item.type) || isPointerType(item.type)) {
+            constant = zeros(Runtime.getNativeFieldSize(item.type));
+          } else {
+            constant = makeEmptyStruct(item.type);
+          }
+        } else {
+          constant = parseConst(item.value, item.type, item.ident);
         }
-      });
+        assert(typeof constant === 'object');//, [typeof constant, JSON.stringify(constant), item.external]);
+
+        // This is a flattened object. We need to find its idents, so they can be assigned to later
+        constant.forEach(function(value, i) {
+          if (needsPostSet(value)) { // ident, or expression containing an ident
+            ret.push({
+              intertype: 'GlobalVariablePostSet',
+              JS: makeSetValue(makeGlobalUse(item.ident), i, value, 'i32', false, true) + ';' // ignore=true, since e.g. rtti and statics cause lots of safe_heap errors
+            });
+            constant[i] = '0';
+          }
+        });
 
-      if (item.external) {
-        // External variables in shared libraries should not be declared as
-        // they would shadow similarly-named globals in the parent, so do nothing here.
-        if (BUILD_AS_SHARED_LIB) return ret;
-        // Library items need us to emit something, but everything else requires nothing.
-        if (!LibraryManager.library[item.ident.slice(1)]) return ret;
-      }
+        if (item.external) {
+          // External variables in shared libraries should not be declared as
+          // they would shadow similarly-named globals in the parent, so do nothing here.
+          if (BUILD_AS_SHARED_LIB) return ret;
+          // Library items need us to emit something, but everything else requires nothing.
+          if (!LibraryManager.library[item.ident.slice(1)]) return ret;
+        }
 
-      // ensure alignment
-      constant = constant.concat(zeros(Runtime.alignMemory(constant.length) - constant.length));
+        // ensure alignment
+        constant = constant.concat(zeros(Runtime.alignMemory(constant.length) - constant.length));
 
-      // Special case: class vtables. We make sure they are null-terminated, to allow easy runtime operations
-      if (item.ident.substr(0, 5) == '__ZTV') {
-        constant = constant.concat(zeros(Runtime.alignMemory(QUANTUM_SIZE)));
+        // Special case: class vtables. We make sure they are null-terminated, to allow easy runtime operations
+        if (item.ident.substr(0, 5) == '__ZTV') {
+          constant = constant.concat(zeros(Runtime.alignMemory(QUANTUM_SIZE)));
+        }
       }
 
       // NOTE: This is the only place that could potentially create static
@@ -1568,7 +1586,7 @@ function JSify(data, functionsOnly, givenFunctions) {
         print('STATICTOP = STATIC_BASE + ' + Runtime.alignMemory(Variables.nextIndexedOffset) + ';\n');
       }
       var generated = itemsDict.function.concat(itemsDict.type).concat(itemsDict.GlobalVariableStub).concat(itemsDict.GlobalVariable);
-      print(generated.map(function(item) { return item.JS }).join('\n'));
+      print(generated.map(function(item) { return item.JS; }).join('\n'));
 
       if (phase == 'pre') {
         if (memoryInitialization.length > 0) {
diff --git a/src/library.js b/src/library.js
index 01a67804..822f4319 100644
--- a/src/library.js
+++ b/src/library.js
@@ -7224,6 +7224,23 @@ LibraryManager.library = {
     return 1;
   },
 
+  // netinet/in.h
+
+  _in6addr_any:
+    'allocate([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0], "i8", ALLOC_STATIC)',
+  _in6addr_loopback:
+    'allocate([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1], "i8", ALLOC_STATIC)',
+  _in6addr_linklocal_allnodes:
+    'allocate([255,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1], "i8", ALLOC_STATIC)',
+  _in6addr_linklocal_allrouters:
+    'allocate([255,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2], "i8", ALLOC_STATIC)',
+  _in6addr_interfacelocal_allnodes:
+    'allocate([255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1], "i8", ALLOC_STATIC)',
+  _in6addr_interfacelocal_allrouters:
+    'allocate([255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2], "i8", ALLOC_STATIC)',
+  _in6addr_sitelocal_allrouters:
+    'allocate([255,5,0,0,0,0,0,0,0,0,0,0,0,0,0,2], "i8", ALLOC_STATIC)',
+
   // ==========================================================================
   // netdb.h
   // ==========================================================================
diff --git a/src/library_browser.js b/src/library_browser.js
index d007d9a7..d9fd3ee5 100644
--- a/src/library_browser.js
+++ b/src/library_browser.js
@@ -805,7 +805,7 @@ mergeInto(LibraryManager.library, {
         var t = process['hrtime']();
         return t[0] * 1e3 + t[1] / 1e6;
     }
-    else if (window['performance'] && window['performance']['now']) {
+    else if (ENVIRONMENT_IS_WEB && window['performance'] && window['performance']['now']) {
       return window['performance']['now']();
     } else {
       return Date.now();
diff --git a/src/parseTools.js b/src/parseTools.js
index 0b83a12b..6bc0b7ea 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -467,6 +467,18 @@ function isIndexableGlobal(ident) {
   return !data.alias && !data.external;
 }
 
+function isBSS(item) { // true if a global can live in BSS, i.e. needs no explicit initializer; always returns a boolean
+  if (!USE_BSS) { // the USE_BSS setting is off: treat no global as BSS
+    return false;
+  }
+
+  if (item.external) return false; // externals are typically implemented in a JS library, and must be accessed by name, explicitly
+
+  // BSS candidates: the value is an 'emptystruct' or the string '0'. Coerce so a missing value gives false, not undefined (NaN in sortGlobals).
+  return !!item.value && (item.value.intertype === 'emptystruct' ||
+                          item.value.value === '0');
+}
+
 function makeGlobalDef(ident) {
   if (!NAMED_GLOBALS && isIndexableGlobal(ident)) return '';
   return 'var ' + ident + ';';
@@ -490,7 +502,10 @@ function sortGlobals(globals) {
   ks.sort();
   var inv = invertArray(ks);
   return values(globals).sort(function(a, b) {
-    return inv[b.ident] - inv[a.ident];
+    // Sort globals by whether they need explicit initialization, moving those that
+    // do not (see isBSS) to the end of the array. Ties fall back to the ident-based order.
+    return (Number(isBSS(a)) - Number(isBSS(b))) ||
+      (inv[b.ident] - inv[a.ident]);
   });
 }
 
diff --git a/src/relooper/Relooper.cpp b/src/relooper/Relooper.cpp
index aa7e71a1..ca9c6ab1 100644
--- a/src/relooper/Relooper.cpp
+++ b/src/relooper/Relooper.cpp
@@ -10,6 +10,10 @@
 
 // TODO: move all set to unorderedset
 
+template <class T, class U> bool contains(const T& container, const U& contained) { // membership test helper
+  return container.find(contained) != container.end(); // needs only find() and end() on the container
+}
+
 #if DEBUG
 static void PrintDebug(const char *Format, ...);
 #define DebugDump(x, ...) Debugging::Dump(x, __VA_ARGS__)
@@ -100,7 +104,7 @@ void Branch::Render(Block *Target, bool SetLabel) {
 
 int Block::IdCounter = 1; // 0 is reserved for clearings
 
-Block::Block(const char *CodeInit) : Parent(NULL), Id(Block::IdCounter++), DefaultTarget(NULL), IsCheckedMultipleEntry(false) {
+Block::Block(const char *CodeInit) : Parent(NULL), Id(Block::IdCounter++), IsCheckedMultipleEntry(false) {
   Code = strdup(CodeInit);
 }
 
@@ -113,7 +117,7 @@ Block::~Block() {
 }
 
 void Block::AddBranchTo(Block *Target, const char *Condition, const char *Code) {
-  assert(BranchesOut.find(Target) == BranchesOut.end()); // cannot add more than one branch to the same target
+  assert(!contains(BranchesOut, Target)); // cannot add more than one branch to the same target
   BranchesOut[Target] = new Branch(Condition, Code);
 }
 
@@ -174,6 +178,8 @@ void Block::Render(bool InLoop) {
     }
   }
 
+  Block *DefaultTarget(NULL); // The block we branch to without checking the condition, if none of the other conditions held.
+
   // We must do this here, because blocks can be split and even comparing their Ids is not enough. We must check the conditions.
   for (BlockBranchMap::iterator iter = ProcessedBranchesOut.begin(); iter != ProcessedBranchesOut.end(); iter++) {
     if (!iter->second->Condition) {
@@ -181,7 +187,7 @@ void Block::Render(bool InLoop) {
       DefaultTarget = iter->first;
     }
   }
-  assert(DefaultTarget); // Must be a default
+  assert(DefaultTarget); // Since each block *must* branch somewhere, this must be set
 
   ministring RemainingConditions;
   bool First = true;
@@ -198,7 +204,7 @@ void Block::Render(bool InLoop) {
       Details = ProcessedBranchesOut[DefaultTarget];
     }
     bool SetCurrLabel = SetLabel && Target->IsCheckedMultipleEntry;
-    bool HasFusedContent = Fused && Fused->InnerMap.find(Target) != Fused->InnerMap.end();
+    bool HasFusedContent = Fused && contains(Fused->InnerMap, Target);
     bool HasContent = SetCurrLabel || Details->Type != Branch::Direct || HasFusedContent || Details->Code;
     if (iter != ProcessedBranchesOut.end()) {
       // If there is nothing to show in this branch, omit the condition
@@ -356,7 +362,7 @@ void Relooper::Calculate(Block *Entry) {
       while (ToInvestigate.size() > 0) {
         Block *Curr = ToInvestigate.front();
         ToInvestigate.pop_front();
-        if (Live.find(Curr) != Live.end()) continue;
+        if (contains(Live, Curr)) continue;
         Live.insert(Curr);
         for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) {
           ToInvestigate.push_back(iter->first);
@@ -380,7 +386,7 @@ void Relooper::Calculate(Block *Entry) {
       for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) {
         Block *Original = *iter;
         if (Original->BranchesIn.size() <= 1 || Original->BranchesOut.size() > 0) continue; // only dead ends, for now
-        if (Original->BranchesOut.find(Original) != Original->BranchesOut.end()) continue; // cannot split a looping node
+        if (contains(Original->BranchesOut, Original)) continue; // cannot split a looping node
         if (strlen(Original->Code)*(Original->BranchesIn.size()-1) > TotalCodeSize/5) continue; // if splitting increases raw code size by a significant amount, abort
         // Split the node (for simplicity, we replace all the blocks, even though we could have reused the original)
         PrintDebug("Splitting block %d\n", Original->Id);
@@ -423,7 +429,7 @@ void Relooper::Calculate(Block *Entry) {
   // Add incoming branches from live blocks, ignoring dead code
   for (int i = 0; i < Blocks.size(); i++) {
     Block *Curr = Blocks[i];
-    if (Pre.Live.find(Curr) == Pre.Live.end()) continue;
+    if (!contains(Pre.Live, Curr)) continue;
     for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) {
       iter->first->BranchesIn.insert(Curr);
     }
@@ -445,7 +451,7 @@ void Relooper::Calculate(Block *Entry) {
     // will appear
     void GetBlocksOut(Block *Source, BlockSet& Entries, BlockSet *LimitTo=NULL) {
       for (BlockBranchMap::iterator iter = Source->BranchesOut.begin(); iter != Source->BranchesOut.end(); iter++) {
-        if (!LimitTo || LimitTo->find(iter->first) != LimitTo->end()) {
+        if (!LimitTo || contains(*LimitTo, iter->first)) {
           Entries.insert(iter->first);
         }
       }
@@ -457,7 +463,7 @@ void Relooper::Calculate(Block *Entry) {
       DebugDump(From, "  relevant to solipsize: ");
       for (BlockSet::iterator iter = Target->BranchesIn.begin(); iter != Target->BranchesIn.end();) {
         Block *Prior = *iter;
-        if (From.find(Prior) == From.end()) {
+        if (!contains(From, Prior)) {
           iter++;
           continue;
         }
@@ -502,7 +508,7 @@ void Relooper::Calculate(Block *Entry) {
       while (Queue.size() > 0) {
         Block *Curr = *(Queue.begin());
         Queue.erase(Queue.begin());
-        if (InnerBlocks.find(Curr) == InnerBlocks.end()) {
+        if (!contains(InnerBlocks, Curr)) {
           // This element is new, mark it as inner and remove from outer
           InnerBlocks.insert(Curr);
           Blocks.erase(Curr);
@@ -518,7 +524,7 @@ void Relooper::Calculate(Block *Entry) {
         Block *Curr = *iter;
         for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) {
           Block *Possible = iter->first;
-          if (InnerBlocks.find(Possible) == InnerBlocks.end()) {
+          if (!contains(InnerBlocks, Possible)) {
             NextEntries.insert(Possible);
           }
         }
@@ -615,7 +621,7 @@ void Relooper::Calculate(Block *Entry) {
             Block *Invalidatee = ToInvalidate.front();
             ToInvalidate.pop_front();
             Block *Owner = Ownership[Invalidatee];
-            if (IndependentGroups.find(Owner) != IndependentGroups.end()) { // Owner may have been invalidated, do not add to IndependentGroups!
+            if (contains(IndependentGroups, Owner)) { // Owner may have been invalidated, do not add to IndependentGroups!
               IndependentGroups[Owner].erase(Invalidatee);
             }
             if (Ownership[Invalidatee]) { // may have been seen before and invalidated already
@@ -688,7 +694,7 @@ void Relooper::Calculate(Block *Entry) {
           Block *Child = *iter;
           for (BlockSet::iterator iter = Child->BranchesIn.begin(); iter != Child->BranchesIn.end(); iter++) {
             Block *Parent = *iter;
-            if (Ignore && Ignore->find(Parent) != Ignore->end()) continue;
+            if (Ignore && contains(*Ignore, Parent)) continue;
             if (Helper.Ownership[Parent] != Helper.Ownership[Child]) {
               ToInvalidate.push_back(Child);
             }
@@ -739,7 +745,7 @@ void Relooper::Calculate(Block *Entry) {
             Block *CurrTarget = iter->first;
             BlockBranchMap::iterator Next = iter;
             Next++;
-            if (CurrBlocks.find(CurrTarget) == CurrBlocks.end()) {
+            if (!contains(CurrBlocks, CurrTarget)) {
               NextEntries.insert(CurrTarget);
               Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); 
             }
@@ -756,7 +762,7 @@ void Relooper::Calculate(Block *Entry) {
       // Add entries not handled as next entries, they are deferred
       for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) {
         Block *Entry = *iter;
-        if (IndependentGroups.find(Entry) == IndependentGroups.end()) {
+        if (!contains(IndependentGroups, Entry)) {
           NextEntries.insert(Entry);
         }
       }
@@ -820,7 +826,7 @@ void Relooper::Calculate(Block *Entry) {
             BlockBlockSetMap::iterator curr = iter++; // iterate carefully, we may delete
             for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin(); iterBranch != Entry->BranchesIn.end(); iterBranch++) {
               Block *Origin = *iterBranch;
-              if (Group.find(Origin) == Group.end()) {
+              if (!contains(Group, Origin)) {
                 // Reached from outside the group, so we cannot handle this
                 PrintDebug("Cannot handle group with entry %d because of incoming branch from %d\n", Entry->Id, Origin->Id);
                 IndependentGroups.erase(curr);
@@ -858,7 +864,7 @@ void Relooper::Calculate(Block *Entry) {
                 Block *Curr = *iter;
                 for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) {
                   Block *Target = iter->first;
-                  if (SmallGroup.find(Target) == SmallGroup.end()) {
+                  if (!contains(SmallGroup, Target)) {
                     DeadEnd = false;
                     break;
                   }
@@ -909,13 +915,13 @@ void Relooper::Calculate(Block *Entry) {
 
     PostOptimizer(Relooper *ParentInit) : Parent(ParentInit), Closure(NULL) {}
 
-    #define RECURSE_MULTIPLE_MANUAL(func, manual) \
-      for (BlockShapeMap::iterator iter = manual->InnerMap.begin(); iter != manual->InnerMap.end(); iter++) { \
+    #define RECURSE_Multiple(shape, func) \
+      for (BlockShapeMap::iterator iter = shape->InnerMap.begin(); iter != shape->InnerMap.end(); iter++) { \
         func(iter->second); \
       }
-    #define RECURSE_MULTIPLE(func) RECURSE_MULTIPLE_MANUAL(func, Multiple);
-    #define RECURSE_LOOP(func) \
-      func(Loop->Inner);
+    #define RECURSE_Loop(shape, func) \
+      func(shape->Inner);
+    #define RECURSE(shape, func) RECURSE_##shape(shape, func);
 
     #define SHAPE_SWITCH(var, simple, multiple, loop) \
       if (SimpleShape *Simple = Shape::IsSimple(var)) { \
@@ -926,20 +932,6 @@ void Relooper::Calculate(Block *Entry) {
         loop; \
       }
 
-    #define SHAPE_SWITCH_AUTO(var, simple, multiple, loop, func) \
-      if (SimpleShape *Simple = Shape::IsSimple(var)) { \
-        simple; \
-        func(Simple->Next); \
-      } else if (MultipleShape *Multiple = Shape::IsMultiple(var)) { \
-        multiple; \
-        RECURSE_MULTIPLE(func) \
-        func(Multiple->Next); \
-      } else if (LoopShape *Loop = Shape::IsLoop(var)) { \
-        loop; \
-        RECURSE_LOOP(func); \
-        func(Loop->Next); \
-      }
-
     // Find the blocks that natural control flow can get us directly to, or through a multiple that we ignore
     void FollowNaturalFlow(Shape *S, BlockSet &Out) {
       SHAPE_SWITCH(S, {
@@ -992,7 +984,7 @@ void Relooper::Calculate(Block *Entry) {
             for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) {
               Block *Target = iter->first;
               Branch *Details = iter->second;
-              if (Details->Type != Branch::Direct && NaturalBlocks.find(Target) != NaturalBlocks.end()) { // note: cannot handle split blocks
+              if (Details->Type != Branch::Direct && contains(NaturalBlocks, Target)) { // note: cannot handle split blocks
                 Details->Type = Branch::Direct;
                 if (MultipleShape *Multiple = Shape::IsMultiple(Details->Ancestor)) {
                   Multiple->NeedLoop--;
@@ -1036,7 +1028,7 @@ void Relooper::Calculate(Block *Entry) {
           // If we are fusing a Multiple with a loop into this Simple, then visit it now
           if (Fused && Fused->NeedLoop) {
             LoopStack.push(Fused);
-            RECURSE_MULTIPLE_MANUAL(FindLabeledLoops, Fused);
+            RECURSE_Multiple(Fused, FindLabeledLoops);
           }
           for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) {
             Block *Target = iter->first;
@@ -1062,14 +1054,14 @@ void Relooper::Calculate(Block *Entry) {
           if (Multiple->NeedLoop) {
             LoopStack.push(Multiple);
           }
-          RECURSE_MULTIPLE(FindLabeledLoops);
+          RECURSE(Multiple, FindLabeledLoops);
           if (Multiple->NeedLoop) {
             LoopStack.pop();
           }
           Next = Root->Next;
         }, {
           LoopStack.push(Loop);
-          RECURSE_LOOP(FindLabeledLoops);
+          RECURSE(Loop, FindLabeledLoops);
           LoopStack.pop();
           Next = Root->Next;
         });
@@ -1123,7 +1115,7 @@ void Debugging::Dump(BlockSet &Blocks, const char *prefix) {
     for (BlockBranchMap::iterator iter2 = Curr->BranchesOut.begin(); iter2 != Curr->BranchesOut.end(); iter2++) {
       Block *Other = iter2->first;
       printf("  -> %d\n", Other->Id);
-      assert(Other->BranchesIn.find(Curr) != Other->BranchesIn.end());
+      assert(contains(Other->BranchesIn, Curr));
     }
   }
 }
diff --git a/src/relooper/Relooper.h b/src/relooper/Relooper.h
index fe56a133..e54b578c 100644
--- a/src/relooper/Relooper.h
+++ b/src/relooper/Relooper.h
@@ -59,8 +59,6 @@ struct Block {
   Shape *Parent; // The shape we are directly inside
   int Id; // A unique identifier
   const char *Code; // The string representation of the code in this block. Owning pointer (we copy the input)
-  Block *DefaultTarget; // The block we branch to without checking the condition, if none of the other conditions held.
-                        // Since each block *must* branch somewhere, this must be set
   bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching us requires setting the label variable
 
   Block(const char *CodeInit);
diff --git a/src/settings.js b/src/settings.js
index 39774690..dff52adf 100644
--- a/src/settings.js
+++ b/src/settings.js
@@ -67,9 +67,9 @@ var RELOOP = 0; // Recreate js native loops from llvm data
 var RELOOPER = 'relooper.js'; // Loads the relooper from this path relative to compiler.js
 
 var USE_TYPED_ARRAYS = 2; // Use typed arrays for the heap. See https://github.com/kripken/emscripten/wiki/Code-Generation-Modes/
-                          // 0 means no typed arrays are used.
+                          // 0 means no typed arrays are used. This mode disallows LLVM optimizations.
                           // 1 has two heaps, IHEAP (int32) and FHEAP (double),
-                          // and addresses there are a match for normal addresses. This is deprecated.
+                          // and addresses there are a match for normal addresses. This mode disallows LLVM optimizations.
                           // 2 is a single heap, accessible through views as int8, int32, etc. This is
                           //   the recommended mode both for performance and for compatibility.
 var USE_FHEAP = 1; // Relevant in USE_TYPED_ARRAYS == 1. If this is disabled, only IHEAP will be used, and FHEAP
@@ -236,6 +236,11 @@ var FS_LOG = 0; // Log all FS operations.  This is especially helpful when you'r
                 // a new project and want to see a list of file system operations happening
                 // so that you can create a virtual file system with all of the required files.
 
+var USE_BSS = 1; // https://en.wikipedia.org/wiki/.bss
+                 // When enabled, 0-initialized globals are sorted to the end of the globals list,
+                 // enabling us to not explicitly store the initialization value for each 0 byte.
+                 // This significantly lowers the memory initialization array size.
+
 var NAMED_GLOBALS = 0; // If 1, we use global variables for globals. Otherwise
                        // they are referred to by a base plus an offset (called an indexed global),
                        // saving global variables but adding runtime overhead.
@@ -1260,10 +1265,14 @@ var C_DEFINES = {'SI_MESGQ': '5',
    'SIGTTOU': '22',
    '_CS_POSIX_V7_LP64_OFF64_LDFLAGS': '10',
    '_SC_TTY_NAME_MAX': '41',
-   'AF_INET': '1',
+   'AF_INET': '2',
    'AF_INET6': '6',
+   'PF_INET': '2',
+   'PF_INET6': '6',
    'FIONREAD': '1',
    'SOCK_STREAM': '200',
-   'IPPROTO_TCP': 1
+   'SOCK_DGRAM': '20',
+   'IPPROTO_TCP': '1',
+   'IPPROTO_UDP': '2'
 };
 
diff --git a/src/shell.html b/src/shell.html
index 00765271..22bc9de9 100644
--- a/src/shell.html
+++ b/src/shell.html
@@ -86,7 +86,7 @@
         }
       };
       Module.setStatus('Downloading...');
-    </script>      
+    </script>
     <script type='text/javascript'>{{{ SCRIPT_CODE }}}</script>
   </body>
 </html>
diff --git a/system/include/libc/grp.h b/system/include/libc/grp.h
index 1273e395..61a1b2c7 100644
--- a/system/include/libc/grp.h
+++ b/system/include/libc/grp.h
@@ -82,9 +82,9 @@ void		 setgrfile (const char *);
 char		*group_from_gid (gid_t, int);
 int		 setgroupent (int);
 #endif /* !__CYGWIN__ */
-int		 initgroups (const char *, gid_t);
 #endif /* !_XOPEN_SOURCE */
 #endif /* !_POSIX_SOURCE */
+int		 initgroups (const char *, gid_t);
 #endif /* !__INSIDE_CYGWIN__ */
 
 #ifdef __cplusplus
diff --git a/system/include/libc/sys/signal.h b/system/include/libc/sys/signal.h
index 910ccf78..49a94d80 100644
--- a/system/include/libc/sys/signal.h
+++ b/system/include/libc/sys/signal.h
@@ -73,6 +73,7 @@ typedef struct {
                          /*   three arguments instead of one. */
 #define SA_NODEFER   4   /* XXX Emscripten */
 #define SA_RESETHAND 8   /* XXX Emscripten */
+#define SA_RESTART   16  /* XXX Emscripten */
 
 /* struct sigaction notes from POSIX:
  *
diff --git a/system/include/libc/sys/stat.h b/system/include/libc/sys/stat.h
index b31dc079..e2b20187 100644
--- a/system/include/libc/sys/stat.h
+++ b/system/include/libc/sys/stat.h
@@ -58,6 +58,42 @@ struct	stat
 #endif
 };
 
+struct	stat64
+{
+  dev_t		st_dev;
+  ino_t		st_ino;
+  mode_t	st_mode;
+  nlink_t	st_nlink;
+  uid_t		st_uid;
+  gid_t		st_gid;
+  dev_t		st_rdev;
+  off_t		st_size;
+#if defined(__rtems__)
+  struct timespec st_atim;
+  struct timespec st_mtim;
+  struct timespec st_ctim;
+  blksize_t     st_blksize;
+  blkcnt_t	st_blocks;
+#else
+  /* SysV/sco doesn't have the rest... But Solaris, eabi does.  */
+#if defined(__svr4__) && !defined(__PPC__) && !defined(__sun__)
+  time_t	st_atime;
+  time_t	st_mtime;
+  time_t	st_ctime;
+#else
+  time_t	st_atime;
+  long		st_spare1;
+  time_t	st_mtime;
+  long		st_spare2;
+  time_t	st_ctime;
+  long		st_spare3;
+  long		st_blksize;
+  long		st_blocks;
+  long	st_spare4[2];
+#endif
+#endif
+};
+
 #if defined(__rtems__)
 #define st_atime st_atim.tv_sec
 #define st_ctime st_ctim.tv_sec
@@ -145,16 +181,16 @@ struct	stat
 int	_EXFUN(chmod,( const char *__path, mode_t __mode ));
 int     _EXFUN(fchmod,(int __fd, mode_t __mode));
 int	_EXFUN(fstat,( int __fd, struct stat *__sbuf ));
-int	_EXFUN(fstat64,( int __fd, struct stat *__sbuf )); /* XXX Emscripten */
+int	_EXFUN(fstat64,( int __fd, struct stat64 *__sbuf )); /* XXX Emscripten */
 int	_EXFUN(mkdir,( const char *_path, mode_t __mode ));
 int	_EXFUN(mkfifo,( const char *__path, mode_t __mode ));
 int	_EXFUN(stat,( const char *__path, struct stat *__sbuf ));
-int	_EXFUN(stat64,( const char *__path, struct stat *__sbuf )); /* XXX Emscripten */
+int	_EXFUN(stat64,( const char *__path, struct stat64 *__sbuf )); /* XXX Emscripten */
 mode_t	_EXFUN(umask,( mode_t __mask ));
 
 #if defined(EMSCRIPTEN) || defined (__SPU__) || defined(__rtems__) || defined(__CYGWIN__) && !defined(__INSIDE_CYGWIN__) 
 int	_EXFUN(lstat,( const char *__path, struct stat *__buf ));
-int	_EXFUN(lstat64,( const char *__path, struct stat *__buf )); /* XXX Emscripten */
+int	_EXFUN(lstat64,( const char *__path, struct stat64 *__buf )); /* XXX Emscripten */
 int	_EXFUN(mknod,( const char *__path, mode_t __mode, dev_t __dev ));
 #endif
 
diff --git a/system/include/libc/time.h b/system/include/libc/time.h
index 83993942..2548d6be 100644
--- a/system/include/libc/time.h
+++ b/system/include/libc/time.h
@@ -126,6 +126,9 @@ extern __IMPORT char *_tzname[2];
 #ifndef tzname
 #define tzname _tzname
 #endif
+#ifndef timezone
+#define timezone _timezone
+#endif
 #endif /* !__STRICT_ANSI__ */
 
 #ifdef __cplusplus
diff --git a/system/include/net/netinet/in.h b/system/include/net/netinet/in.h
index 569a56b0..cf324f3d 100644
--- a/system/include/net/netinet/in.h
+++ b/system/include/net/netinet/in.h
@@ -7,6 +7,7 @@ extern "C" {
 #endif
 
 #include <arpa/inet.h>
+#include <stdint.h>
 
 enum {
     IPPROTO_IP = 0,
@@ -19,8 +20,10 @@ enum {
 };
 
 #define INET_ADDRSTRLEN 16
+#define INET6_ADDRSTRLEN 46
 
 #define INADDR_ANY 0
+#define INADDR_LOOPBACK 0x7f000001 /* 127.0.0.1 */
 
 struct in_addr {
   unsigned long s_addr;
@@ -34,12 +37,27 @@ struct sockaddr_in {
 };
 
 struct in6_addr {
-  unsigned char s6_addr[16];
+  union {
+    uint8_t _s6_addr8[16];
+    uint16_t _s6_addr16[8];
+    uint32_t _s6_addr32[4];
+  } _u;
+#define s6_addr _u._s6_addr8
+#define s6_addr16 _u._s6_addr16
+#define s6_addr32 _u._s6_addr32
 };
 
+extern const struct in6_addr in6addr_any;
+extern const struct in6_addr in6addr_loopback;
+extern const struct in6_addr in6addr_linklocal_allnodes;
+extern const struct in6_addr in6addr_linklocal_allrouters;
+extern const struct in6_addr in6addr_interfacelocal_allnodes;
+extern const struct in6_addr in6addr_interfacelocal_allrouters;
+extern const struct in6_addr in6addr_sitelocal_allrouters;
+
 struct sockaddr_in6 {
-  short           sin6_family;
-  short           sin6_port;
+  int             sin6_family;
+  unsigned short  sin6_port;
   int             sin6_flowinfo;
   struct in6_addr sin6_addr;
   int             sin6_scope_id;
@@ -70,6 +88,76 @@ struct ip_mreq {
 #define IP_MULTICAST_ALL 49
 #define IP_UNICAST_IF 50
 
+/*
+ * Tests for IPv6 address types
+ */
+
+#define	IN6_IS_ADDR_LINKLOCAL(addr) \
+       	(((addr)->s6_addr32[0] & htonl(0xffc00000)) == htonl(0xfe800000))
+
+#define	IN6_IS_ADDR_LOOPBACK(addr) \
+       	(((addr)->s6_addr32[0] == 0) && ((addr)->s6_addr32[1] == 0) && \
+	 ((addr)->s6_addr32[2] == 0) && ((addr)->s6_addr32[3] == htonl(1)))
+
+#define	IN6_IS_ADDR_MULTICAST(addr) \
+       	((addr)->s6_addr[0] == 0xff) /* byte view of the address; first byte 0xff */
+	
+#define	IN6_IS_ADDR_SITELOCAL(addr) \
+       	(((addr)->s6_addr32[0] & htonl(0xffc00000)) == htonl(0xfec00000))
+
+#define	IN6_IS_ADDR_UNSPECIFIED(addr) \
+       	(((addr)->s6_addr32[0] == 0) && ((addr)->s6_addr32[1] == 0) && \
+	 ((addr)->s6_addr32[2] == 0) && ((addr)->s6_addr32[3] == 0))
+
+#define	IN6_IS_ADDR_V4COMPAT(addr) \
+       	(((addr)->s6_addr32[0] == 0) && ((addr)->s6_addr32[1] == 0) && \
+	 ((addr)->s6_addr32[2] == 0) && ((addr)->s6_addr32[3] & ~htonl(1)))
+
+#define	IN6_IS_ADDR_V4MAPPED(addr) \
+       	(((addr)->s6_addr32[0] == 0) && ((addr)->s6_addr32[1] == 0) && \
+	 ((addr)->s6_addr32[2] == htonl(0xffff)))
+
+#define	IN6_ARE_ADDR_EQUAL(addr1, addr2) \
+       	(((addr1)->s6_addr32[0] == (addr2)->s6_addr32[0]) && \
+	 ((addr1)->s6_addr32[1] == (addr2)->s6_addr32[1]) && \
+	 ((addr1)->s6_addr32[2] == (addr2)->s6_addr32[2]) && \
+	 ((addr1)->s6_addr32[3] == (addr2)->s6_addr32[3]))
+
+/*
+ * IPv6 Multicast scoping.  The scope is stored
+ * in the bottom 4 bits of the second byte of the
+ * multicast address.
+ */
+		     /* 0x0 */	/* reserved */
+#define	IN6_NODE_LOCAL	0x1	/* node-local scope */
+#define	IN6_LINK_LOCAL	0x2	/* link-local scope */
+		     /* 0x3 */	/* (unassigned) */
+		     /* 0x4 */	/* (unassigned) */
+#define	IN6_SITE_LOCAL	0x5	/* site-local scope */
+		     /* 0x6 */	/* (unassigned) */
+		     /* 0x7 */	/* (unassigned) */
+#define	IN6_ORG_LOCAL	0x8	/* organization-local scope */
+		     /* 0x9 */	/* (unassigned) */
+		     /* 0xA */	/* (unassigned) */
+		     /* 0xB */	/* (unassigned) */
+		     /* 0xC */	/* (unassigned) */
+		     /* 0xD */	/* (unassigned) */
+#define	IN6_GLOBAL	0xE	/* global scope */
+		     /* 0xF */	/* reserved */
+
+#define	IN6_MSCOPE(addr)	((addr)->s6_addr[1] & 0x0f) /* low 4 bits of the second byte */
+
+#define	IN6_IS_ADDR_MC_NODELOCAL(addr) \
+       	(IN6_IS_ADDR_MULTICAST(addr) && (IN6_MSCOPE(addr) == IN6_NODE_LOCAL))
+#define	IN6_IS_ADDR_MC_LINKLOCAL(addr) \
+       	(IN6_IS_ADDR_MULTICAST(addr) && (IN6_MSCOPE(addr) == IN6_LINK_LOCAL))
+#define	IN6_IS_ADDR_MC_SITELOCAL(addr) \
+       	(IN6_IS_ADDR_MULTICAST(addr) && (IN6_MSCOPE(addr) == IN6_SITE_LOCAL))
+#define	IN6_IS_ADDR_MC_ORGLOCAL(addr) \
+       	(IN6_IS_ADDR_MULTICAST(addr) && (IN6_MSCOPE(addr) == IN6_ORG_LOCAL))
+#define	IN6_IS_ADDR_MC_GLOBAL(addr) \
+       	(IN6_IS_ADDR_MULTICAST(addr) && (IN6_MSCOPE(addr) == IN6_GLOBAL))
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/system/include/sys/socket.h b/system/include/sys/socket.h
index b83ce89a..3168f85b 100644
--- a/system/include/sys/socket.h
+++ b/system/include/sys/socket.h
@@ -38,7 +38,7 @@ extern "C" {
 #define SHUT_RDWR 2
 
 typedef unsigned int sa_family_t;
-#define AF_INET 1
+#define AF_INET PF_INET
 #define AF_INET6 6
 #define PF_INET6 AF_INET6
 
diff --git a/tests/embind/shell.html b/tests/embind/shell.html
index 6664ec78..c3655e03 100644
--- a/tests/embind/shell.html
+++ b/tests/embind/shell.html
@@ -85,10 +85,6 @@
       };
       Module.setStatus('Downloading...');
     </script>      
-    <script type='text/javascript'>
-
-      {{{ SCRIPT_CODE }}}
-
-    </script>
+    <script type='text/javascript'>{{{ SCRIPT_CODE }}}</script>
   </body>
 </html>
diff --git a/tests/fuzz/creduce_tester.py b/tests/fuzz/creduce_tester.py
index c3460e9d..d5618c2e 100755
--- a/tests/fuzz/creduce_tester.py
+++ b/tests/fuzz/creduce_tester.py
@@ -1,53 +1,53 @@
 #!/usr/bin/python
 
 '''
-Runs csmith, a C fuzzer, and looks for bugs
+Usage: creduce ./creduce_tester.py newfail1.c
 '''
 
-import os, sys, difflib
+import os, sys
 from subprocess import Popen, PIPE, STDOUT
 
 sys.path += [os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'tools')]
-import shared
+import shared, jsrun
+
+# creduce will only pass the filename of the C file as the first arg, so other
+# configuration options will have to be hardcoded.
+CSMITH_CFLAGS = ['-I', os.path.join(os.environ['CSMITH_PATH'], 'runtime')]
+ENGINE = shared.JS_ENGINES[0]
+EMCC_ARGS = ['-O2', '-s', 'ASM_JS=1', '-s', 'PRECISE_I64_MATH=1', '-s',
+  'PRECISE_I32_MUL=1']
 
 filename = sys.argv[1]
+obj_filename = os.path.splitext(filename)[0]
+js_filename = obj_filename + '.js'
 print 'testing file', filename
 
-print '2) Compile natively'
-shared.try_delete(filename)
-shared.execute([shared.CLANG_CC, '-O2', filename + '.c', '-o', filename] + CSMITH_CFLAGS, stderr=PIPE)
-assert os.path.exists(filename)
-print '3) Run natively'
 try:
-  correct = shared.timeout_run(Popen([filename], stdout=PIPE, stderr=PIPE), 3)
+  print '2) Compile natively'
+  shared.check_execute([shared.CLANG_CC, '-O2', filename, '-o', obj_filename] + CSMITH_CFLAGS)
+  print '3) Run natively'
+  correct = jsrun.timeout_run(Popen([obj_filename], stdout=PIPE, stderr=PIPE), 3)
 except Exception, e:
   print 'Failed or infinite looping in native, skipping', e
-  notes['invalid'] += 1
-  os.exit(0) # boring 
+  sys.exit(1) # boring
 
 print '4) Compile JS-ly and compare'
 
 def try_js(args):
-  shared.try_delete(filename + '.js')
-  shared.execute([shared.EMCC, '-O2', '-s', 'ASM_JS=1', '-s', 'PRECISE_I64_MATH=1', '-s', 'PRECISE_I32_MUL=1', filename + '.c', '-o', filename + '.js'] + CSMITH_CFLAGS + args, stderr=PIPE)
-  assert os.path.exists(filename + '.js')
-  js = shared.run_js(filename + '.js', stderr=PIPE, engine=engine1)
-  assert correct == js, ''.join([a.rstrip()+'\n' for a in difflib.unified_diff(correct.split('\n'), js.split('\n'), fromfile='expected', tofile='actual')])
+  shared.check_execute([shared.EMCC] + EMCC_ARGS + CSMITH_CFLAGS + args +
+    [filename, '-o', js_filename])
+  js = shared.run_js(js_filename, stderr=PIPE, engine=ENGINE)
+  assert correct == js
 
 # Try normally, then try unaligned because csmith does generate nonportable code that requires x86 alignment
-ok = False
-normal = True
-for args, note in [([], None), (['-s', 'UNALIGNED_MEMORY=1'], 'unaligned')]:
+# If you are sure that alignment is not the cause, disable it for a faster reduction
+for args in [[]]:
   try:
     try_js(args)
-    ok = True
-    if note:
-      notes[note] += 1
     break
   except Exception, e:
-    print e
-    normal = False
-if not ok: sys.exit(1)
-
-sys.exit(0) # boring
+    pass
+else:
+  sys.exit(0)
 
+sys.exit(1) # boring
diff --git a/tests/fuzz/csmith_driver.py b/tests/fuzz/csmith_driver.py
index b60e67f7..c987a3be 100755
--- a/tests/fuzz/csmith_driver.py
+++ b/tests/fuzz/csmith_driver.py
@@ -1,11 +1,14 @@
 #!/usr/bin/python
 
 '''
-Runs csmith, a C fuzzer, and looks for bugs
+Runs csmith, a C fuzzer, and looks for bugs.
+
+CSMITH_PATH should be set to something like /usr/local/include/csmith
 '''
 
 import os, sys, difflib, shutil
-from subprocess import Popen, PIPE, STDOUT
+from distutils.spawn import find_executable
+from subprocess import check_call, Popen, PIPE, STDOUT, CalledProcessError
 
 sys.path += [os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'tools')]
 import shared
@@ -15,8 +18,11 @@ engine2 = eval('shared.' + sys.argv[2]) if len(sys.argv) > 2 else None
 
 print 'testing js engines', engine1, engine2
 
-CSMITH = os.path.expanduser('~/Dev/csmith/src/csmith')
-CSMITH_CFLAGS = ['-I' + os.path.expanduser('~/Dev/csmith/runtime/')]
+CSMITH = os.environ.get('CSMITH') or find_executable('csmith')
+assert CSMITH, 'Could not find CSmith on your PATH. Please set the environment variable CSMITH.'
+CSMITH_PATH = os.environ.get('CSMITH_PATH')
+assert CSMITH_PATH, 'Please set the environment variable CSMITH_PATH.'
+CSMITH_CFLAGS = ['-I', os.path.join(CSMITH_PATH, 'runtime')]
 
 filename = os.path.join(shared.CANONICAL_TEMP_DIR, 'fuzzcode')
 
@@ -31,7 +37,7 @@ fails = 0
 while 1:
   print 'Tried %d, notes: %s' % (tried, notes)
   print '1) Generate C'
-  shared.execute([CSMITH, '--no-volatiles', '--no-math64', '--no-packed-struct'],# +
+  check_call([CSMITH, '--no-volatiles', '--no-math64', '--no-packed-struct'],# +
                  #['--max-block-depth', '2', '--max-block-size', '2', '--max-expr-complexity', '2', '--max-funcs', '2'],
                  stdout=open(filename + '.c', 'w'))
   #shutil.copyfile(filename + '.c', 'testcase%d.c' % tried)
@@ -41,11 +47,11 @@ while 1:
 
   print '2) Compile natively'
   shared.try_delete(filename)
-  shared.execute([shared.CLANG_CC, '-O2', filename + '.c', '-o', filename + '1'] + CSMITH_CFLAGS, stderr=PIPE) #  + shared.EMSDK_OPTS
-  shared.execute([shared.CLANG_CC, '-O2', '-emit-llvm', '-c', '-Xclang', '-triple=i386-pc-linux-gnu', filename + '.c', '-o', filename + '.bc'] + CSMITH_CFLAGS + shared.EMSDK_OPTS, stderr=PIPE)
-  shared.execute([shared.path_from_root('tools', 'nativize_llvm.py'), filename + '.bc'], stdout=PIPE, stderr=PIPE)
+  shared.check_execute([shared.CLANG_CC, '-O2', filename + '.c', '-o', filename + '1'] + CSMITH_CFLAGS) #  + shared.EMSDK_OPTS
+  shared.check_execute([shared.CLANG_CC, '-O2', '-emit-llvm', '-c', '-Xclang', '-triple=i386-pc-linux-gnu', filename + '.c', '-o', filename + '.bc'] + CSMITH_CFLAGS + shared.EMSDK_OPTS)
+  shared.check_execute([shared.path_from_root('tools', 'nativize_llvm.py'), filename + '.bc'])
   shutil.move(filename + '.bc.run', filename + '2')
-  shared.execute([shared.CLANG_CC, filename + '.c', '-o', filename + '3'] + CSMITH_CFLAGS, stderr=PIPE)
+  shared.check_execute([shared.CLANG_CC, filename + '.c', '-o', filename + '3'] + CSMITH_CFLAGS)
   print '3) Run natively'
   try:
     correct1 = shared.jsrun.timeout_run(Popen([filename + '1'], stdout=PIPE, stderr=PIPE), 3)
@@ -65,7 +71,7 @@ while 1:
   def try_js(args):
     shared.try_delete(filename + '.js')
     print '(compile)'
-    shared.execute([shared.EMCC, '-O2', '-s', 'ASM_JS=1', filename + '.c', '-o', filename + '.js'] + CSMITH_CFLAGS + args, stderr=PIPE)
+    shared.check_execute([shared.EMCC, '-O2', '-s', 'ASM_JS=1', filename + '.c', '-o', filename + '.js'] + CSMITH_CFLAGS + args)
     assert os.path.exists(filename + '.js')
     print '(run)'
     js = shared.run_js(filename + '.js', stderr=PIPE, engine=engine1, check_timeout=True)
@@ -91,7 +97,7 @@ while 1:
     print "EMSCRIPTEN BUG"
     notes['embug'] += 1
     fails += 1
-    shutil.copyfile('fuzzcode.c', 'newfail%d.c' % fails)
+    shutil.copyfile(filename + '.c', 'newfail%d.c' % fails)
     continue
   #if not ok:
   #  try: # finally, try with safe heap. if that is triggered, this is nonportable code almost certainly
@@ -118,7 +124,7 @@ while 1:
       print "ODIN VALIDATION BUG"
       notes['embug'] += 1
       fails += 1
-      shutil.copyfile('fuzzcode.c', 'newfail%d.c' % fails)
+      shutil.copyfile(filename + '.c', 'newfail%d.c' % fails)
       continue
 
     js2 = js2.replace('\nwarning: Successfully compiled asm.js code\n', '')
diff --git a/tests/hello_world_gles_shell.html b/tests/hello_world_gles_shell.html
index 4abee90c..2459d755 100644
--- a/tests/hello_world_gles_shell.html
+++ b/tests/hello_world_gles_shell.html
@@ -48,9 +48,8 @@
       }
       Module.postRun = doTest;
 
-      // The compiled code
-      {{{ SCRIPT_CODE }}}
     </script>
+    <script>{{{ SCRIPT_CODE }}}</script>
   </body>
 </html>
 
diff --git a/tests/runner.py b/tests/runner.py
index 46dc5e11..2ce72240 100755
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -230,11 +230,11 @@ process(sys.argv[1])
         os.remove(f + '.o')
       except:
         pass
-      args = [PYTHON, EMCC] + Building.COMPILER_TEST_OPTS + \
+      args = [PYTHON, EMCC] + Building.COMPILER_TEST_OPTS + Settings.serialize() + \
              ['-I', dirname, '-I', os.path.join(dirname, 'include')] + \
              map(lambda include: '-I' + include, includes) + \
              ['-c', f, '-o', f + '.o']
-      output = Popen(args, stdout=PIPE, stderr=self.stderr_redirect).communicate()[0]
+      output = Popen(args, stdout=PIPE, stderr=self.stderr_redirect if not DEBUG else None).communicate()[0]
       assert os.path.exists(f + '.o'), 'Source compilation error: ' + output
 
     # Link all files
@@ -435,7 +435,7 @@ process(sys.argv[1])
 
 sys.argv = map(lambda arg: arg if not arg.startswith('test_') else 'default.' + arg, sys.argv)
 
-test_modes = ['default', 'o1', 'o2', 'asm1', 'asm2', 'asm2g', 'asm2x86', 's_0_0', 's_0_1', 's_1_0', 's_1_1']
+test_modes = ['default', 'o1', 'o2', 'asm1', 'asm2', 'asm2g', 'asm2x86', 's_0_0', 's_0_1']
 
 test_index = 0
 
@@ -2104,12 +2104,12 @@ Succeeded!
             }
             return int(&x); // both for the number, and forces x to not be nativized
           }
-          int main()
+          int main(int argc, char **argv)
           {
             // We should get the same value for the first and last - stack has unwound
-            int x1 = test(0);
+            int x1 = test(argc - 2);
             int x2 = test(100);
-            int x3 = test(0);
+            int x3 = test((argc - 2) / 4);
             printf("*%d,%d*\\n", x3-x1, x2 != x1);
             return 0;
           }
@@ -4079,7 +4079,7 @@ def process(filename):
         #include <assert.h>
         #include "emscripten.h"
 
-        int main()
+        int main(int argc, char **argv)
         {
           char *buf1 = (char*)malloc(100);
           char *data1 = "hello";
@@ -4093,6 +4093,8 @@ def process(filename):
 
           int totalMemory = emscripten_run_script_int("TOTAL_MEMORY");
           char *buf3 = (char*)malloc(totalMemory+1);
+          buf3[argc] = (int)buf2;
+          if (argc % 7 == 6) printf("%d\n", memcpy(buf3, buf1, argc));
           char *buf4 = (char*)malloc(100);
           float *buf5 = (float*)malloc(100);
           //printf("totalMemory: %d bufs: %d,%d,%d,%d,%d\n", totalMemory, buf1, buf2, buf3, buf4, buf5);
@@ -4230,6 +4232,8 @@ def process(filename):
 ''', args=['34962', '26214', '35040'])
 
     def test_indirectbr(self):
+        Building.COMPILER_TEST_OPTS = filter(lambda x: x != '-g', Building.COMPILER_TEST_OPTS)
+
         src = '''
           #include <stdio.h>
           int main(void) {
@@ -7550,11 +7554,11 @@ def process(filename):
           return 0;
         }
       '''
-      self.do_run(src, '''www.cheezburger.com : 1 : 4
+      self.do_run(src, '''www.cheezburger.com : 2 : 4
 * -84.29.1.0.
-fail.on.this.never.work : 1 : 4
+fail.on.this.never.work : 2 : 4
 * -84.29.2.0.
-localhost : 1 : 4
+localhost : 2 : 4
 * -84.29.3.0.
 ''')
 
@@ -8217,7 +8221,7 @@ void*:16
       if self.run_name == 'o2':
         self.emcc_args += ['--closure', '1'] # Use closure here for some additional coverage
 
-      Building.COMPILER_TEST_OPTS = [] # remove -g, so we have one test without it by default
+      Building.COMPILER_TEST_OPTS = filter(lambda x: x != '-g', Building.COMPILER_TEST_OPTS) # remove -g, so we have one test without it by default
       if self.emcc_args is None: Settings.SAFE_HEAP = 0 # Has some actual loads of unwritten-to places, in the C++ code...
 
       # Overflows happen in hash loop
@@ -8249,7 +8253,7 @@ void*:16
     def test_gcc_unmangler(self):
       Settings.NAMED_GLOBALS = 1 # test coverage for this
 
-      Building.COMPILER_TEST_OPTS = ['-I' + path_from_root('third_party')]
+      Building.COMPILER_TEST_OPTS += ['-I' + path_from_root('third_party')]
 
       self.do_run(open(path_from_root('third_party', 'gcc_demangler.c')).read(), '*d_demangle(char const*, int, unsigned int*)*', args=['_ZL10d_demanglePKciPj'])
 
@@ -8364,6 +8368,8 @@ def process(filename):
                    force_c=True)
 
     def test_zlib(self):
+      if not Settings.USE_TYPED_ARRAYS == 2: return self.skip('works in general, but cached build will be optimized and fail, so disable this')
+
       if Settings.ASM_JS:
         self.banned_js_engines = [NODE_JS] # TODO investigate
 
@@ -8472,6 +8478,8 @@ def process(filename):
     def test_openjpeg(self):
       if self.emcc_args is None: return self.skip('needs libc for getopt')
 
+      Building.COMPILER_TEST_OPTS = filter(lambda x: x != '-g', Building.COMPILER_TEST_OPTS) # remove -g, so we have one test without it by default
+
       if Settings.USE_TYPED_ARRAYS == 2:
         Settings.CORRECT_SIGNS = 1
       else:
@@ -8593,6 +8601,7 @@ def process(filename):
     def test_python(self):
       if self.emcc_args is None: return self.skip('requires emcc')
       if Settings.QUANTUM_SIZE == 1: return self.skip('TODO: make this work')
+      if not self.is_le32(): return self.skip('fails on non-le32') # FIXME
 
       #Settings.EXPORTED_FUNCTIONS += ['_PyRun_SimpleStringFlags'] # for the demo
 
@@ -8692,6 +8701,7 @@ def process(filename):
 
     def test_autodebug(self):
       if Building.LLVM_OPTS: return self.skip('LLVM opts mess us up')
+      Building.COMPILER_TEST_OPTS += ['--llvm-opts', '0']
 
       # Run a test that should work, generating some code
       self.test_structs()
@@ -8970,13 +8980,11 @@ def process(filename):
       self.do_run(src, output)
       shutil.move(self.in_dir('src.cpp.o.js'), self.in_dir('normal.js'))
 
-      self.emcc_args.append('-s')
-      self.emcc_args.append('ASM_JS=0')
+      Settings.ASM_JS = 0
       Settings.PGO = 1
       self.do_run(src, output)
+      Settings.ASM_JS = 1
       Settings.PGO = 0
-      self.emcc_args.append('-s')
-      self.emcc_args.append('ASM_JS=1')
 
       shutil.move(self.in_dir('src.cpp.o.js'), self.in_dir('pgo.js'))
       pgo_output = run_js(self.in_dir('pgo.js')).split('\n')[1]
@@ -9699,7 +9707,9 @@ def process(filename):
       if Settings.ASM_JS: return self.skip('asm always has corrections on')
 
       if '-g' not in Building.COMPILER_TEST_OPTS: Building.COMPILER_TEST_OPTS.append('-g')
-      if self.emcc_args: self.emcc_args += ['--llvm-opts', '0'] # llvm full opts make the expected failures here not happen
+      if self.emcc_args:
+        self.emcc_args += ['--llvm-opts', '0'] # llvm full opts make the expected failures here not happen
+        Building.COMPILER_TEST_OPTS += ['--llvm-opts', '0']
 
       Settings.CHECK_SIGNS = 0
       Settings.CHECK_OVERFLOWS = 0
@@ -10005,7 +10015,7 @@ finalizing 3 (global == 0)
 ''')
 
   # Generate tests for everything
-  def make_run(fullname, name=-1, compiler=-1, llvm_opts=0, embetter=0, quantum_size=0, typed_arrays=0, emcc_args=None, env='{}'):
+  def make_run(fullname, name=-1, compiler=-1, embetter=0, quantum_size=0, typed_arrays=0, emcc_args=None, env='{}'):
     exec('''
 class %s(T):
   run_name = '%s'
@@ -10040,9 +10050,18 @@ class %s(T):
       Building.LLVM_OPTS = 0
       if '-O2' in self.emcc_args:
         Building.COMPILER_TEST_OPTS = [] # remove -g in -O2 tests, for more coverage
+      #Building.COMPILER_TEST_OPTS += self.emcc_args
+      for arg in self.emcc_args:
+        if arg.startswith('-O'):
+          Building.COMPILER_TEST_OPTS.append(arg) # so bitcode is optimized too, this is for cpp to ll
+        else:
+          try:
+            key, value = arg.split('=')
+            Settings[key] = value # forward  -s K=V
+          except:
+            pass
       return
 
-    llvm_opts = %d # 1 is yes, 2 is yes and unsafe
     embetter = %d
     quantum_size = %d
     # TODO: Move much of these to a init() function in shared.py, and reuse that
@@ -10052,14 +10071,13 @@ class %s(T):
     Settings.MICRO_OPTS = embetter
     Settings.QUANTUM_SIZE = quantum_size
     Settings.ASSERTIONS = 1-embetter
-    Settings.SAFE_HEAP = 1-(embetter and llvm_opts)
-    Building.LLVM_OPTS = llvm_opts
-    Settings.CHECK_OVERFLOWS = 1-(embetter or llvm_opts)
-    Settings.CORRECT_OVERFLOWS = 1-(embetter and llvm_opts)
+    Settings.SAFE_HEAP = 1-embetter
+    Settings.CHECK_OVERFLOWS = 1-embetter
+    Settings.CORRECT_OVERFLOWS = 1-embetter
     Settings.CORRECT_SIGNS = 0
     Settings.CORRECT_ROUNDINGS = 0
     Settings.CORRECT_OVERFLOWS_LINES = CORRECT_SIGNS_LINES = CORRECT_ROUNDINGS_LINES = SAFE_HEAP_LINES = []
-    Settings.CHECK_SIGNS = 0 #1-(embetter or llvm_opts)
+    Settings.CHECK_SIGNS = 0 #1-embetter
     Settings.RUNTIME_TYPE_INFO = 0
     Settings.DISABLE_EXCEPTION_CATCHING = 0
     Settings.INCLUDE_FULL_LIBRARY = 0
@@ -10068,12 +10086,10 @@ class %s(T):
     Settings.EMULATE_UNALIGNED_ACCESSES = int(Settings.USE_TYPED_ARRAYS == 2 and Building.LLVM_OPTS == 2)
     Settings.DOUBLE_MODE = 1 if Settings.USE_TYPED_ARRAYS and Building.LLVM_OPTS == 0 else 0
     Settings.PRECISE_I64_MATH = 0
-    Settings.NAMED_GLOBALS = 0 if not (embetter and llvm_opts) else 1
-
-    Building.pick_llvm_opts(3)
+    Settings.NAMED_GLOBALS = 0 if not embetter else 1
 
 TT = %s
-''' % (fullname, fullname, env, fullname, fullname, compiler, str(emcc_args), llvm_opts, embetter, quantum_size, typed_arrays, fullname))
+''' % (fullname, fullname, env, fullname, fullname, compiler, str(emcc_args), embetter, quantum_size, typed_arrays, fullname))
     return TT
 
   # Make one run with the defaults
@@ -10092,16 +10108,14 @@ TT = %s
   exec('''asm2x86 = make_run("asm2x86", compiler=CLANG, emcc_args=["-O2", "-g", "-s", "CHECK_HEAP_ALIGN=1"], env='{"EMCC_LLVM_TARGET": "i386-pc-linux-gnu"}')''')
 
   # Make custom runs with various options
-  for compiler, quantum, embetter, typed_arrays, llvm_opts in [
-    (CLANG, 4, 0, 0, 0),
-    (CLANG, 4, 0, 0, 1),
-    (CLANG, 4, 1, 1, 0),
-    (CLANG, 4, 1, 1, 1),
+  for compiler, quantum, embetter, typed_arrays in [
+    (CLANG, 4, 0, 0),
+    (CLANG, 4, 1, 1),
   ]:
-    fullname = 's_%d_%d%s%s' % (
-      llvm_opts, embetter, '' if quantum == 4 else '_q' + str(quantum), '' if typed_arrays in [0, 1] else '_t' + str(typed_arrays)
+    fullname = 's_0_%d%s%s' % (
+      embetter, '' if quantum == 4 else '_q' + str(quantum), '' if typed_arrays in [0, 1] else '_t' + str(typed_arrays)
     )
-    exec('%s = make_run(fullname, %r,%r,%d,%d,%d,%d)' % (fullname, fullname, compiler, llvm_opts, embetter, quantum, typed_arrays))
+    exec('%s = make_run(fullname, %r,%r,%d,%d,%d)' % (fullname, fullname, compiler, embetter, quantum, typed_arrays))
 
   del T # T is just a shape for the specific subclasses, we don't test it itself
 
@@ -10235,23 +10249,25 @@ Options that are modified or new in %s include:
           (['-o', 'something.js', '-O3', '-s', 'ASM_JS=0'], 3, None, 1, 1),
           # and, test compiling to bitcode first
           (['-o', 'something.bc'], 0, [],      0, 0),
-          (['-o', 'something.bc'], 0, ['-O0'], 0, 0),
-          (['-o', 'something.bc'], 1, ['-O1'], 0, 0),
-          (['-o', 'something.bc'], 2, ['-O2'], 0, 0),
-          (['-o', 'something.bc'], 3, ['-O3', '-s', 'ASM_JS=0'], 1, 0),
-          (['-O1', '-o', 'something.bc'], 0, [], 0, 0), # -Ox is ignored and warned about
+          (['-o', 'something.bc', '-O0'], 0, [], 0, 0),
+          (['-o', 'something.bc', '-O1'], 1, ['-O1'], 0, 0),
+          (['-o', 'something.bc', '-O2'], 2, ['-O2'], 0, 0),
+          (['-o', 'something.bc', '-O3'], 3, ['-O3', '-s', 'ASM_JS=0'], 1, 0),
+          (['-O1', '-o', 'something.bc'], 1, [], 0, 0),
         ]:
           print params, opt_level, bc_params, closure, has_malloc
           self.clear()
           keep_debug = '-g' in params
-          output = Popen([PYTHON, compiler, path_from_root('tests', 'hello_world_loop' + ('_malloc' if has_malloc else '') + '.cpp')] + params,
+          args = [PYTHON, compiler, path_from_root('tests', 'hello_world_loop' + ('_malloc' if has_malloc else '') + '.cpp')] + params
+          print '..', args
+          output = Popen(args,
                          stdout=PIPE, stderr=PIPE).communicate()
           assert len(output[0]) == 0, output[0]
           if bc_params is not None:
-            if '-O1' in params and 'something.bc' in params:
-              assert '-Ox flags ignored, since not generating JavaScript' in output[1]
             assert os.path.exists('something.bc'), output[1]
-            output = Popen([PYTHON, compiler, 'something.bc', '-o', 'something.js'] + bc_params, stdout=PIPE, stderr=PIPE).communicate()
+            bc_args = [PYTHON, compiler, 'something.bc', '-o', 'something.js'] + bc_params
+            print '....', bc_args
+            output = Popen(bc_args, stdout=PIPE, stderr=PIPE).communicate()
           assert os.path.exists('something.js'), output[1]
           assert ('Applying some potentially unsafe optimizations!' in output[1]) == (opt_level >= 3), 'unsafe warning should appear in opt >= 3'
           self.assertContained('hello, world!', run_js('something.js'))
@@ -11500,64 +11516,6 @@ seeked= file.
       code = open('a.out.js').read()
       assert 'SAFE_HEAP' in code, 'valid -s option had an effect'
 
-    def test_optimize_normally(self):
-      assert not os.environ.get('EMCC_OPTIMIZE_NORMALLY')
-      assert not os.environ.get('EMCC_DEBUG')
-
-      for optimize_normally in [0, 1]:
-        print optimize_normally
-        try:
-          if optimize_normally: os.environ['EMCC_OPTIMIZE_NORMALLY'] = '1'
-          os.environ['EMCC_DEBUG'] = '1'
-
-          open(self.in_dir('main.cpp'), 'w').write(r'''
-            extern "C" {
-              void something();
-            }
-
-            int main() {
-              something();
-              return 0;
-            }
-          ''')
-          open(self.in_dir('supp.cpp'), 'w').write(r'''
-            #include <stdio.h>
-
-            extern "C" {
-              void something() {
-                printf("yello\n");
-              }
-            }
-          ''')
-          out, err = Popen([PYTHON, EMCC, self.in_dir('main.cpp'), '-O2', '-o', 'main.o'], stdout=PIPE, stderr=PIPE).communicate()
-          assert ("emcc: LLVM opts: ['-O3']" in err) == optimize_normally
-          assert (' with -O3 since EMCC_OPTIMIZE_NORMALLY defined' in err) == optimize_normally
-
-          out, err = Popen([PYTHON, EMCC, self.in_dir('supp.cpp'), '-O2', '-o', 'supp.o'], stdout=PIPE, stderr=PIPE).communicate()
-          assert ("emcc: LLVM opts: ['-O3']" in err) == optimize_normally
-          assert (' with -O3 since EMCC_OPTIMIZE_NORMALLY defined' in err) == optimize_normally
-
-          out, err = Popen([PYTHON, EMCC, self.in_dir('main.o'), self.in_dir('supp.o'), '-O2', '-o', 'both.o'], stdout=PIPE, stderr=PIPE).communicate()
-          assert "emcc: LLVM opts: ['-O3']" not in err
-          assert ' with -O3 since EMCC_OPTIMIZE_NORMALLY defined' not in err
-          assert ('despite EMCC_OPTIMIZE_NORMALLY since not source code' in err) == optimize_normally
-
-          out, err = Popen([PYTHON, EMCC, self.in_dir('main.cpp'), self.in_dir('supp.cpp'), '-O2', '-o', 'both2.o'], stdout=PIPE, stderr=PIPE).communicate()
-          assert ("emcc: LLVM opts: ['-O3']" in err) == optimize_normally
-          assert (' with -O3 since EMCC_OPTIMIZE_NORMALLY defined' in err) == optimize_normally
-
-          for last in ['both.o', 'both2.o']:
-            out, err = Popen([PYTHON, EMCC, self.in_dir('both.o'), '-O2', '-o', last + '.js', '--memory-init-file', '0'], stdout=PIPE, stderr=PIPE).communicate()
-            assert ("emcc: LLVM opts: ['-O3']" not in err) == optimize_normally
-            assert ' with -O3 since EMCC_OPTIMIZE_NORMALLY defined' not in err
-            output = run_js(last + '.js')
-            assert 'yello' in output, 'code works ' + err
-          assert open('both.o.js').read() == open('both2.o.js').read()
-
-        finally:
-          if optimize_normally: del os.environ['EMCC_OPTIMIZE_NORMALLY']
-          del os.environ['EMCC_DEBUG']
-
     def test_jcache_printf(self):
       open(self.in_dir('src.cpp'), 'w').write(r'''
         #include <stdio.h>
@@ -13481,9 +13439,10 @@ process(sys.argv[1])
       try_delete(final_filename)
       output = Popen([PYTHON, EMCC, filename, #'-O3',
                       '-O2', '-s', 'DOUBLE_MODE=0', '-s', 'PRECISE_I64_MATH=0',
-                      '--llvm-lto', '1', '--memory-init-file', '0', '--js-transform', 'python hardcode.py',
+                      '--llvm-lto', '3', '--memory-init-file', '0', '--js-transform', 'python hardcode.py',
                       '-s', 'TOTAL_MEMORY=128*1024*1024',
                       '--closure', '1',
+                      #'-g',
                       '-o', final_filename] + shared_args + emcc_args, stdout=PIPE, stderr=self.stderr_redirect).communicate()
       assert os.path.exists(final_filename), 'Failed to compile file: ' + output[0]
 
@@ -13798,7 +13757,7 @@ process(sys.argv[1])
       args = [path_from_root('tests', 'nbody-java', x) for x in os.listdir(path_from_root('tests', 'nbody-java')) if x.endswith('.c')] + \
              ['-I' + path_from_root('tests', 'nbody-java')]
       self.do_benchmark('nbody_java', '', '''Time(s)''',
-                        force_c=True, emcc_args=args + ['-s', 'PRECISE_I64_MATH=1', '--llvm-lto', '0'], native_args=args + ['-lgc', '-std=c99', '-target', 'x86_64-pc-linux-gnu', '-lm'])
+                        force_c=True, emcc_args=args + ['-s', 'PRECISE_I64_MATH=1', '--llvm-lto', '2'], native_args=args + ['-lgc', '-std=c99', '-target', 'x86_64-pc-linux-gnu', '-lm'])
 
     def lua(self, benchmark, expected, output_parser=None, args_processor=None):
       shutil.copyfile(path_from_root('tests', 'lua', benchmark + '.lua'), benchmark + '.lua')
@@ -14190,8 +14149,7 @@ fi
           try_delete('a.out.js')
 
           basebc_name = os.path.join(TEMP_DIR, 'emscripten_temp', 'emcc-0-basebc.bc')
-          dcebc_name1 = os.path.join(TEMP_DIR, 'emscripten_temp', 'emcc-1-linktime.bc')
-          dcebc_name2 = os.path.join(TEMP_DIR, 'emscripten_temp', 'emcc-2-linktime.bc')
+          dcebc_name = os.path.join(TEMP_DIR, 'emscripten_temp', 'emcc-1-linktime.bc')
           ll_names = [os.path.join(TEMP_DIR, 'emscripten_temp', 'emcc-X-ll.ll').replace('X', str(x)) for x in range(2,5)]
 
           # Building a file that *does* need dlmalloc *should* trigger cache generation, but only the first time
@@ -14199,7 +14157,6 @@ fi
             for i in range(3):
               print filename, libname, i
               self.clear()
-              dcebc_name = dcebc_name1 if i == 0 else dcebc_name2
               try_delete(basebc_name) # we might need to check this file later
               try_delete(dcebc_name) # we might need to check this file later
               for ll_name in ll_names: try_delete(ll_name)
@@ -14216,9 +14173,9 @@ fi
               assert os.path.exists(os.path.join(EMCC_CACHE, libname + '.bc'))
               if libname == 'libcxx':
                 print os.stat(os.path.join(EMCC_CACHE, libname + '.bc')).st_size, os.stat(basebc_name).st_size, os.stat(dcebc_name).st_size
-                assert os.stat(os.path.join(EMCC_CACHE, libname + '.bc')).st_size > 1800000, 'libc++ is big'
-                assert os.stat(basebc_name).st_size > 1800000, 'libc++ is indeed big'
-                assert os.stat(dcebc_name).st_size < 750000, 'Dead code elimination must remove most of libc++'
+                assert os.stat(os.path.join(EMCC_CACHE, libname + '.bc')).st_size > 1000000, 'libc++ is big'
+                assert os.stat(basebc_name).st_size > 1000000, 'libc++ is indeed big'
+                assert os.stat(dcebc_name).st_size < 500000, 'Dead code elimination must remove most of libc++'
               # should only have metadata in -O0, not 1 and 2
               if i > 0:
                 for ll_name in ll_names:
@@ -14305,7 +14262,7 @@ fi
           (['--jcache'], 'hello_malloc.cpp', False, True, False, True, False, True, []),
           ([], 'hello_malloc.cpp', False, False, False, False, False, False, []),
           # new, huge file
-          ([], 'hello_libcxx.cpp', False, False, False, False, False, False, ('4 chunks',)),
+          ([], 'hello_libcxx.cpp', False, False, False, False, False, False, ('3 chunks',)),
           (['--jcache'], 'hello_libcxx.cpp', True, False, True, False, True, False, []),
           (['--jcache'], 'hello_libcxx.cpp', False, True, False, True, False, True, []),
           ([], 'hello_libcxx.cpp', False, False, False, False, False, False, []),
diff --git a/tools/js-optimizer.js b/tools/js-optimizer.js
index 151e573a..d04807a7 100644
--- a/tools/js-optimizer.js
+++ b/tools/js-optimizer.js
@@ -188,12 +188,12 @@ function traverseChildren(node, traverse, pre, post, stack) {
 //     was stopped, true. Otherwise undefined.
 function traverse(node, pre, post, stack) {
   var type = node[0], result, len;
-  var relevant = typeof node[0] === 'string';
+  var relevant = typeof type === 'string';
   if (relevant) {
     if (stack) len = stack.length;
     var result = pre(node, type, stack);
     if (result === true) return true;
-    if (Array.isArray(result)) node = result; // Continue processing on this node
+    if (result && result !== null) node = result; // Continue processing on this node
     if (stack && len === stack.length) stack.push(0);
   }
   if (result !== null) {
@@ -584,6 +584,8 @@ function simplifyExpressionsPre(ast) {
             node[3] = value[2];
           }
         }
+      } else if (type == 'sub' && node[1][0] == 'name' && /^FUNCTION_TABLE.*/.exec(node[1][1])) {
+        return null; // do not traverse subchildren here, we should not collapse 55 & 126. TODO: optimize this into a nonvirtual call (also because we lose some other opts here)!
       }
     });
 
@@ -2035,7 +2037,6 @@ function eliminate(ast, memSafe) {
     // examine body and note locals
     var hasSwitch = false;
     traverse(func, function(node, type) {
-      if (debug && type) type = type.toString();
       if (type === 'var') {
         var node1 = node[1];
         for (var i = 0; i < node1.length; i++) {
@@ -2645,7 +2646,6 @@ function eliminate(ast, memSafe) {
             var has_num = false;
             var fail = false;
             traverse(node, function(subNode, subType) {
-              if (debug && subType) subType = subType.toString();
               if (subType === 'binary') {
                 if (subNode[1] !== '+') {
                   fail = true;
@@ -2807,6 +2807,23 @@ function asmLoopOptimizer(ast) {
         var stats = node[2][1];
         var last = stats[stats.length-1];
         if (last && last[0] === 'if' && !last[3] && last[2][0] === 'block' && last[2][1][0] && last[2][1][0][0] === 'break' && !last[2][1][0][1]) {
+          var abort = false;
+          var stack = 0;
+          traverse(stats, function(node, type) {
+            if (type == 'continue') {
+              if (stack == 0 || node[1]) { // abort if labeled (we do not analyze labels here yet), or a continue directly on us
+                abort = true;
+                return true;
+              }
+            } else if (type in LOOP) {
+              stack++;
+            }
+          }, function(node, type) {
+            if (type in LOOP) {
+              stack--;
+            }
+          });
+          if (abort) return;
           var conditionToBreak = last[1];
           stats.pop();
           node[0] = 'do';
diff --git a/tools/shared.py b/tools/shared.py
index 2a754d27..776001cd 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -1,6 +1,7 @@
 import shutil, time, os, sys, json, tempfile, copy, shlex, atexit, subprocess, hashlib, cPickle, re
 from subprocess import Popen, PIPE, STDOUT
 from tempfile import mkstemp
+from distutils.spawn import find_executable
 import jsrun, cache, tempfiles
 from response_file import create_response_file
 import logging, platform
@@ -204,25 +205,12 @@ else:
     config_file = '\n'.join(config_file)
     # autodetect some default paths
     config_file = config_file.replace('{{{ EMSCRIPTEN_ROOT }}}', __rootpath__)
-    llvm_root = '/usr/bin'
-    try:
-      llvm_root = os.path.dirname(Popen(['which', 'llvm-dis'], stdout=PIPE).communicate()[0].replace('\n', ''))
-    except:
-      pass
+    llvm_root = os.path.dirname(find_executable('llvm-dis') or '/usr/bin/llvm-dis') # LLVM_ROOT is the directory holding the LLVM tools, not the path of llvm-dis itself
     config_file = config_file.replace('{{{ LLVM_ROOT }}}', llvm_root)
-    node = 'node'
-    try:
-      node = Popen(['which', 'node'], stdout=PIPE).communicate()[0].replace('\n', '') or \
-             Popen(['which', 'nodejs'], stdout=PIPE).communicate()[0].replace('\n', '') or node
-    except:
-      pass
+    node = find_executable('node') or find_executable('nodejs') or 'node'
     config_file = config_file.replace('{{{ NODE }}}', node)
-    python = sys.executable or 'python'
-    try:
-      python = Popen(['which', 'python2'], stdout=PIPE).communicate()[0].replace('\n', '') or \
-               Popen(['which', 'python'], stdout=PIPE).communicate()[0].replace('\n', '') or python
-    except:
-      pass
+    python = find_executable('python2') or find_executable('python') or \
+        sys.executable or 'python'
     config_file = config_file.replace('{{{ PYTHON }}}', python)    
 
     # write
@@ -295,7 +283,7 @@ def check_node_version():
 # we re-check sanity when the settings are changed)
 # We also re-check sanity and clear the cache when the version changes
 
-EMSCRIPTEN_VERSION = '1.5.1'
+EMSCRIPTEN_VERSION = '1.5.3'
 
 def generate_sanity():
   return EMSCRIPTEN_VERSION + '|' + get_llvm_target()
@@ -324,6 +312,7 @@ def check_sanity(force=False):
     if reason:
       logging.warning('(Emscripten: %s, clearing cache)' % reason)
       Cache.erase()
+      force = False # the check actually failed, so definitely write out the sanity file, to avoid others later seeing failures too
 
     # some warning, not fatal checks - do them even if EM_IGNORE_SANITY is on
     check_llvm_version()
@@ -944,7 +933,7 @@ set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)''' % { 'winfix': '' if not WINDOWS e
 
     # Finish link
     actual_files = unique_ordered(actual_files) # tolerate people trying to link a.so a.so etc.
-    logging.debug('emcc: llvm-linking: %s', actual_files)
+    logging.debug('emcc: llvm-linking: %s to %s', actual_files, target)
 
     # check for too-long command line
     link_cmd = [LLVM_LINK] + actual_files + ['-o', target]
@@ -1111,7 +1100,9 @@ set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)''' % { 'winfix': '' if not WINDOWS e
 
   @staticmethod
   def get_safe_internalize():
-    exports = ','.join(map(lambda exp: exp[1:], expand_response(Settings.EXPORTED_FUNCTIONS)))
+    exps = list(expand_response(Settings.EXPORTED_FUNCTIONS)) # work on a copy: expand_response may hand back the settings list itself, which must not be modified here
+    if '_malloc' not in exps: exps.append('_malloc') # needed internally, even if user did not add to EXPORTED_FUNCTIONS
+    exports = ','.join([exp[1:] for exp in exps]) # strip the first character of each name (presumably the '_' prefix - confirm) to build the comma-separated list
     # internalize carefully, llvm 3.2 will remove even main if not told not to
     return ['-internalize', '-internalize-public-api-list=' + exports]
 
@@ -1293,9 +1284,6 @@ set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)''' % { 'winfix': '' if not WINDOWS e
       emcc_debug = os.environ.get('EMCC_DEBUG')
       if emcc_debug: del os.environ['EMCC_DEBUG']
 
-      emcc_optimize_normally = os.environ.get('EMCC_OPTIMIZE_NORMALLY')
-      if emcc_optimize_normally: del os.environ['EMCC_OPTIMIZE_NORMALLY']
-
       def make(opt_level):
         raw = relooper + '.raw.js'
         Building.emcc(os.path.join('relooper', 'Relooper.cpp'), ['-I' + os.path.join('relooper'), '--post-js',
@@ -1326,7 +1314,6 @@ set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)''' % { 'winfix': '' if not WINDOWS e
     finally:
       os.chdir(curr)
       if emcc_debug: os.environ['EMCC_DEBUG'] = emcc_debug
-      if emcc_optimize_normally: os.environ['EMCC_OPTIMIZE_NORMALLY'] = emcc_optimize_normally
       if not ok:
         logging.error('bootstrapping relooper failed. You may need to manually create relooper.js by compiling it, see src/relooper/emscripten')
         1/0
@@ -1392,6 +1379,17 @@ def execute(cmd, *args, **kw):
     logging.error('Invoking Process failed: <<< ' + cmd + ' >>>')
     raise
 
+def check_execute(cmd, *args, **kw):
+  # Runs cmd (an argument list) with stderr merged into stdout (overriding any stderr in kw); output is discarded on success, and on a nonzero exit it is logged and CalledProcessError re-raised.
+  # TODO: use in more places. execute doesn't actually check that return codes are zero
+  kw['stderr'] = STDOUT
+  try:
+    subprocess.check_output(cmd, *args, **kw)
+    logging.debug("Successfully executed %s", " ".join(cmd))
+  except subprocess.CalledProcessError as e:
+    logging.error("'%s' failed with output:\n%s", " ".join(e.cmd), e.output)
+    raise
+
 def suffix(name):
   parts = name.split('.')
   if len(parts) > 1:
diff --git a/tools/source-maps/sourcemapper.js b/tools/source-maps/sourcemapper.js
index 0001e7c7..fa908900 100755
--- a/tools/source-maps/sourcemapper.js
+++ b/tools/source-maps/sourcemapper.js
@@ -110,7 +110,7 @@ function generateMap(mappings, sourceRoot, mapFileBaseName, generatedLineOffset)
       try {
         generator.setSourceContent(originalFileName, fs.readFileSync(rootedPath, 'utf-8'));
       } catch (e) {
-        console.warn("Unable to find original file for " + originalFileName +
+        console.warn("sourcemapper: Unable to find original file for " + originalFileName +
           " at " + rootedPath);
       }
     }
diff --git a/tools/test-js-optimizer-asm-last-output.js b/tools/test-js-optimizer-asm-last-output.js
index cbc0a4d3..0f95d544 100644
--- a/tools/test-js-optimizer-asm-last-output.js
+++ b/tools/test-js-optimizer-asm-last-output.js
@@ -42,5 +42,33 @@ function looop() {
  do {
   do_it();
  } while (x());
+ while (1) {
+  do_it();
+  if (a()) continue;
+  if (!x()) {
+   break;
+  }
+ }
+ do {
+  do_it();
+  do {
+   if (a()) continue;
+  } while (b());
+ } while (x());
+ do {
+  do_it();
+  while (b()) {
+   if (a()) continue;
+  }
+ } while (x());
+ X : while (1) {
+  do_it();
+  while (b()) {
+   if (a()) continue X;
+  }
+  if (!x()) {
+   break;
+  }
+ }
 }
 
diff --git a/tools/test-js-optimizer-asm-last.js b/tools/test-js-optimizer-asm-last.js
index 6331879e..05e1049e 100644
--- a/tools/test-js-optimizer-asm-last.js
+++ b/tools/test-js-optimizer-asm-last.js
@@ -51,6 +51,40 @@ function looop() {
    break;
   }
  }
+ while (1) {
+  do_it();
+  if (a()) continue; // we cannot move to do-while, continue will hit the while check
+  if (!x()) {
+   break;
+  }
+ }
+ while (1) {
+  do_it();
+  do {
+    if (a()) continue; // ok to optimize, continue is not for us
+  } while (b());
+  if (!x()) {
+   break;
+  }
+ }
+ while (1) {
+  do_it();
+  while (b()) {
+    if (a()) continue; // also ok to optimize, continue is not for us
+  }
+  if (!x()) {
+   break;
+  }
+ }
+ X: while (1) {
+  do_it();
+  while (b()) {
+    if (a()) continue X; // not ok to optimize
+  }
+  if (!x()) {
+   break;
+  }
+ }
 }
 // EMSCRIPTEN_GENERATED_FUNCTIONS: ["finall", "looop"]
 
diff --git a/tools/test-js-optimizer-asm-pre-output.js b/tools/test-js-optimizer-asm-pre-output.js
index 2cd8d407..301a2ec8 100644
--- a/tools/test-js-optimizer-asm-pre-output.js
+++ b/tools/test-js-optimizer-asm-pre-output.js
@@ -19,6 +19,7 @@ function a() {
  f(g() | 0 & -1);
  f((g() | 0) >> 2);
  $56 = _fcntl() | 0 | 1;
+ FUNCTION_TABLE_ii[55 & 127]() | 0;
 }
 function b($this, $__n) {
  $this = $this | 0;
diff --git a/tools/test-js-optimizer-asm-pre.js b/tools/test-js-optimizer-asm-pre.js
index ca7d2894..c7c92124 100644
--- a/tools/test-js-optimizer-asm-pre.js
+++ b/tools/test-js-optimizer-asm-pre.js
@@ -20,6 +20,7 @@ function a() {
  f(g() | 0 & -1);
  f((g() | 0) >> 2);
  $56 = (_fcntl() | 0) | 1;
+ FUNCTION_TABLE_ii[55 & 127]() | 0;
 }
 function b($this, $__n) {
  $this = $this | 0;