13 files changed, 614 insertions, 163 deletions
diff --git a/AUTHORS b/AUTHORS
index 137efb00..dc848d34 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -139,3 +139,6 @@ a license to everyone to use it as detailed in LICENSE.)
 * Usagi Ito <usagi@WonderRabbitProject.net>
 * Camilo Polymeris <cpolymeris@gmail.com>
 * Markus Henschel <markus.henschel@yager.de>
+* Ophir Lojkine <ophir.lojkine@eleves.ec-nantes.fr>
+* Ryan Sturgell <ryan.sturgell@gmail.com> (copyright owned by Google, Inc.)
+
diff --git a/emcc b/emcc
index 2dc525af..1a645965 100755
--- a/emcc
+++ b/emcc
@@ -1098,12 +1098,24 @@ try:
 
   # Find input files
 
+  # These three arrays are used to store arguments of different types for
+  # type-specific processing. In order to shuffle the arguments back together
+  # after processing, all of these arrays hold tuples (original_index, value).
+  # Note that the index part of the tuple can have a fractional part for input
+  # arguments that expand into multiple processed arguments, as in -Wl,-f1,-f2.
   input_files = []
+  libs = []
+  link_flags = []
+
+  # All of the above arg lists entries contain indexes into the full argument
+  # list. In order to add extra implicit args (embind.cc, etc) below, we keep a
+  # counter for the next index that should be used.
+  next_arg_index = len(newargs)
+
   has_source_inputs = False
   has_header_inputs = False
   lib_dirs = [shared.path_from_root('system', 'local', 'lib'),
               shared.path_from_root('system', 'lib')]
-  libs = []
   for i in range(len(newargs)): # find input files XXX this a simple heuristic. we should really analyze based on a full understanding of gcc params,
                                 # right now we just assume that what is left contains no more |-x OPT| things
     arg = newargs[i]
@@ -1124,13 +1136,13 @@ try:
       if arg_ending.endswith(SOURCE_ENDINGS + BITCODE_ENDINGS + DYNAMICLIB_ENDINGS + ASSEMBLY_ENDINGS + HEADER_ENDINGS) or shared.Building.is_ar(arg): # we already removed -o <target>, so all these should be inputs
         newargs[i] = ''
         if arg_ending.endswith(SOURCE_ENDINGS):
-          input_files.append(arg)
+          input_files.append((i, arg))
           has_source_inputs = True
         elif arg_ending.endswith(HEADER_ENDINGS):
-          input_files.append(arg)
+          input_files.append((i, arg))
           has_header_inputs = True
         elif arg_ending.endswith(ASSEMBLY_ENDINGS) or shared.Building.is_bitcode(arg): # this should be bitcode, make sure it is valid
-          input_files.append(arg)
+          input_files.append((i, arg))
         elif arg_ending.endswith(STATICLIB_ENDINGS + DYNAMICLIB_ENDINGS):
           # if it's not, and it's a library, just add it to libs to find later
           l = unsuffixed_basename(arg)
@@ -1139,7 +1151,7 @@ try:
             if l.startswith(prefix):
               l = l[len(prefix):]
               break
-          libs.append(l)
+          libs.append((i, l))
           newargs[i] = ''
         else:
           logging.warning(arg + ' is not valid LLVM bitcode')
@@ -1157,7 +1169,15 @@ try:
       lib_dirs.append(arg[2:])
       newargs[i] = ''
     elif arg.startswith('-l'):
-      libs.append(arg[2:])
+      libs.append((i, arg[2:]))
+      newargs[i] = ''
+    elif arg.startswith('-Wl,'):
+      # Multiple comma separated link flags can be specified. Create fake
+      # fractional indices for these: -Wl,a,b,c,d at index 4 becomes:
+      # (4, a), (4.25, b), (4.5, c), (4.75, d)
+      link_flags_to_add = arg.split(',')[1:]
+      for flag_index, flag in enumerate(link_flags_to_add):
+        link_flags.append((i + float(flag_index) / len(link_flags_to_add), flag))
       newargs[i] = ''
 
   original_input_files = input_files[:]
@@ -1173,7 +1193,7 @@ try:
   final_ending = ('.' + final_suffix) if len(final_suffix) > 0 else ''
 
   # Find library files
-  for lib in libs:
+  for i, lib in libs:
     logging.debug('looking for library "%s"', lib)
     found = False
     for prefix in LIB_PREFIXES:
@@ -1183,7 +1203,7 @@ try:
           path = os.path.join(lib_dir, name)
           if os.path.exists(path):
             logging.debug('found library "%s" at %s', lib, path)
-            input_files.append(path)
+            input_files.append((i, path))
             found = True
             break
         if found: break
@@ -1199,7 +1219,7 @@ try:
         return False
       else:
         return True
-    input_files = [input_file for input_file in input_files if check(input_file)]
+    input_files = [(i, input_file) for (i, input_file) in input_files if check(input_file)]
 
   if len(input_files) == 0:
     logging.error('no input files\nnote that input files without a known suffix are ignored, make sure your input files end with one of: ' + str(SOURCE_ENDINGS + BITCODE_ENDINGS + DYNAMICLIB_ENDINGS + STATICLIB_ENDINGS + ASSEMBLY_ENDINGS + HEADER_ENDINGS))
@@ -1211,7 +1231,8 @@ try:
 
   # If we are using embind and generating JS, now is the time to link in bind.cpp
   if bind and final_suffix in JS_CONTAINING_SUFFIXES:
-    input_files.append(shared.path_from_root('system', 'lib', 'embind', 'bind.cpp'))
+    input_files.append((next_arg_index, shared.path_from_root('system', 'lib', 'embind', 'bind.cpp')))
+    next_arg_index += 1
 
   # Apply optimization level settings
   shared.Settings.apply_opt_level(opt_level, noisy=True)
@@ -1331,7 +1352,8 @@ try:
     logging.warning('ALIASING_FUNCTION_POINTERS is on, function pointer comparisons may be invalid across types')
 
   if shared.Settings.STB_IMAGE and final_suffix in JS_CONTAINING_SUFFIXES:
-    input_files.append(shared.path_from_root('third_party', 'stb_image.c'))
+    input_files.append((next_arg_index, shared.path_from_root('third_party', 'stb_image.c')))
+    next_arg_index += 1
     shared.Settings.EXPORTED_FUNCTIONS += ['_stbi_load', '_stbi_load_from_memory', '_stbi_image_free']
 
   if type(shared.Settings.EXPORTED_FUNCTIONS) in (list, tuple):
@@ -1365,9 +1387,10 @@ try:
 
   # Precompiled headers support
   if has_header_inputs:
-    for header in input_files:
-      assert header.endswith(HEADER_ENDINGS), 'if you have one header input, we assume you want to precompile headers, and cannot have source files or other inputs as well: ' + str(input_files) + ' : ' + header
-    args = newargs + shared.EMSDK_CXX_OPTS + input_files
+    headers = [header for _, header in input_files]
+    for header in headers:
+      assert header.endswith(HEADER_ENDINGS), 'if you have one header input, we assume you want to precompile headers, and cannot have source files or other inputs as well: ' + str(headers) + ' : ' + header
+    args = newargs + shared.EMSDK_CXX_OPTS + headers
     if specified_target:
       args += ['-o', specified_target]
     logging.debug("running (for precompiled headers): " + call + ' ' + ' '.join(args))
@@ -1388,12 +1411,12 @@ try:
     return in_temp(unsuffixed(uniquename(input_file)) + default_object_extension)
 
   # First, generate LLVM bitcode. For each input file, we get base.o with bitcode
-  for input_file in input_files:
+  for i, input_file in input_files:
     file_ending = filename_type_ending(input_file)
     if file_ending.endswith(SOURCE_ENDINGS):
       logging.debug('compiling source file: ' + input_file)
       output_file = get_bitcode_file(input_file)
-      temp_files.append(output_file)
+      temp_files.append((i, output_file))
       args = newargs + ['-emit-llvm', '-c', input_file, '-o', output_file]
       if file_ending.endswith(CXX_ENDINGS):
         args += shared.EMSDK_CXX_OPTS
@@ -1407,18 +1430,18 @@ try:
         logging.debug('copying bitcode file: ' + input_file)
         temp_file = in_temp(unsuffixed(uniquename(input_file)) + '.o')
         shutil.copyfile(input_file, temp_file)
-        temp_files.append(temp_file)
+        temp_files.append((i, temp_file))
       elif file_ending.endswith(DYNAMICLIB_ENDINGS) or shared.Building.is_ar(input_file):
         logging.debug('copying library file: ' + input_file)
         temp_file = in_temp(uniquename(input_file))
         shutil.copyfile(input_file, temp_file)
-        temp_files.append(temp_file)
+        temp_files.append((i, temp_file))
       elif file_ending.endswith(ASSEMBLY_ENDINGS):
         if not LEAVE_INPUTS_RAW:
           logging.debug('assembling assembly file: ' + input_file)
           temp_file = in_temp(unsuffixed(uniquename(input_file)) + '.o')
           shared.Building.llvm_as(input_file, temp_file)
-          temp_files.append(temp_file)
+          temp_files.append((i, temp_file))
       else:
         logging.error(input_file + ': Unknown file suffix when compiling to LLVM bitcode!')
         sys.exit(1)
@@ -1430,10 +1453,10 @@ try:
 
     # Optimize source files
     if llvm_opts > 0:
-      for i, input_file in enumerate(input_files):
+      for pos, (_, input_file) in enumerate(input_files):
         file_ending = filename_type_ending(input_file)
         if file_ending.endswith(SOURCE_ENDINGS):
-          temp_file = temp_files[i]
+          (_, temp_file) = temp_files[pos]
           logging.debug('optimizing %s', input_file)
           #if DEBUG: shutil.copyfile(temp_file, os.path.join(TEMP_DIR, 'to_opt.bc') # useful when LLVM opt aborts
           shared.Building.llvm_opt(temp_file, llvm_opts)
@@ -1441,26 +1464,30 @@ try:
   # If we were just asked to generate bitcode, stop there
   if final_suffix not in JS_CONTAINING_SUFFIXES:
     if not specified_target:
-      for input_file in input_files:
+      for _, input_file in input_files:
         safe_move(get_bitcode_file(input_file), unsuffixed_basename(input_file) + final_ending)
     else:
       if len(input_files) == 1:
-        safe_move(temp_files[0], specified_target if specified_target else unsuffixed_basename(input_file) + final_ending)
-        temp_output_base = unsuffixed(temp_files[0])
+        _, input_file = input_files[0]
+        _, temp_file = temp_files[0]
+        safe_move(temp_file, specified_target if specified_target else unsuffixed_basename(input_file) + final_ending)
+        temp_output_base = unsuffixed(temp_file)
         if os.path.exists(temp_output_base + '.d'):
           # There was a .d file generated, from -MD or -MMD and friends, save a copy of it to where the output resides,
           # adjusting the target name away from the temporary file name to the specified target.
           # It will be deleted with the rest of the temporary directory.
           deps = open(temp_output_base + '.d').read()
           deps = deps.replace(temp_output_base + default_object_extension, specified_target)
-          with open(os.path.join(os.path.dirname(specified_target), os.path.basename(unsuffixed(input_files[0]) + '.d')), "w") as out_dep:
+          with open(os.path.join(os.path.dirname(specified_target), os.path.basename(unsuffixed(input_file) + '.d')), "w") as out_dep:
             out_dep.write(deps)
       else:
         assert len(original_input_files) == 1 or not has_dash_c, 'fatal error: cannot specify -o with -c with multiple files' + str(sys.argv) + ':' + str(original_input_files)
         # We have a specified target (-o <target>), which is not JavaScript or HTML, and
         # we have multiple files: Link them
         logging.debug('link: ' + str(temp_files) + specified_target)
-        shared.Building.link(temp_files, specified_target)
+        # Sort arg tuples and pass the extracted values to link.
+        link_args = [f for (i, f) in sorted(temp_files)]
+        shared.Building.link(link_args, specified_target)
     logging.debug('stopping at bitcode')
     exit(0)
 
@@ -1473,7 +1500,7 @@ try:
   if not LEAVE_INPUTS_RAW and \
      not shared.Settings.BUILD_AS_SHARED_LIB and \
      not shared.Settings.SIDE_MODULE: # shared libraries/side modules link no C libraries, need them in parent
-    extra_files_to_link = system_libs.calculate(temp_files, in_temp, stdout, stderr)
+    extra_files_to_link = system_libs.calculate([f for _, f in sorted(temp_files)], in_temp, stdout, stderr)
   else:
     extra_files_to_link = []
 
@@ -1481,18 +1508,20 @@ try:
 
   # First, combine the bitcode files if there are several. We must also link if we have a singleton .a
   if len(input_files) + len(extra_files_to_link) > 1 or \
-     (not LEAVE_INPUTS_RAW and not (suffix(temp_files[0]) in BITCODE_ENDINGS or suffix(temp_files[0]) in DYNAMICLIB_ENDINGS) and shared.Building.is_ar(temp_files[0])):
-    linker_inputs = temp_files + extra_files_to_link
+     (not LEAVE_INPUTS_RAW and not (suffix(temp_files[0][1]) in BITCODE_ENDINGS or suffix(temp_files[0][1]) in DYNAMICLIB_ENDINGS) and shared.Building.is_ar(temp_files[0][1])):
+    linker_inputs = [val for _, val in sorted(temp_files + link_flags)] + extra_files_to_link
     logging.debug('linking: ' + str(linker_inputs))
-    shared.Building.link(linker_inputs, in_temp(target_basename + '.bc'), force_archive_contents=len([temp for temp in temp_files if not temp.endswith(STATICLIB_ENDINGS)]) == 0)
+    shared.Building.link(linker_inputs, in_temp(target_basename + '.bc'), force_archive_contents=len([temp for i, temp in temp_files if not temp.endswith(STATICLIB_ENDINGS)]) == 0)
     final = in_temp(target_basename + '.bc')
   else:
     if not LEAVE_INPUTS_RAW:
-      shutil.move(temp_files[0], in_temp(target_basename + '.bc'))
+      _, temp_file = temp_files[0]
+      shutil.move(temp_file, in_temp(target_basename + '.bc'))
       final = in_temp(target_basename + '.bc')
     else:
-      final = in_temp(input_files[0])
-      shutil.copyfile(input_files[0], final)
+      _, input_file = input_files[0]
+      final = in_temp(input_file)
+      shutil.copyfile(input_file, final)
 
   log_time('link')
 
diff --git a/src/preamble.js b/src/preamble.js
index 2aec94c6..f46119c7 100644
--- a/src/preamble.js
+++ b/src/preamble.js
@@ -310,28 +310,6 @@ function assert(condition, text) {
 
 var globalScope = this;
 
-// C calling interface. A convenient way to call C functions (in C files, or
-// defined with extern "C").
-//
-// Note: LLVM optimizations can inline and remove functions, after which you will not be
-//       able to call them. Closure can also do so. To avoid that, add your function to
-//       the exports using something like
-//
-//         -s EXPORTED_FUNCTIONS='["_main", "_myfunc"]'
-//
-// @param ident      The name of the C function (note that C++ functions will be name-mangled - use extern "C")
-// @param returnType The return type of the function, one of the JS types 'number', 'string' or 'array' (use 'number' for any C pointer, and
-//                   'array' for JavaScript arrays and typed arrays; note that arrays are 8-bit).
-// @param argTypes   An array of the types of arguments for the function (if there are no arguments, this can be ommitted). Types are as in returnType,
-//                   except that 'array' is not possible (there is no way for us to know the length of the array)
-// @param args       An array of the arguments to the function, as native JS values (as in returnType)
-//                   Note that string arguments will be stored on the stack (the JS string will become a C string on the stack).
-// @return           The return value, as a native JS value (as in returnType)
-function ccall(ident, returnType, argTypes, args) {
-  return ccallFunc(getCFunc(ident), returnType, argTypes, args);
-}
-Module["ccall"] = ccall;
-
 // Returns the C function with a specified identifier (for C++, you need to do manual name mangling)
 function getCFunc(ident) {
   try {
@@ -343,53 +321,141 @@ function getCFunc(ident) {
   return func;
 }
 
-// Internal function that does a C call using a function, not an identifier
-function ccallFunc(func, returnType, argTypes, args) {
+var cwrap, ccall;
+(function(){
   var stack = 0;
-  function toC(value, type) {
-    if (type == 'string') {
-      if (value === null || value === undefined || value === 0) return 0; // null string
-      value = intArrayFromString(value);
-      type = 'array';
-    }
-    if (type == 'array') {
-      if (!stack) stack = Runtime.stackSave();
-      var ret = Runtime.stackAlloc(value.length);
-      writeArrayToMemory(value, ret);
+  var JSfuncs = {
+    'stackSave' : function() {
+      stack = Runtime.stackSave();
+    },
+    'stackRestore' : function() {
+      Runtime.stackRestore(stack);
+    },
+    // type conversion from js to c
+    'arrayToC' : function(arr) {
+      var ret = Runtime.stackAlloc(arr.length);
+      writeArrayToMemory(arr, ret);
+      return ret;
+    },
+    'stringToC' : function(str) {
+      var ret = 0;
+      if (str !== null && str !== undefined && str !== 0) { // null string
+        ret = Runtime.stackAlloc(str.length + 1); // +1 for the trailing '\0'
+        writeStringToMemory(str, ret);
+      }
       return ret;
     }
-    return value;
-  }
-  function fromC(value, type) {
-    if (type == 'string') {
-      return Pointer_stringify(value);
+  };
+  // For fast lookup of conversion functions
+  var toC = {'string' : JSfuncs['stringToC'], 'array' : JSfuncs['arrayToC']};
+
+  // C calling interface. A convenient way to call C functions (in C files, or
+  // defined with extern "C").
+  //
+  // Note: LLVM optimizations can inline and remove functions, after which you will not be
+  //       able to call them. Closure can also do so. To avoid that, add your function to
+  //       the exports using something like
+  //
+  //         -s EXPORTED_FUNCTIONS='["_main", "_myfunc"]'
+  //
+  // @param ident      The name of the C function (note that C++ functions will be name-mangled - use extern "C")
+  // @param returnType The return type of the function, one of the JS types 'number', 'string' or 'array' (use 'number' for any C pointer, and
+  //                   'array' for JavaScript arrays and typed arrays; note that arrays are 8-bit).
+  // @param argTypes   An array of the types of arguments for the function (if there are no arguments, this can be ommitted). Types are as in returnType,
+  //                   except that 'array' is not possible (there is no way for us to know the length of the array)
+  // @param args       An array of the arguments to the function, as native JS values (as in returnType)
+  //                   Note that string arguments will be stored on the stack (the JS string will become a C string on the stack).
+  // @return           The return value, as a native JS value (as in returnType)
+  ccall = function ccallFunc(ident, returnType, argTypes, args) {
+    var func = getCFunc(ident);
+    var cArgs = [];
+#if ASSERTIONS
+    assert(returnType !== 'array', 'Return type should not be "array".');
+#endif
+    if (args) {
+      for (var i = 0; i < args.length; i++) {
+        var converter = toC[argTypes[i]];
+        if (converter) {
+          if (stack === 0) stack = Runtime.stackSave();
+          cArgs[i] = converter(args[i]);
+        } else {
+          cArgs[i] = args[i];
+        }
+      }
     }
-    assert(type != 'array');
-    return value;
+    var ret = func.apply(null, cArgs);
+    if (returnType === 'string') ret = Pointer_stringify(ret);
+    if (stack !== 0) JSfuncs['stackRestore']();
+    return ret;
   }
-  var i = 0;
-  var cArgs = args ? args.map(function(arg) {
-    return toC(arg, argTypes[i++]);
-  }) : [];
-  var ret = fromC(func.apply(null, cArgs), returnType);
-  if (stack) Runtime.stackRestore(stack);
-  return ret;
-}
 
-// Returns a native JS wrapper for a C function. This is similar to ccall, but
-// returns a function you can call repeatedly in a normal way. For example:
-//
-//   var my_function = cwrap('my_c_function', 'number', ['number', 'number']);
-//   alert(my_function(5, 22));
-//   alert(my_function(99, 12));
-//
-function cwrap(ident, returnType, argTypes) {
-  var func = getCFunc(ident);
-  return function() {
-    return ccallFunc(func, returnType, argTypes, Array.prototype.slice.call(arguments));
+  var sourceRegex = /^function \((.*)\)\s*{\s*([^]*?)[\s;]*(?:return\s*(.*?)[;\s]*)?}$/;
+  function parseJSFunc(jsfunc) {
+    // Match the body and the return value of a javascript function source
+    var parsed = jsfunc.toString().match(sourceRegex).slice(1);
+    return {arguments : parsed[0], body : parsed[1], returnValue: parsed[2]}
   }
-}
+  var JSsource = {};
+  for (var fun in JSfuncs) {
+    if (JSfuncs.hasOwnProperty(fun)) {
+      // Elements of toCsource are arrays of three items:
+      // the code, and the return value
+      JSsource[fun] = parseJSFunc(JSfuncs[fun]);
+    }
+  }
+  // Returns a native JS wrapper for a C function. This is similar to ccall, but
+  // returns a function you can call repeatedly in a normal way. For example:
+  //
+  //   var my_function = cwrap('my_c_function', 'number', ['number', 'number']);
+  //   alert(my_function(5, 22));
+  //   alert(my_function(99, 12));
+  //
+  cwrap = function cwrap(ident, returnType, argTypes) {
+    var cfunc = getCFunc(ident);
+    // When the function takes numbers and returns a number, we can just return
+    // the original function
+    var numericArgs = argTypes.every(function(type){ return type === 'number'});
+    var numericRet = (returnType !== 'string');
+    if ( numericRet && numericArgs) {
+      return cfunc;
+    }
+    // Creation of the arguments list (["$1","$2",...,"$nargs"])
+    var argNames = argTypes.map(function(x,i){return '$'+i});
+    var funcstr = "(function(" + argNames.join(',') + ") {";
+    var nargs = argTypes.length;
+    if (!numericArgs) {
+      // Generate the code needed to convert the arguments from javascript
+      // values to pointers
+      funcstr += JSsource['stackSave'].body + ';';
+      for (var i = 0; i < nargs; i++) {
+        var arg = argNames[i], type = argTypes[i];
+        if (type === 'number') continue;
+        var convertCode = JSsource[type + 'ToC']; // [code, return]
+        funcstr += 'var ' + convertCode.arguments + ' = ' + arg + ';';
+        funcstr += convertCode.body + ';';
+        funcstr += arg + '=' + convertCode.returnValue + ';';
+      }
+    }
+
+    // When the code is compressed, the name of cfunc is not literally 'cfunc' anymore
+    var cfuncname = parseJSFunc(function(){return cfunc}).returnValue;
+    // Call the function
+    funcstr += 'var ret = ' + cfuncname + '(' + argNames.join(',') + ');';
+    if (!numericRet) { // Return type can only by 'string' or 'number'
+      // Convert the result to a string
+      var strgfy = parseJSFunc(function(){return Pointer_stringify}).returnValue;
+      funcstr += 'ret = ' + strgfy + '(ret);';
+    }
+    if (!numericArgs) {
+      // If we had a stack, restore it
+      funcstr += JSsource['stackRestore'].body + ';';
+    }
+    funcstr += 'return ret})';
+    return eval(funcstr);
+  };
+})();
 Module["cwrap"] = cwrap;
+Module["ccall"] = ccall;
 
 // Sets a value in memory in a dynamic way at run-time. Uses the
 // type data. This is the same as makeSetValue, except that
diff --git a/src/relooper/Relooper.cpp b/src/relooper/Relooper.cpp
index 568dd381..9e469ec4 100644
--- a/src/relooper/Relooper.cpp
+++ b/src/relooper/Relooper.cpp
@@ -1097,7 +1097,7 @@ void Relooper::Calculate(Block *Entry) {
     // Remove unneeded breaks and continues.
     // A flow operation is trivially unneeded if the shape we naturally get to by normal code
     // execution is the same as the flow forces us to.
-    void RemoveUnneededFlows(Shape *Root, Shape *Natural=NULL, LoopShape *LastLoop=NULL) {
+    void RemoveUnneededFlows(Shape *Root, Shape *Natural=NULL, LoopShape *LastLoop=NULL, unsigned Depth=0) {
       BlockSet NaturalBlocks;
       FollowNaturalFlow(Natural, NaturalBlocks);
       Shape *Next = Root;
@@ -1108,7 +1108,7 @@ void Relooper::Calculate(Block *Entry) {
           if (Simple->Inner->BranchVar) LastLoop = NULL; // a switch clears out the loop (TODO: only for breaks, not continue)
 
           if (Simple->Next) {
-            if (!Simple->Inner->BranchVar && Simple->Inner->ProcessedBranchesOut.size() == 2) {
+            if (!Simple->Inner->BranchVar && Simple->Inner->ProcessedBranchesOut.size() == 2 && Depth < 20) {
               // If there is a next block, we already know at Simple creation time to make direct branches,
               // and we can do nothing more in general. But, we try to optimize the case of a break and
               // a direct: This would normally be  if (break?) { break; } ..  but if we
@@ -1144,6 +1144,7 @@ void Relooper::Calculate(Block *Entry) {
                   }
                 }
               }
+              Depth++; // this optimization increases depth, for us and all our next chain (i.e., until this call returns)
             }
             Next = Simple->Next;
           } else {
@@ -1168,11 +1169,11 @@ void Relooper::Calculate(Block *Entry) {
           }
         }, {
           for (IdShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) {
-            RemoveUnneededFlows(iter->second, Multiple->Next, Multiple->Breaks ? NULL : LastLoop);
+            RemoveUnneededFlows(iter->second, Multiple->Next, Multiple->Breaks ? NULL : LastLoop, Depth+1);
           }
           Next = Multiple->Next;
         }, {
-          RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop);
+          RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth+1);
           Next = Loop->Next;
         });
       }
diff --git a/src/relooper/fuzzer.py b/src/relooper/fuzzer.py
index 18db997e..313f4d5e 100644
--- a/src/relooper/fuzzer.py
+++ b/src/relooper/fuzzer.py
@@ -3,7 +3,7 @@ import random, subprocess, difflib
 
 while True:
   # Random decisions
-  num = random.randint(2, 250)
+  num = random.randint(2, 500)
   density = random.random() * random.random()
   decisions = [random.randint(1, num*20) for x in range(num*3)]
   branches = [0]*num
@@ -123,14 +123,14 @@ int main() {
   open('fuzz.slow.js', 'w').write(slow)
   open('fuzz.cpp', 'w').write(fast)
   print '_'
-  slow_out = subprocess.Popen(['mozjs', '-m', '-n', 'fuzz.slow.js'], stdout=subprocess.PIPE).communicate()[0]
+  slow_out = subprocess.Popen(['mozjs', 'fuzz.slow.js'], stdout=subprocess.PIPE).communicate()[0]
 
   print '.'
   subprocess.call(['g++', 'fuzz.cpp', 'Relooper.o', '-o', 'fuzz', '-g'])
   print '*'
   subprocess.call(['./fuzz'], stdout=open('fuzz.fast.js', 'w'))
   print '-'
-  fast_out = subprocess.Popen(['mozjs', '-m', '-n', 'fuzz.fast.js'], stdout=subprocess.PIPE).communicate()[0]
+  fast_out = subprocess.Popen(['mozjs', 'fuzz.fast.js'], stdout=subprocess.PIPE).communicate()[0]
   print
 
   if slow_out != fast_out:
diff --git a/src/relooper/test.cpp b/src/relooper/test.cpp
index 9f3ddceb..f319757b 100644
--- a/src/relooper/test.cpp
+++ b/src/relooper/test.cpp
@@ -1,4 +1,6 @@
 
+#include <vector>
+
 #include "Relooper.h"
 
 int main() {
@@ -435,5 +437,34 @@ int main() {
 
     puts(r.GetOutputBuffer());
   }
+
+  if (1) {
+    Relooper::MakeOutputBuffer(10);
+
+    printf("\n\n-- lots of exits to an unwind block, possible nesting --\n\n");
+
+    const int DEPTH = 40;
+
+    std::vector<Block*> blocks;
+    for (int i = 0; i < DEPTH; i++) blocks.push_back(new Block("// block\n", NULL));
+    Block *last = new Block("// last\nreturn;\n", NULL);
+    Block *UW = new Block("// UW\nresumeException();\n\n", NULL);
+
+    for (int i = 0; i < DEPTH; i++) {
+      Block *b = blocks[i];
+      b->AddBranchTo(i+1 < DEPTH ? blocks[i+1] : last, "check()", NULL);
+      b->AddBranchTo(UW, NULL, NULL);
+    }
+
+    Relooper r;
+    for (int i = 0; i < DEPTH; i++) r.AddBlock(blocks[i]);
+    r.AddBlock(last);
+    r.AddBlock(UW);
+
+    r.Calculate(blocks[0]);
+    r.Render();
+
+    puts(r.GetOutputBuffer());
+  }
 }
 
diff --git a/src/relooper/test.txt b/src/relooper/test.txt
index d53aeeb1..2ae331c0 100644
--- a/src/relooper/test.txt
+++ b/src/relooper/test.txt
@@ -417,3 +417,156 @@
  }
  // block D
 
+
+
+-- lots of exits to an unwind block, possible nesting --
+
+ // block
+ do {
+  if (check()) {
+   // block
+   if (check()) {
+    // block
+    if (check()) {
+     // block
+     if (check()) {
+      // block
+      if (check()) {
+       // block
+       if (check()) {
+        // block
+        if (check()) {
+         // block
+         if (check()) {
+          // block
+          if (check()) {
+           // block
+           if (check()) {
+            // block
+            if (check()) {
+             // block
+             if (check()) {
+              // block
+              if (check()) {
+               // block
+               if (check()) {
+                // block
+                if (check()) {
+                 // block
+                 if (check()) {
+                  // block
+                  if (check()) {
+                   // block
+                   if (check()) {
+                    // block
+                    if (check()) {
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // block
+                     if (!(check())) {
+                      break;
+                     }
+                     // last
+                     return;
+                    }
+                   }
+                  }
+                 }
+                }
+               }
+              }
+             }
+            }
+           }
+          }
+         }
+        }
+       }
+      }
+     }
+    }
+   }
+  }
+ } while(0);
+ // UW
+ resumeException();
+ 
+
diff --git a/tests/life.c b/tests/life.c
index 4ce8d385..263fa0e6 100644
--- a/tests/life.c
+++ b/tests/life.c
@@ -67,7 +67,9 @@ void game(int w, int h, int i)
       i--;
       nudge(univ, w, h); // keep it interesting for benchmark
     } else {
+#if !__EMSCRIPTEN__
       usleep(20000);
+#endif
       show(univ, w, h);
     }
   }
diff --git a/tests/test_other.py b/tests/test_other.py
index 4a6296e0..5553a7b1 100644
--- a/tests/test_other.py
+++ b/tests/test_other.py
@@ -1027,10 +1027,85 @@ This pointer might make sense in another type signature: i: 0
     Building.emar('cr', lib_name, [a_name + '.o', b_name + '.o']) # libLIB.a with a and b
 
     # a is in the lib AND in an .o, so should be ignored in the lib. We do still need b from the lib though
-    Building.emcc(main_name, ['-L.', '-lLIB', a_name+'.o', c_name + '.o'], output_filename='a.out.js')
+    Building.emcc(main_name, [a_name+'.o', c_name + '.o', '-L.', '-lLIB'], output_filename='a.out.js')
 
     self.assertContained('result: 62', run_js(os.path.join(self.get_dir(), 'a.out.js')))
 
+  def test_link_group_asserts(self):
+    lib_src_name = os.path.join(self.get_dir(), 'lib.c')
+    open(lib_src_name, 'w').write('int x() { return 42; }')
+
+    main_name = os.path.join(self.get_dir(), 'main.c')
+    open(main_name, 'w').write(r'''
+      #include <stdio.h>
+      int x();
+      int main() {
+        printf("result: %d\n", x());
+        return 0;
+      }
+    ''')
+
+    Building.emcc(lib_src_name) # lib.c.o
+    lib_name = os.path.join(self.get_dir(), 'libLIB.a')
+    Building.emar('cr', lib_name, [lib_src_name + '.o']) # libLIB.a with lib.c.o
+
+    def test(lib_args, err_expected):
+      output = Popen([PYTHON, EMCC, main_name, '-o', 'a.out.js'] + lib_args, stdout=PIPE, stderr=PIPE).communicate()
+      if err_expected:
+        self.assertContained(err_expected, output[1])
+      else:
+        out_js = os.path.join(self.get_dir(), 'a.out.js')
+        assert os.path.exists(out_js), '\n'.join(output)
+        self.assertContained('result: 42', run_js(out_js))
+
+    test(['-Wl,--start-group', lib_name], '--start-group without matching --end-group')
+    test(['-Wl,--start-group', lib_name, '-Wl,--start-group'], 'Nested --start-group, missing --end-group?')
+    test(['-Wl,--end-group', lib_name, '-Wl,--start-group'], '--end-group without --start-group')
+    test(['-Wl,--start-group', lib_name, '-Wl,--end-group'], None)
+
+  def test_circular_libs(self):
+    def tmp_source(name, code):
+      file_name = os.path.join(self.get_dir(), name)
+      open(file_name, 'w').write(code)
+      return file_name
+
+    a = tmp_source('a.c', 'int z(); int x() { return z(); }')
+    b = tmp_source('b.c', 'int x(); int y() { return x(); } int z() { return 42; }')
+    c = tmp_source('c.c', 'int q() { return 0; }')
+    main = tmp_source('main.c', r'''
+      #include <stdio.h>
+      int y();
+      int main() {
+        printf("result: %d\n", y());
+        return 0;
+      }
+    ''')
+
+    Building.emcc(a) # a.c.o
+    Building.emcc(b) # b.c.o
+    Building.emcc(c) # c.c.o
+    lib_a = os.path.join(self.get_dir(), 'libA.a')
+    Building.emar('cr', lib_a, [a + '.o', c + '.o']) # libA.a with a.c.o,c.c.o
+    lib_b = os.path.join(self.get_dir(), 'libB.a')
+    Building.emar('cr', lib_b, [b + '.o', c + '.o']) # libB.a with b.c.o,c.c.o
+
+    args = ['-s', 'ERROR_ON_UNDEFINED_SYMBOLS=1', main, '-o', 'a.out.js']
+    libs = [lib_a, lib_b]
+
+    # lib_a does not satisfy any symbols from main, so it will not be included,
+    # and there will be an unresolved symbol.
+    output = Popen([PYTHON, EMCC] + args + libs, stdout=PIPE, stderr=PIPE).communicate()
+    self.assertContained('error: unresolved symbol: x', output[1])
+
+    # -Wl,--start-group and -Wl,--end-group around the libs will cause a rescan
+    # of lib_a after lib_b adds undefined symbol "x", so a.c.o will now be
+    # included (and the link will succeed).
+    libs = ['-Wl,--start-group'] + libs + ['-Wl,--end-group']
+    output = Popen([PYTHON, EMCC] + args + libs, stdout=PIPE, stderr=PIPE).communicate()
+    out_js = os.path.join(self.get_dir(), 'a.out.js')
+    assert os.path.exists(out_js), '\n'.join(output)
+    self.assertContained('result: 42', run_js(out_js))
+
   def test_redundant_link(self):
     lib = "int mult() { return 1; }"
     lib_name = os.path.join(self.get_dir(), 'libA.c')
diff --git a/tools/js-optimizer.js b/tools/js-optimizer.js
index 088c4f0f..2004efda 100644
--- a/tools/js-optimizer.js
+++ b/tools/js-optimizer.js
@@ -2237,18 +2237,30 @@ function registerizeHarder(ast) {
           break;
         case 'conditional':
           isInExpr++;
-          buildFlowGraph(node[1]);
-          var jEnter = markJunction();
-          var jExit = addJunction();
-          if (node[2]) {
-            buildFlowGraph(node[2]);
-          }
-          joinJunction(jExit);
-          setJunction(jEnter);
-          if (node[3]) {
-            buildFlowGraph(node[3]);
+          // If the conditional has no side-effects, we can treat it as a single
+          // block, which might open up opportunities to remove it entirely.
+          if (!hasSideEffects(node)) {
+            buildFlowGraph(node[1]);
+            if (node[2]) {
+              buildFlowGraph(node[2]);
+            }
+            if (node[3]) {
+              buildFlowGraph(node[3]);
+            }
+          } else {
+            buildFlowGraph(node[1]);
+            var jEnter = markJunction();
+            var jExit = addJunction();
+            if (node[2]) {
+              buildFlowGraph(node[2]);
+            }
+            joinJunction(jExit);
+            setJunction(jEnter);
+            if (node[3]) {
+              buildFlowGraph(node[3]);
+            }
+            joinJunction(jExit);
           }
-          joinJunction(jExit);
           isInExpr--;
           break;
         case 'while':
diff --git a/tools/shared.py b/tools/shared.py
index 5305d46b..fadec881 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -1144,63 +1144,131 @@ class Building:
     unresolved_symbols = set([func[1:] for func in Settings.EXPORTED_FUNCTIONS])
     resolved_symbols = set()
     temp_dirs = []
-    files = map(os.path.abspath, files)
+    def make_paths_absolute(f):
+      if f.startswith('-'): # skip flags
+        return f
+      else:
+        return os.path.abspath(f)
+    files = map(make_paths_absolute, files)
+    # Paths of already included object files from archives.
+    added_contents = set()
+    # Map of archive name to list of extracted object file paths.
+    ar_contents = {}
     has_ar = False
     for f in files:
-      has_ar = has_ar or Building.is_ar(f)
+      if not f.startswith('-'):
+        has_ar = has_ar or Building.is_ar(f)
+
+    # If we have only one archive or the force_archive_contents flag is set,
+    # then we will add every object file we see, regardless of whether it
+    # resolves any undefined symbols.
+    force_add_all = len(files) == 1 or force_archive_contents
+
+    # Considers an object file for inclusion in the link. The object is included
+    # if force_add=True or if the object provides a currently undefined symbol.
+    # If the object is included, the symbol tables are updated and the function
+    # returns True.
+    def consider_object(f, force_add=False):
+      new_symbols = Building.llvm_nm(f)
+      do_add = force_add or not unresolved_symbols.isdisjoint(new_symbols.defs)
+      if do_add:
+        logging.debug('adding object %s to link' % (f))
+        # Update resolved_symbols table with newly resolved symbols
+        resolved_symbols.update(new_symbols.defs)
+        # Update unresolved_symbols table by adding newly unresolved symbols and
+        # removing newly resolved symbols.
+        unresolved_symbols.update(new_symbols.undefs.difference(resolved_symbols))
+        unresolved_symbols.difference_update(new_symbols.defs)
+        actual_files.append(f)
+      return do_add
+
+    def get_archive_contents(f):
+      if f in ar_contents:
+        return ar_contents[f]
+
+      cwd = os.getcwd()
+      try:
+        temp_dir = os.path.join(EMSCRIPTEN_TEMP_DIR, 'ar_output_' + str(os.getpid()) + '_' + str(len(temp_dirs)))
+        temp_dirs.append(temp_dir)
+        safe_ensure_dirs(temp_dir)
+        os.chdir(temp_dir)
+        contents = filter(lambda x: len(x) > 0, Popen([LLVM_AR, 't', f], stdout=PIPE).communicate()[0].split('\n'))
+        if len(contents) == 0:
+          logging.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % f)
+        else:
+          for content in contents: # ar will silently fail if the directory for the file does not exist, so make all the necessary directories
+            dirname = os.path.dirname(content)
+            if dirname:
+              safe_ensure_dirs(dirname)
+          Popen([LLVM_AR, 'xo', f], stdout=PIPE).communicate() # if absolute paths, files will appear there. otherwise, in this directory
+          contents = map(lambda content: os.path.join(temp_dir, content), contents)
+          contents = filter(os.path.exists, map(os.path.abspath, contents))
+          contents = filter(Building.is_bitcode, contents)
+        ar_contents[f] = contents
+      finally:
+        os.chdir(cwd)
+
+      return contents
+
+    # Traverse a single archive. The object files are repeatedly scanned for
+    # newly satisfied symbols until no new symbols are found. Returns true if
+    # any object files were added to the link.
+    def consider_archive(f):
+      added_any_objects = False
+      loop_again = True
+      logging.debug('considering archive %s' % (f))
+      contents = get_archive_contents(f)
+      while loop_again: # repeatedly traverse until we have everything we need
+        loop_again = False
+        for content in contents:
+          if content in added_contents: continue
+          # Link in the .o if it provides symbols, *or* this is a singleton archive (which is apparently an exception in gcc ld)
+          if consider_object(content, force_add=force_add_all):
+            added_contents.add(content)
+            loop_again = True
+            added_any_objects = True
+      logging.debug('done running loop of archive %s' % (f))
+      return added_any_objects
+
+    current_archive_group = None
     for f in files:
-      if not Building.is_ar(f):
+      if f.startswith('-'):
+        if f in ['--start-group', '-(']:
+          assert current_archive_group is None, 'Nested --start-group, missing --end-group?'
+          current_archive_group = []
+        elif f in ['--end-group', '-)']:
+          assert current_archive_group is not None, '--end-group without --start-group'
+          # rescan the archives in the group until we don't find any more
+          # objects to link.
+          loop_again = True
+          logging.debug('starting archive group loop');
+          while loop_again:
+            loop_again = False
+            for archive in current_archive_group:
+              if consider_archive(archive):
+                loop_again = True
+          logging.debug('done with archive group loop');
+          current_archive_group = None
+        else:
+          logging.debug('Ignoring unsupported link flag: %s' % f)
+      elif not Building.is_ar(f):
         if Building.is_bitcode(f):
           if has_ar:
-            new_symbols = Building.llvm_nm(f)
-            resolved_symbols = resolved_symbols.union(new_symbols.defs)
-            unresolved_symbols = unresolved_symbols.union(new_symbols.undefs.difference(resolved_symbols)).difference(new_symbols.defs)
-          actual_files.append(f)
+            consider_object(f, force_add=True)
+          else:
+            # If there are no archives then we can simply link all valid bitcode
+            # files and skip the symbol table stuff.
+            actual_files.append(f)
       else:
         # Extract object files from ar archives, and link according to gnu ld semantics
         # (link in an entire .o from the archive if it supplies symbols still unresolved)
-        cwd = os.getcwd()
-        try:
-          temp_dir = os.path.join(EMSCRIPTEN_TEMP_DIR, 'ar_output_' + str(os.getpid()) + '_' + str(len(temp_dirs)))
-          temp_dirs.append(temp_dir)
-          safe_ensure_dirs(temp_dir)
-          os.chdir(temp_dir)
-          contents = filter(lambda x: len(x) > 0, Popen([LLVM_AR, 't', f], stdout=PIPE).communicate()[0].split('\n'))
-          #print >> sys.stderr, '  considering archive', f, ':', contents
-          if len(contents) == 0:
-            logging.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % f)
-          else:
-            for content in contents: # ar will silently fail if the directory for the file does not exist, so make all the necessary directories
-              dirname = os.path.dirname(content)
-              if dirname:
-                safe_ensure_dirs(dirname)
-            Popen([LLVM_AR, 'xo', f], stdout=PIPE).communicate() # if absolute paths, files will appear there. otherwise, in this directory
-            contents = map(lambda content: os.path.join(temp_dir, content), contents)
-            contents = filter(os.path.exists, map(os.path.abspath, contents))
-            added_contents = set()
-            added = True
-            #print >> sys.stderr, '  initial undef are now ', unresolved_symbols, '\n'
-            while added: # recursively traverse until we have everything we need
-              #print >> sys.stderr, '  running loop of archive including for', f
-              added = False
-              for content in contents:
-                if content in added_contents: continue 
-                new_symbols = Building.llvm_nm(content)
-                # Link in the .o if it provides symbols, *or* this is a singleton archive (which is apparently an exception in gcc ld)
-                #print >> sys.stderr, 'need', content, '?', unresolved_symbols, 'and we can supply', new_symbols.defs
-                #print >> sys.stderr, content, 'DEF', new_symbols.defs, '\n'
-                if new_symbols.defs.intersection(unresolved_symbols) or len(files) == 1 or force_archive_contents:
-                  if Building.is_bitcode(content):
-                    #print >> sys.stderr, '  adding object', content, '\n'
-                    resolved_symbols = resolved_symbols.union(new_symbols.defs)
-                    unresolved_symbols = unresolved_symbols.union(new_symbols.undefs.difference(resolved_symbols)).difference(new_symbols.defs)
-                    #print >> sys.stderr, '  undef are now ', unresolved_symbols, '\n'
-                    actual_files.append(content)
-                    added_contents.add(content)
-                    added = True
-            #print >> sys.stderr, '  done running loop of archive including for', f
-        finally:
-          os.chdir(cwd)
+        consider_archive(f)
+        # If we're inside a --start-group/--end-group section, add to the list
+        # so we can loop back around later.
+        if current_archive_group is not None:
+          current_archive_group.append(f)
+    assert current_archive_group is None, '--start-group without matching --end-group'
+
     try_delete(target)
 
     # Finish link
diff --git a/tools/test-js-optimizer-asm-regs-harder-output.js b/tools/test-js-optimizer-asm-regs-harder-output.js
index e1df42cb..c3b326f6 100644
--- a/tools/test-js-optimizer-asm-regs-harder-output.js
+++ b/tools/test-js-optimizer-asm-regs-harder-output.js
@@ -129,4 +129,9 @@ function linkedVars() {
  }
  return i2 + i1;
 }
+function deadCondExpr(i2) {
+ i2 = i2 | 0;
+ var i1 = 0;
+ return i1 | 0;
+}
 
diff --git a/tools/test-js-optimizer-asm-regs-harder.js b/tools/test-js-optimizer-asm-regs-harder.js
index 0231a215..fa72aab8 100644
--- a/tools/test-js-optimizer-asm-regs-harder.js
+++ b/tools/test-js-optimizer-asm-regs-harder.js
@@ -149,5 +149,11 @@ function linkedVars() {
  }
  return outer1 + outer2;
 }
-// EMSCRIPTEN_GENERATED_FUNCTIONS: ["asm", "_doit", "stackRestore", "switchey", "switchey2", "iffey", "labelledJump", "linkedVars']
+function deadCondExpr(input) {
+  input = input|0;
+  var dead = 0, temp = 0;
+  dead = (!input ? -1 : input)|0;
+  return temp|0;
+}
+// EMSCRIPTEN_GENERATED_FUNCTIONS: ["asm", "_doit", "stackRestore", "switchey", "switchey2", "iffey", "labelledJump", "linkedVars", "deadCondExpr"]