15 files changed, 2057 insertions, 629 deletions
diff --git a/tools/asm_module.py b/tools/asm_module.py
new file mode 100644
index 00000000..bf7fa71d
--- /dev/null
+++ b/tools/asm_module.py
@@ -0,0 +1,271 @@
+
+import sys, re
+
+import shared, js_optimizer
+
+
+class AsmModule():
+  def __init__(self, filename):
+    self.filename = filename
+    self.js = open(filename).read()
+
+    self.start_asm = self.js.find(js_optimizer.start_asm_marker)
+    self.start_funcs = self.js.find(js_optimizer.start_funcs_marker)
+    self.end_funcs = self.js.rfind(js_optimizer.end_funcs_marker)
+    self.end_asm = self.js.rfind(js_optimizer.end_asm_marker)
+
+    # pre and asm
+    self.pre_js = self.js[:self.start_asm]
+    self.asm_js = self.js[self.start_asm:self.end_asm]
+
+    # heap initializer
+    self.staticbump = int(re.search(shared.JS.memory_staticbump_pattern, self.pre_js).group(1))
+    if self.staticbump:
+      self.mem_init_js = re.search(shared.JS.memory_initializer_pattern, self.pre_js).group(0)
+
+    # global initializers
+    global_inits = re.search(shared.JS.global_initializers_pattern, self.pre_js)
+    if global_inits:
+      self.global_inits_js = global_inits.group(0)
+      self.global_inits = map(lambda init: init.split('{')[2][1:].split('(')[0], global_inits.groups(0)[0].split(','))
+    else:
+      self.global_inits_js = ''
+      self.global_inits = []
+
+    # imports (and global variables)
+    first_var = self.js.find('var ', self.js.find('var ', self.start_asm)+4)
+    self.pre_imports_js = self.js[self.start_asm:first_var]
+    self.imports_js = self.js[first_var:self.start_funcs]
+    self.imports = {}
+    for imp in js_optimizer.import_sig.finditer(self.imports_js):
+      key, value = imp.group(0).split('var ')[1][:-1].split('=', 1)
+      self.imports[key.strip()] = value.strip()
+    #print >> sys.stderr, 'imports', self.imports
+
+    # funcs
+    self.funcs_js = self.js[self.start_funcs:self.end_funcs]
+    self.funcs = set([m.group(2) for m in js_optimizer.func_sig.finditer(self.funcs_js)])
+    #print 'funcs', self.funcs
+
+    # tables and exports
+    post_js = self.js[self.end_funcs:self.end_asm]
+    ret = post_js.find('return ')
+    self.tables_js = post_js[:ret]
+    self.exports_js = post_js[ret:]
+    self.tables = self.parse_tables(self.tables_js)
+    self.exports = set([export.strip() for export in self.exports_js[self.exports_js.find('{')+1:self.exports_js.find('}')].split(',')])
+
+    # post
+    self.post_js = self.js[self.end_asm:]
+    self.sendings = {}
+    for sending in [sending.strip() for sending in self.post_js[self.post_js.find('}, { ')+5:self.post_js.find(' }, buffer);')].split(',')]:
+      colon = sending.find(':')
+      self.sendings[sending[:colon].replace('"', '')] = sending[colon+1:].strip()
+    self.module_defs = set(re.findall('var [\w\d_$]+ = Module\["[\w\d_$]+"\] = asm\["[\w\d_$]+"\];\n', self.post_js))
+
+  def relocate_into(self, main):
+    # heap initializer
+    if self.staticbump > 0:
+      new_mem_init = self.mem_init_js[:self.mem_init_js.rfind(', ')] + ', Runtime.GLOBAL_BASE+%d)' % main.staticbump
+      main.pre_js = re.sub(shared.JS.memory_staticbump_pattern, 'STATICTOP = STATIC_BASE + %d;\n' % (main.staticbump + self.staticbump) + new_mem_init, main.pre_js, count=1)
+
+    # Find function name replacements TODO: do not rename duplicate names with duplicate contents, just merge them
+    replacements = {}
+    for func in self.funcs:
+      rep = func
+      while rep in main.funcs:
+        rep += '_'
+        replacements[func] = rep
+    #print >> sys.stderr, 'replacements:', replacements
+
+    # sendings: add invokes for new tables
+    all_sendings = main.sendings
+    added_sending = False
+    for table in self.tables:
+      if table not in main.tables:
+        sig = table[table.rfind('_')+1:]
+        all_sendings['invoke_%s' % sig] = shared.JS.make_invoke(sig, named=False)
+        added_sending = True
+
+    # imports
+    all_imports = main.imports
+    for key, value in self.imports.iteritems():
+      if key in self.funcs or key in main.funcs: continue # external function in one module, implemented in the other
+      value_concrete = '.' not in value # env.key means it is an import, an external value, and not a concrete one
+      main_value = main.imports.get(key)
+      main_value_concrete = main_value and '.' not in main_value
+      if value_concrete and main_value_concrete: continue # standard global var
+      if not main_value or value_concrete:
+        if '+' in value:
+          # relocate
+          value = value.replace('(', '').replace(')', '').replace('| 0', '').replace('|0', '').replace(' ', '')
+          left, right = value.split('+')
+          assert left == 'H_BASE'
+          value = str(main.staticbump + int(right))
+        all_imports[key] = value
+      if (value_concrete or main_value_concrete) and key in all_sendings:
+        del all_sendings[key] # import of external value no longer needed
+    main.imports_js = '\n'.join(['var %s = %s;' % (key, value) for key, value in all_imports.iteritems()]) + '\n'
+
+    # check for undefined references to global variables
+    def check_import(key, value):
+      if value.startswith('+') or value.endswith('|0'): # ignore functions
+        if key not in all_sendings:
+          print >> sys.stderr, 'warning: external variable %s is still not defined after linking' % key
+          all_sendings[key] = '0'
+    for key, value in all_imports.iteritems(): check_import(key, value)
+
+    if added_sending:
+      sendings_js = ', '.join(['%s: %s' % (key, value) for key, value in all_sendings.iteritems()])
+      sendings_start = main.post_js.find('}, { ')+5
+      sendings_end = main.post_js.find(' }, buffer);')
+      main.post_js = main.post_js[:sendings_start] + sendings_js + main.post_js[sendings_end:]
+
+    # tables
+    f_bases = {}
+    f_sizes = {}
+    for table, data in self.tables.iteritems():
+      main.tables[table] = self.merge_tables(table, main.tables.get(table), data, replacements, f_bases, f_sizes)
+    main.combine_tables()
+    #print >> sys.stderr, 'f bases', f_bases
+
+    # relocate
+    temp = shared.Building.js_optimizer(self.filename, ['asm', 'relocate', 'last'], extra_info={
+      'replacements': replacements,
+      'fBases': f_bases,
+      'hBase': main.staticbump
+    })
+    #print >> sys.stderr, 'relocated side into', temp
+    relocated_funcs = AsmModule(temp)
+    shared.try_delete(temp)
+    main.extra_funcs_js = relocated_funcs.funcs_js.replace(js_optimizer.start_funcs_marker, '\n')
+
+    # update function table uses
+    ft_marker = 'FUNCTION_TABLE_'
+
+    def update_fts(what):
+      updates = []
+      i = 1 # avoid seeing marker in recursion
+      while 1:
+        i = what.find(ft_marker, i)
+        if i < 0: break;
+        start = i
+        end = what.find('[', start)
+        table = what[i:end]
+        if table not in f_sizes:
+          # table was not modified
+          i += len(ft_marker)
+          continue
+        nesting = 1
+        while nesting > 0:
+          next = what.find(']', end+1)
+          nesting -= 1
+          nesting += what.count('[', end+1, next)
+          end = next
+        assert end > 0
+        mask = what.rfind('&', start, end)
+        assert mask > 0 and end - mask <= 13
+        fixed = update_fts(what[start:mask+1] + str(f_sizes[table]-1) + ']')
+        updates.append((start, end, fixed))
+        i = end # additional function table uses were done by recursion
+      # apply updates
+      if len(updates) == 0: return what
+      parts = []
+      so_far = 0
+      for i in range(len(updates)):
+        start, end, fixed = updates[i]
+        parts.append(what[so_far:start])
+        parts.append(fixed)
+        so_far = end+1
+      parts.append(what[so_far:])
+      return ''.join(parts)
+
+    main.funcs_js = update_fts(main.funcs_js)
+    main.extra_funcs_js = update_fts(main.extra_funcs_js)
+
+    # global initializers
+    if self.global_inits:
+      my_global_inits = map(lambda init: replacements[init] if init in replacements else init, self.global_inits)
+      all_global_inits = map(lambda init: '{ func: function() { %s() } }' % init, main.global_inits + my_global_inits)
+      all_global_inits_js = '/* global initializers */ __ATINIT__.push(' + ','.join(all_global_inits) + ');'
+      if main.global_inits:
+        target = main.global_inits_js
+      else:
+        target = '// === Body ===\n'
+        all_global_inits_js = target + all_global_inits_js
+      main.pre_js = main.pre_js.replace(target, all_global_inits_js)
+
+    # exports
+    def rep_exp(export):
+      key, value = export.split(':')
+      if key in replacements:
+        repped = replacements[key]
+        return repped + ': ' + repped
+      return export
+    my_exports = map(rep_exp, self.exports)
+    exports = main.exports.union(my_exports)
+    main.exports_js = 'return {' + ','.join(list(exports)) + '};\n})\n'
+
+    # post
+    def rep_def(deff):
+      key = deff.split(' ')[1]
+      if key in replacements:
+        rep = replacements[key]
+        return 'var %s = Module["%s"] = asm["%s"];\n' % (rep, rep, rep)
+      return deff
+    my_module_defs = map(rep_def, self.module_defs)
+    new_module_defs = set(my_module_defs).difference(main.module_defs)
+    if len(new_module_defs) > 0:
+      position = main.post_js.find('Runtime.') # Runtime is the start of the hardcoded ones
+      main.post_js = main.post_js[:position] + ''.join(list(new_module_defs)) + '\n' + main.post_js[position:]
+
+  def write(self, out):
+    f = open(out, 'w')
+    f.write(self.pre_js)
+    f.write(self.pre_imports_js)
+    f.write(self.imports_js)
+    f.write(self.funcs_js)
+    f.write(self.extra_funcs_js)
+    f.write(self.tables_js)
+    f.write(self.exports_js)
+    f.write(self.post_js)
+    f.close()
+
+  # Utilities
+
+  def parse_tables(self, js):
+    tables = {}
+    parts = js.split(';')
+    for part in parts:
+      if '=' not in part: continue
+      part = part.split('var ')[1]
+      name, data = part.split('=')
+      tables[name.strip()] = data.strip()
+    return tables
+
+  def merge_tables(self, table, main, side, replacements, f_bases, f_sizes):
+    sig = table.split('_')[-1]
+    side = side[1:-1].split(',')
+    side = map(lambda f: replacements[f] if f in replacements else f, side)
+    if not main:
+      f_bases[sig] = 0
+      f_sizes[table] = len(side)
+      return '[' + ','.join(side) + ']'
+    main = main[1:-1].split(',')
+    # TODO: handle non-aliasing case too
+    assert len(main) % 2 == 0
+    f_bases[sig] = len(main)
+    ret = main + side
+    size = 2
+    while size < len(ret): size *= 2
+    aborter = ret[1] # we can assume odd indexes have an aborting function with the right signature
+    ret = ret + [aborter]*(size - len(ret))
+    assert len(ret) == size
+    f_sizes[table] = size
+    return '[' + ','.join(ret) + ']'
+
+  def combine_tables(self):
+    self.tables_js = '// EMSCRIPTEN_END_FUNCS\n'
+    for table, data in self.tables.iteritems():
+      self.tables_js += 'var %s = %s;\n' % (table, data)
+
diff --git a/tools/file_packager.py b/tools/file_packager.py
index 575e5957..1a9d925b 100644
--- a/tools/file_packager.py
+++ b/tools/file_packager.py
@@ -122,8 +122,6 @@ for arg in sys.argv[1:]:
       srcpath, dstpath = arg.split('@') # User is specifying destination filename explicitly.
     else:
       srcpath = dstpath = arg # Use source path as destination path.
-      if os.path.isabs(dstpath):
-        print >> sys.stderr, 'Warning: Embedding an absolute file/directory name "' + dstpath + '" to the virtual filesystem. The file will be made available in the path "' + dstpath + '", and not in the root of the generated file system. Use the explicit syntax --preload-file srcpath@dstpath to specify the target location the absolute source path should be directed to.'
     if os.path.isfile(srcpath) or os.path.isdir(srcpath):
       data_files.append({ 'srcpath': srcpath, 'dstpath': dstpath, 'mode': mode })
     else:
@@ -198,6 +196,22 @@ for file_ in data_files:
     os.path.walk(file_['srcpath'], add, [file_['mode'], file_['srcpath'], file_['dstpath']])
 data_files = filter(lambda file_: not os.path.isdir(file_['srcpath']), data_files)
 
+# Absolutize paths, and check that they make sense
+curr_abspath = os.path.abspath(os.getcwd())
+for file_ in data_files:
+  if file_['srcpath'] == file_['dstpath']:
+    # This file was not defined with src@dst, so we inferred the destination from the source. In that case,
+    # we require that the destination not be under the current location
+    path = file_['dstpath']
+    abspath = os.path.abspath(path)
+    if DEBUG: print >> sys.stderr, path, abspath, curr_abspath
+    if not abspath.startswith(curr_abspath):
+      print >> sys.stderr, 'Error: Embedding "%s" which is below the current directory "%s". This is invalid since the current directory becomes the root that the generated code will see' % (path, curr_abspath)
+      sys.exit(1)
+    file_['dstpath'] = abspath[len(curr_abspath)+1:]
+    if os.path.isabs(path):
+      print >> sys.stderr, 'Warning: Embedding an absolute file/directory name "' + path + '" to the virtual filesystem. The file will be made available in the relative path "' + file_['dstpath'] + '". You can use the explicit syntax --preload-file srcpath@dstpath to explicitly specify the target location the absolute source path should be directed to.'
+
 for file_ in data_files:
   file_['dstpath'] = file_['dstpath'].replace(os.path.sep, '/') # name in the filesystem, native and emulated
   if file_['dstpath'].endswith('/'): # If user has submitted a directory name as the destination but omitted the destination filename, use the filename from source file
diff --git a/tools/find_bigfuncs.py b/tools/find_bigfuncs.py
index ebff8b6e..6fdec3a9 100644
--- a/tools/find_bigfuncs.py
+++ b/tools/find_bigfuncs.py
@@ -1,23 +1,23 @@
 '''
-Simple tool to find big functions in an .ll file. Anything over i64 is of interest.
+Simple tool to find big functions in an .ll file.
 '''
 
 import os, sys, re
 
 filename = sys.argv[1]
 i = 0
-maxx = -1
-maxxest = '?'
 start = -1
-curr = '?'
+curr = None
+data = []
 for line in open(filename):
   i += 1
   if line.startswith('function '):
     start = i
     curr = line
-  elif line.startswith('}'):
+  elif line.startswith('}') and curr:
     size = i - start
-    if size > maxx:
-      maxx = size
-      maxxest = curr
-print maxx, 'lines in', maxxest
+    data.append([curr, size])
+    curr = None
+data.sort(lambda x, y: x[1] - y[1])
+print ''.join(['%6d : %s' % (x[1], x[0]) for x in data])
+
diff --git a/tools/js-optimizer.js b/tools/js-optimizer.js
index 95212fbc..4192ddd1 100644
--- a/tools/js-optimizer.js
+++ b/tools/js-optimizer.js
@@ -410,7 +410,7 @@ function removeUnneededLabelSettings(ast) {
 var USEFUL_BINARY_OPS = set('<<', '>>', '|', '&', '^');
 var COMPARE_OPS = set('<', '<=', '>', '>=', '==', '===', '!=', '!==');
 
-function simplifyExpressionsPre(ast) {
+function simplifyExpressions(ast) {
   // Simplify common expressions used to perform integer conversion operations
   // in cases where no conversion is needed.
   function simplifyIntegerConversions(ast) {
@@ -793,6 +793,7 @@ function simplifyExpressionsPre(ast) {
     joinAdditions(func);
     // simplifyZeroComp(func); TODO: investigate performance
     if (asm) asmOpts(func);
+    simplifyNotComps(func);
   });
 }
 
@@ -1158,10 +1159,6 @@ function simplifyNotComps(ast) {
   simplifyNotCompsPass = false;
 }
 
-function simplifyExpressionsPost(ast) {
-  simplifyNotComps(ast);
-}
-
 var NO_SIDE_EFFECTS = set('num', 'name');
 
 function hasSideEffects(node) { // this is 99% incomplete!
@@ -1540,24 +1537,22 @@ function detectAsmCoercion(node, asmInfo) {
   // for params, +x vs x|0, for vars, 0.0 vs 0
   if (node[0] === 'num' && node[1].toString().indexOf('.') >= 0) return ASM_DOUBLE;
   if (node[0] === 'unary-prefix') return ASM_DOUBLE;
-  if (asmInfo && node[0] == 'name') {
-    if (node[1] in asmInfo.vars) return asmInfo.vars[node[1]];
-    if (node[1] in asmInfo.params) return asmInfo.params[node[1]];
-  }
+  if (asmInfo && node[0] == 'name') return getAsmType(node[1], asmInfo);
   return ASM_INT;
 }
 
-function makeAsmParamCoercion(param, type) {
-  return type === ASM_INT ? ['binary', '|', ['name', param], ['num', 0]] : ['unary-prefix', '+', ['name', param]];
+function makeAsmCoercion(node, type) {
+  return type === ASM_INT ? ['binary', '|', node, ['num', 0]] : ['unary-prefix', '+', node];
 }
 
 function makeAsmVarDef(v, type) {
   return [v, type === ASM_INT ? ['num', 0] : ['unary-prefix', '+', ['num', 0]]];
 }
 
-function getAsmType(asmInfo, name) {
+function getAsmType(name, asmInfo) {
   if (name in asmInfo.vars) return asmInfo.vars[name];
-  return asmInfo.params[name];
+  if (name in asmInfo.params) return asmInfo.params[name];
+  assert(false, 'unknown var ' + name);
 }
 
 function normalizeAsm(func) {
@@ -1658,7 +1653,7 @@ function denormalizeAsm(func, data) {
   // add param coercions
   var next = 0;
   func[2].forEach(function(param) {
-    stats[next++] = ['stat', ['assign', true, ['name', param], makeAsmParamCoercion(param, data.params[param])]];
+    stats[next++] = ['stat', ['assign', true, ['name', param], makeAsmCoercion(['name', param], data.params[param])]];
   });
   // add variable definitions
   var varDefs = [];
@@ -1673,6 +1668,37 @@ function denormalizeAsm(func, data) {
   //printErr('denormalized \n\n' + astToSrc(func) + '\n\n');
 }
 
+function getFirstIndexInNormalized(func, data) {
+  // In a normalized asm function, return the index of the first element that is not not defs or annotation
+  var stats = func[3];
+  var i = stats.length-1;
+  while (i >= 0) {
+    var stat = stats[i];
+    if (stat[0] == 'var') break;
+    i--;
+  }
+  return i+1;
+}
+
+function getStackBumpNode(ast) {
+  var found = null;
+  traverse(ast, function(node, type) {
+    if (type === 'assign' && node[2][0] === 'name' && node[2][1] === 'STACKTOP') {
+      var value = node[3];
+      if (value[0] === 'name') return true;
+      assert(value[0] == 'binary' && value[1] == '|' && value[2][0] == 'binary' && value[2][1] == '+' && value[2][2][0] == 'name' && value[2][2][1] == 'STACKTOP' && value[2][3][0] == 'num');
+      found = node;
+      return true;
+    }
+  });
+  return found;
+}
+
+function getStackBumpSize(ast) {
+  var node = getStackBumpNode(ast);
+  return node ? node[3][2][3][1] : 0;
+}
+
 // Very simple 'registerization', coalescing of variables into a smaller number,
 // as part of minification. Globals-level minification began in a previous pass,
 // we receive extraInfo which tells us how to rename globals. (Only in asm.js.)
@@ -1717,7 +1743,7 @@ function registerize(ast) {
       }
     });
     vacuum(fun);
-    if (extraInfo) {
+    if (extraInfo && extraInfo.globals) {
       assert(asm);
       var usedGlobals = {};
       var nextLocal = 0;
@@ -1758,16 +1784,17 @@ function registerize(ast) {
           }
         }
       });
-      assert(fun[1] in extraInfo.globals, fun[1]);
-      fun[1] = extraInfo.globals[fun[1]];
-      assert(fun[1]);
+      if (fun[1] in extraInfo.globals) { // if fun was created by a previous optimization pass, it will not be here
+        fun[1] = extraInfo.globals[fun[1]];
+        assert(fun[1]);
+      }
       var nextRegName = 0;
     }
     var regTypes = {};
     function getNewRegName(num, name) {
       if (!asm) return 'r' + num;
       var type = asmData.vars[name];
-      if (!extraInfo) {
+      if (!extraInfo || !extraInfo.globals) {
         var ret = (type ? 'd' : 'i') + num;
         regTypes[ret] = type;
         return ret;
@@ -2790,7 +2817,7 @@ function relocate(ast) {
             var other = node[3];
             if (base === 0) return other;
             if (other[0] == 'num') {
-              other[1] += base;
+              other[1] = (other[1] + base)|0;
               return other;
             } else {
               node[2] = ['num', base];
@@ -2846,6 +2873,7 @@ function outline(ast) {
           defs[name] = node;
         } else {
           if (name in asmData.params) {
+            assignments[name] = (assignments[name] || 1) + 1; // init to 1 for initial parameter assignment
             considered[name] = true; // this parameter is not ssa, it must be in a hand-optimized function, so it is not trivial
           }
         }
@@ -2971,6 +2999,95 @@ function outline(ast) {
     });
   }
 
+  // Try to flatten out code as much as possible, to make outlining more feasible.
+  function flatten(func, asmData) {
+    var minSize = sizeToOutline;
+    var helperId = 0;
+    function getHelper() {
+      while (1) {
+        var ret = 'helper$' + (helperId++);
+        if (!(ret in asmData.vars) && !(ret in asmData.params)) {
+          asmData.vars[ret] = ASM_INT;
+          return ret;
+        }
+      }
+    }
+    var ignore = [];
+    traverse(func, function(node) {
+      var stats = getStatements(node);
+      if (stats) {
+        for (var i = 0; i < stats.length; i++) {
+          var node = stats[i]; // step over param
+          if (ignore.indexOf(node) >= 0) continue;
+          if (node[0] == 'stat') node = node[1];
+          if (ignore.indexOf(node) >= 0) continue;
+          var type = node[0];
+          if (measureSize(node) >= minSize) {
+            if (type === 'if' && node[3]) {
+              var reps = [];
+              var helper = getHelper();
+              // clear helper
+              reps.push(['stat', ['assign', true, ['name', helper], ['num', 1]]]); // 1 means continue in ifs
+              // gather parts
+              var parts = [];
+              var curr = node;
+              while (1) {
+                parts.push({ condition: curr[1], body: curr[2] });
+                curr = curr[3];
+                if (!curr) break;
+                if (curr[0] != 'if') {
+                  parts.push({ condition: null, body: curr });
+                  break;
+                }
+              }
+              // chunkify. Each chunk is a chain of if-elses, with the new overhead just on entry and exit
+              var chunks = [];
+              var currSize = 0;
+              var currChunk = [];
+              parts.forEach(function(part) {
+                var size = (part.condition ? measureSize(part.condition) : 0) + measureSize(part.body) + 5; // add constant for overhead of extra code
+                assert(size > 0);
+                if (size + currSize >= minSize && currSize) {
+                  chunks.push(currChunk);
+                  currChunk = [];
+                  currSize = 0;
+                }
+                currChunk.push(part);
+                currSize += size;
+              });
+              assert(currSize);
+              chunks.push(currChunk);
+              // generate flattened code
+              chunks.forEach(function(chunk) {
+                var pre = ['stat', ['assign', true, ['name', helper], ['num', 0]]];
+                var chain = null, tail = null;
+                chunk.forEach(function(part) {
+                  // add to chain
+                  var contents = makeIf(part.condition || ['num', 1], part.body[1]);
+                  if (chain) {
+                    tail[3] = contents;
+                  } else {
+                    chain = contents;
+                    ignore.push(contents);
+                  }
+                  tail = contents;
+                });
+                // if none of the ifs were entered, in the final else note that we need to continue
+                tail[3] = ['block', [['stat', ['assign', true, ['name', helper], ['num', 1]]]]];
+                reps.push(makeIf(['name', helper], [pre, chain]));
+              });
+              // replace code and update i
+              stats.splice.apply(stats, [i, 1].concat(reps));
+              i--; // negate loop increment
+              i += reps.length;
+              continue;
+            }
+          }
+        }
+      }
+    });
+  }
+
   // Prepares information for spilling of local variables
   function analyzeFunction(func, asmData) {
     var stack = []; // list of variables, each gets 8 bytes
@@ -2981,13 +3098,18 @@ function outline(ast) {
       stack.push(name);
     }
     asmData.stackPos = {};
+    var stackSize = getStackBumpSize(func);
+    if (stackSize % 8 === 0) stackSize += 8 - (stackSize % 8);
     for (var i = 0; i < stack.length; i++) {
-      asmData.stackPos[stack[i]] = i*8;
-    }
-    // Reserve an extra two spots: one for control flow var, the other for control flow data
-    asmData.stackSize = (stack.length + 2)*8;
-    asmData.controlStackPos = asmData.stackSize - 16;
-    asmData.controlDataStackPos = asmData.stackSize - 8;
+      asmData.stackPos[stack[i]] = stackSize + i*8;
+    }
+    // Reserve an extra two spots per possible outlining: one for control flow var, the other for control flow data
+    // The control variables are zeroed out when calling an outlined function, and after using
+    // the value after they return.
+    asmData.maxOutlinings = Math.round(3*measureSize(func)/extraInfo.sizeToOutline);
+    asmData.totalStackSize = stackSize + (stack.length + 2*asmData.maxOutlinings)*8;
+    asmData.controlStackPos = function(i) { return stackSize + (stack.length + i)*8 };
+    asmData.controlDataStackPos = function(i) { return stackSize + (stack.length + i)*8 + 4 };
     asmData.splitCounter = 0;
   }
 
@@ -3003,8 +3125,8 @@ function outline(ast) {
     });
 
     var writes = {};
-    var appearances = {};
-    var hasReturn = false, hasBreak = false, hasContinue = false;
+    var namings = {};
+    var hasReturn = false, hasReturnInt = false, hasReturnDouble = false, hasBreak = false, hasContinue = false;
     var breaks = {};    // set of labels we break or continue
     var continues = {}; // to (name -> id, just like labels)
     var breakCapturers = 0;
@@ -3014,16 +3136,21 @@ function outline(ast) {
       if (type == 'assign' && node[2][0] == 'name') {
         var name = node[2][1];
         if (name in asmData.vars || name in asmData.params) {
-          writes[name] = 0;
-          appearances[name] = (appearances[name] || 0) - 1; // this appearance is a definition, offset the counting later
+          writes[name] = (writes[name] || 0) + 1;
         }
       } else if (type == 'name') {
         var name = node[1];
         if (name in asmData.vars || name in asmData.params) {
-          appearances[name] = (appearances[name] || 0) + 1;
+          namings[name] = (namings[name] || 0) + 1;
         }
       } else if (type == 'return') {
-        hasReturn = true;
+        if (!node[1]) {
+          hasReturn = true;
+        } else if (detectAsmCoercion(node[1]) == ASM_INT) {
+          hasReturnInt = true;
+        } else {
+          hasReturnDouble = true;
+        }
       } else if (type == 'break') {
         var label = node[1] || 0;
         if (!label && breakCapturers > 0) return; // no label, and captured
@@ -3052,13 +3179,15 @@ function outline(ast) {
         continueCapturers--;
       }
     });
+    assert(hasReturn + hasReturnInt + hasReturnDouble <= 1);
 
     var reads = {};
-
-    for (var name in appearances) {
-      if (appearances[name] > 0) reads[name] = 0;
+    for (var v in namings) {
+      var actualReads = namings[v] - (writes[v] || 0);
+      if (actualReads > 0) reads[v] = actualReads;
     }
-    return { writes: writes, reads: reads, hasReturn: hasReturn, hasBreak: hasBreak, hasContinue: hasContinue, breaks: breaks, continues: continues, labels: labels };
+
+    return { writes: writes, reads: reads, hasReturn: hasReturn, hasReturnInt: hasReturnInt, hasReturnDouble: hasReturnDouble, hasBreak: hasBreak, hasContinue: hasContinue, breaks: breaks, continues: continues, labels: labels };
   }
 
   function makeAssign(dst, src) {
@@ -3081,73 +3210,108 @@ function outline(ast) {
 
   var CONTROL_BREAK = 1, CONTROL_BREAK_LABEL = 2, CONTROL_CONTINUE = 3, CONTROL_CONTINUE_LABEL = 4, CONTROL_RETURN_VOID = 5, CONTROL_RETURN_INT = 6, CONTROL_RETURN_DOUBLE = 7;
 
-  var sizeToOutline = extraInfo.sizeToOutline;
+  var sizeToOutline = null; // customized per function and as we make progress
+  function calculateThreshold(func) {
+    sizeToOutline = extraInfo.sizeToOutline;
+    var size = measureSize(func);
+    //var desiredChunks = Math.ceil(size/extraInfo.sizeToOutline);
+    ////sizeToOutline = Math.round((extraInfo.sizeToOutline + (2*size/desiredChunks))/3);
+    //sizeToOutline = Math.round(size/desiredChunks);
+    printErr('trying to reduce the size of ' + func[1] + ' which is ' + size + ' (>= ' + extraInfo.sizeToOutline + '), aim for ' + sizeToOutline);
+  }
+
   var level = 0;
+  var outliningParents = {}; // function name => parent it was outlined from
 
   function doOutline(func, asmData, stats, start, end) {
-    printErr(' do outline ' + [func[1], level, 'range:', start, end, 'of', stats.length]);
+    if (asmData.splitCounter === asmData.maxOutlinings) return [];
+    if (!extraInfo.allowCostlyOutlines) var originalStats = copy(stats);
     var code = stats.slice(start, end+1);
-    var newIdent = func[1] + '$' + (asmData.splitCounter++);
-    // add spills and reads before and after the call to the outlined code, and in the outlined code itself
+    var originalCodeSize =  measureSize(code);
+    var funcSize = measureSize(func);
+    var outlineIndex = asmData.splitCounter++;
+    var newIdent = func[1] + '$' + outlineIndex;
+    // analyze variables, and find 'owned' variables - that only appear in the outlined code, and do not need any spill support
     var codeInfo = analyzeCode(func, asmData, code);
+    var allCodeInfo = analyzeCode(func, asmData, func);
+    var owned = { sp: 1 }; // sp is always owned, each has its own
+    keys(setUnion(codeInfo.reads, codeInfo.writes)).forEach(function(v) {
+      if (allCodeInfo.reads[v] === codeInfo.reads[v] && allCodeInfo.writes[v] === codeInfo.writes[v] && !(v in asmData.params)) {
+        owned[v] = 1;
+      }
+    });
     var reps = [];
-    for (var v in codeInfo.reads) {
-      if (v != 'sp') {
-        reps.push(['stat', ['assign', true, ['sub', ['name', getAsmType(asmData, v) == ASM_INT ? 'HEAP32' : 'HEAPF32'], ['binary', '>>', ['binary', '+', ['name', 'sp'], ['num', asmData.stackPos[v]]], ['num', '2']]], ['name', v]]]);
-        code.unshift(['stat', ['assign', true, ['name', v], ['sub', ['name', getAsmType(asmData, v) == ASM_INT ? 'HEAP32' : 'HEAPF32'], ['binary', '>>', ['binary', '+', ['name', 'sp'], ['num', asmData.stackPos[v]]], ['num', '2']]]]]);
+    // wipe out control variable
+    reps.push(['stat', makeAssign(makeStackAccess(ASM_INT, asmData.controlStackPos(outlineIndex)), ['num', 0])]);
+    reps.push(['stat', makeAssign(makeStackAccess(ASM_INT, asmData.controlDataStackPos(outlineIndex)), ['num', 0])]); // XXX not really needed
+    // add spills and reads before and after the call to the outlined code, and in the outlined code itself
+    keys(setUnion(codeInfo.reads, codeInfo.writes)).forEach(function(v) {
+      if (!(v in owned)) {
+        reps.push(['stat', ['assign', true, ['sub', ['name', getAsmType(v, asmData) == ASM_INT ? 'HEAP32' : 'HEAPF32'], ['binary', '>>', ['binary', '+', ['name', 'sp'], ['num', asmData.stackPos[v]]], ['num', '2']]], ['name', v]]]);
       }
-    }
+    });
     reps.push(['stat', ['call', ['name', newIdent], [['name', 'sp']]]]);
     for (var v in codeInfo.writes) {
-      reps.push(['stat', ['assign', true, ['name', v], ['sub', ['name', getAsmType(asmData, v) == ASM_INT ? 'HEAP32' : 'HEAPF32'], ['binary', '>>', ['binary', '+', ['name', 'sp'], ['num', asmData.stackPos[v]]], ['num', '2']]]]]);
-      code.push(['stat', ['assign', true, ['sub', ['name', getAsmType(asmData, v) == ASM_INT ? 'HEAP32' : 'HEAPF32'], ['binary', '>>', ['binary', '+', ['name', 'sp'], ['num', asmData.stackPos[v]]], ['num', '2']]], ['name', v]]]);
+      if (!(v in owned)) {
+        reps.push(['stat', ['assign', true, ['name', v], makeAsmCoercion(['sub', ['name', getAsmType(v, asmData) == ASM_INT ? 'HEAP32' : 'HEAPF32'], ['binary', '>>', ['binary', '+', ['name', 'sp'], ['num', asmData.stackPos[v]]], ['num', '2']]], getAsmType(v, asmData))]]);
+      }
     }
     // Generate new function
-    if (codeInfo.hasReturn || codeInfo.hasBreak || codeInfo.hasContinue) {
+    if (codeInfo.hasReturn || codeInfo.hasReturnInt || codeInfo.hasReturnDouble || codeInfo.hasBreak || codeInfo.hasContinue) {
       // we need to capture all control flow using a top-level labeled one-time loop in the outlined function
       var breakCapturers = 0;
       var continueCapturers = 0;
-      traverse(code, function(node, type) {
+      traverse(['block', code], function(node, type) { // traverse on dummy block, so we get the toplevel statements
         // replace all break/continue/returns with code to break out of the main one-time loop, and set the control data
-        if (type == 'return') {
-          var ret = ['break', 'OL'];
-          if (!node[1]) {
-            ret = ['seq', makeAssign(makeStackAccess(ASM_INT, asmData.controlStackPos), ['num', CONTROL_RETURN_VOID]), ret];
-          } else {
-            var type = detectAsmCoercion(node[1], asmData);
-            ret = ['seq', makeAssign(makeStackAccess(ASM_INT, asmData.controlStackPos), ['num', type == ASM_INT ? CONTROL_RETURN_INT : CONTROL_RETURN_DOUBLE]), ret];
-            ret = ['seq', makeAssign(makeStackAccess(type, asmData.controlDataStackPos), node[1]), ret];
-          }
-          return ret;
-        } else if (type == 'break') {
-          var label = node[1] || 0;
-          if (label == 'OL') return; // this was just added before us, it is new replacement code
-          if (!label && breakCapturers > 0) return; // no label, and captured
-          if (label && (label in codeInfo.labels)) return; // label, and defined in this code, so captured
-          var ret = ['break', 'OL'];
-          ret = ['seq', makeAssign(makeStackAccess(ASM_INT, asmData.controlStackPos), ['num', label ? CONTROL_BREAK_LABEL : CONTROL_BREAK]), ret];
-          if (label) {
-            assert(label in codeInfo.breaks);
-            ret = ['seq', makeAssign(makeStackAccess(ASM_INT, asmData.controlDataStackPos), ['num', codeInfo.breaks[label]]), ret];
-          }
-          return ret;
-        } else if (type == 'continue') {
-          var label = node[1] || 0;
-          if (!label && continueCapturers > 0) return; // no label, and captured
-          if (label && (label in codeInfo.labels)) return; // label, and defined in this code, so captured
-          var ret = ['break', 'OL'];
-          ret = ['seq', makeAssign(mak