9 files changed, 217 insertions, 46 deletions
diff --git a/emscripten.py b/emscripten.py
index 800ca8d7..7bf1bb03 100755
--- a/emscripten.py
+++ b/emscripten.py
@@ -25,6 +25,8 @@ WARNING: You should normally never use this! Use emcc instead.
 
 from tools import shared
 
+DEBUG = os.environ.get('EMCC_DEBUG')
+
 __rootpath__ = os.path.abspath(os.path.dirname(__file__))
 def path_from_root(*pathelems):
   """Returns the absolute path for which the given path elements are
@@ -35,21 +37,114 @@ def path_from_root(*pathelems):
 temp_files = shared.TempFiles()
 
 
+def scan(ll, settings):
+  """Records each blockaddress(@func, %label) in ll as a [func, label] pair in settings."""
+  # e.g. blockaddress(@main, %23) yields ['@main', '%23']; settings is only
+  # touched when at least one block address is found.
+  # Raw string, so the regex escapes are not parsed as string escapes.
+  blockaddrs = [args.split(', ') for args in re.findall(r'blockaddress\(([^)]*)\)', ll)]
+  if blockaddrs:
+    settings['NECESSARY_BLOCKADDRS'] = blockaddrs
+
 def emscript(infile, settings, outfile, libraries=[]):
-  """Runs the emscripten LLVM-to-JS compiler.
+  """Runs the emscripten LLVM-to-JS compiler. We parallelize as much as possible.
 
   Args:
     infile: The path to the input LLVM assembly file.
-    settings: JSON-formatted string of settings that overrides the values
+    settings: Dict of JSON-serializable settings that override the values
       defined in src/settings.js.
     outfile: The file where the output is written.
   """
-  settings_file = temp_files.get('.txt').name # Save settings to a file to work around v8 issue 1579
+
+  compiler = path_from_root('src', 'compiler.js')
+
+  # Parallelization: We run 3 passes:
+  #   1 aka 'pre'  : Process types and metadata and so forth, and generate the preamble.
+  #   2 aka 'funcs': Process functions. We can parallelize this, working on each function independently.
+  #   3 aka 'post' : Process globals, generate postamble and finishing touches.
+
+  # Pre-scan ll and alter settings as necessary
+  with open(infile) as ll_file:
+    scan(ll_file.read(), settings)
+  # (the ll text is never bound to a local here, so it can be collected right away)
+
+  # Split input into the relevant parts for each phase
+  pre = ''
+  funcs = [] # split up functions here, for parallelism later
+  meta = '' # needed by each function XXX
+  post = ''
+
+  in_func = False
+  with open(infile) as ll_file: ll_lines = ll_file.readlines() # close the handle as soon as the lines are read
+  for line in ll_lines:
+    if in_func:
+      funcs[-1] += line
+      if line.startswith('}'):
+        in_func = False
+        #pre += line # XXX pre needs function defs?
+    else:
+      if line.startswith('define '):
+        in_func = True
+        funcs.append(line)
+        #pre += line # XXX pre needs function defs?
+      elif line.find(' = type { ') > 0:
+        pre += line # type
+      elif line.startswith('!'):
+        meta += line # metadata
+      else:
+        post += line # global
+        pre += line # pre needs it too, so we know about globals in pre and funcs
+  ll_lines = None
+
+  #print '========= pre ================\n'
+  #print pre
+  #print '========== funcs ===============\n'
+  #for func in funcs:
+  #  print '\n// ===\n\n', func
+  #print '========== post ==============\n'
+  #print post
+  #print '=========================\n'
+
+  # Save settings to a file to work around v8 issue 1579
+  settings_file = temp_files.get('.txt').name
   s = open(settings_file, 'w')
-  s.write(settings)
+  s.write(json.dumps(settings))
   s.close()
-  compiler = path_from_root('src', 'compiler.js')
-  shared.run_js(compiler, shared.COMPILER_ENGINE, [settings_file, infile] + libraries, stdout=outfile, cwd=path_from_root('src'))
+
+  # Pass 1: emit the preamble; the state later passes need comes back after //FORWARDED_DATA:
+  if DEBUG: print >> sys.stderr, 'phase 1'
+  pre_file = temp_files.get('.ll').name
+  with open(pre_file, 'w') as f: f.write(pre) # closed (and flushed) before the compiler reads it
+  out = shared.run_js(compiler, shared.COMPILER_ENGINE, [settings_file, pre_file, 'pre'] + libraries, stdout=subprocess.PIPE, cwd=path_from_root('src'))
+  js, forwarded_data = out.split('//FORWARDED_DATA:')
+  #print 'js', js
+  #print >> sys.stderr, 'FORWARDED_DATA 1:', forwarded_data, type(forwarded_data)
+  forwarded_file = temp_files.get('.json').name
+  with open(forwarded_file, 'w') as f: f.write(forwarded_data)
+
+  # Pass 2: compile the functions, seeded with the state forwarded by pass 1
+  if DEBUG: print >> sys.stderr, 'phase 2'
+  funcs_file = temp_files.get('.ll').name
+  with open(funcs_file, 'w') as f: f.write('\n'.join(funcs) + '\n' + meta)
+  #print 'pass 2c..'#, open(funcs_file).read()
+  out = shared.run_js(compiler, shared.COMPILER_ENGINE, [settings_file, funcs_file, 'funcs', forwarded_file] + libraries, stdout=subprocess.PIPE, cwd=path_from_root('src'))
+  funcs_js, forwarded_data = out.split('//FORWARDED_DATA:')
+  #print 'js', js
+  forwarded_file += '2'
+  #print >> sys.stderr, 'FORWARDED_DATA 2:', forwarded_data, type(forwarded_data), forwarded_file
+  with open(forwarded_file, 'w') as f: f.write(forwarded_data)
+  # XXX must coordinate function indexing data when parallelizing
+  #print 'OUT\n', out
+  js += funcs_js
+
+  # Pass 3: globals and postamble, seeded with the state forwarded by pass 2
+  if DEBUG: print >> sys.stderr, 'phase 3'
+  post_file = temp_files.get('.ll').name
+  with open(post_file, 'w') as f: f.write(post)
+  out = shared.run_js(compiler, shared.COMPILER_ENGINE, [settings_file, post_file, 'post', forwarded_file] + libraries, stdout=subprocess.PIPE, cwd=path_from_root('src'))
+  js += out
+
+  outfile.write(js)
   outfile.close()
 
 
@@ -127,7 +222,7 @@ def main(args):
   libraries = args.libraries[0].split(',') if len(args.libraries) > 0 else []
 
   # Compile the assembly to Javascript.
-  emscript(args.infile, json.dumps(settings), args.outfile, libraries)
+  emscript(args.infile, settings, args.outfile, libraries)
 
 if __name__ == '__main__':
   parser = optparse.OptionParser(
diff --git a/src/analyzer.js b/src/analyzer.js
index 1849b58d..a6a37400 100644
--- a/src/analyzer.js
+++ b/src/analyzer.js
@@ -1346,6 +1346,14 @@ function analyzer(data, sidePass) {
             }
           });
         });
+
+        if (func.ident in NECESSARY_BLOCKADDRS) {
+          Functions.blockAddresses[func.ident] = {};
+          for (var needed in NECESSARY_BLOCKADDRS[func.ident]) {
+            assert(needed in func.labelIds);
+            Functions.blockAddresses[func.ident][needed] = func.labelIds[needed];
+          }
+        }
       });
       this.forwardItem(item, 'StackAnalyzer');
     }
diff --git a/src/compiler.js b/src/compiler.js
index e589646b..3220c977 100644
--- a/src/compiler.js
+++ b/src/compiler.js
@@ -129,7 +129,13 @@ load('settings.js');
 
 var settings_file = arguments_[0];
 var ll_file = arguments_[1];
-additionalLibraries = Array.prototype.slice.call(arguments_, 2);
+phase = arguments_[2];
+if (phase == 'pre') {
+  additionalLibraries = Array.prototype.slice.call(arguments_, 3);
+} else {
+  var forwardedDataFile = arguments_[3];
+  additionalLibraries = Array.prototype.slice.call(arguments_, 4);
+}
 
 if (settings_file) {
   var settings = JSON.parse(read(settings_file));
@@ -191,6 +197,15 @@ load('jsifier.js');
 globalEval(processMacros(preprocess(read('runtime.js'))));
 Runtime.QUANTUM_SIZE = QUANTUM_SIZE;
 
+var temp = {};
+for (var i = 0; i < NECESSARY_BLOCKADDRS.length; i++) {
+  var func = toNiceIdent(NECESSARY_BLOCKADDRS[i][0]);
+  var label = toNiceIdent(NECESSARY_BLOCKADDRS[i][1]);
+  if (!temp[func]) temp[func] = {};
+  temp[func][label] = 1;
+}
+NECESSARY_BLOCKADDRS = temp;
+
 //===============================
 // Main
 //===============================
@@ -209,8 +224,17 @@ raw = null;
 
 // Pre-process the LLVM assembly
 
+//printErr('JS compiler in action, phase ' + phase);
+
 Debugging.handleMetadata(lines);
-PreProcessor.eliminateUnneededIntrinsics(lines);
+
+if (phase != 'pre') {
+  PassManager.load(read(forwardedDataFile));
+
+  if (phase == 'funcs') {
+    PreProcessor.eliminateUnneededIntrinsics(lines);
+  }
+}
 
 // Do it
 
diff --git a/src/jsifier.js b/src/jsifier.js
index 78f48118..a28b34ec 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -13,33 +13,36 @@ function JSify(data, functionsOnly, givenFunctions) {
   var mainPass = !functionsOnly;
 
   if (mainPass) {
-    // We will start to print out the data, but must do so carefully - we are
-    // dealing with potentially *huge* strings. Convenient replacements and
-    // manipulations may create in-memory copies, and we may OOM.
-    //
-    // Final shape that will be created:
-    //    shell
-    //      (body)
-    //        preamble
-    //          runtime
-    //        generated code
-    //        postamble
-    //          global_vars
-    //
-    // First, we print out everything until the generated code. Then the
-    // functions will print themselves out as they are parsed. Finally, we
-    // will call finalCombiner in the main pass, to print out everything
-    // else. This lets us not hold any strings in memory, we simply print
-    // things out as they are ready.
-
     var shellFile = SHELL_FILE ? SHELL_FILE : (BUILD_AS_SHARED_LIB ? 'shell_sharedlib.js' : 'shell.js');
-    var shellParts = read(shellFile).split('{{BODY}}');
-    print(shellParts[0]);
-    var preFile = BUILD_AS_SHARED_LIB ? 'preamble_sharedlib.js' : 'preamble.js';
-    var pre = processMacros(preprocess(read(preFile).replace('{{RUNTIME}}', getRuntime())));
-    print(pre);
 
-    Functions.implementedFunctions = set(data.unparsedFunctions.map(function(func) { return func.ident }));
+    if (phase == 'pre') {
+      // We will start to print out the data, but must do so carefully - we are
+      // dealing with potentially *huge* strings. Convenient replacements and
+      // manipulations may create in-memory copies, and we may OOM.
+      //
+      // Final shape that will be created:
+      //    shell
+      //      (body)
+      //        preamble
+      //          runtime
+      //        generated code
+      //        postamble
+      //          global_vars
+      //
+      // First, we print out everything until the generated code. Then the
+      // functions will print themselves out as they are parsed. Finally, we
+      // will call finalCombiner in the main pass, to print out everything
+      // else. This lets us not hold any strings in memory, we simply print
+      // things out as they are ready.
+
+      var shellParts = read(shellFile).split('{{BODY}}');
+      print(shellParts[0]);
+      var preFile = BUILD_AS_SHARED_LIB ? 'preamble_sharedlib.js' : 'preamble.js';
+      var pre = processMacros(preprocess(read(preFile).replace('{{RUNTIME}}', getRuntime())));
+      print(pre);
+    } else if (phase == 'funcs') {
+      Functions.implementedFunctions = set(data.unparsedFunctions.map(function(func) { return func.ident }));
+    }
   }
 
   // Does simple 'macro' substitution, using Django-like syntax,
@@ -1230,10 +1233,16 @@ function JSify(data, functionsOnly, givenFunctions) {
       return;
     }
 
-    // This is the main pass. Print out the generated code that we have here, together with the
+    if (phase == 'pre' || phase == 'funcs') {
+      // serialize out the data that later passes need
+      PassManager.serialize(); // XXX for funcs pass, do not serialize it all. I think we just need which were indexized.
+      return;
+    }
+
+    // This is the main 'post' pass. Print out the generated code that we have here, together with the
     // rest of the output that we started to print out earlier (see comment on the
     // "Final shape that will be created").
-    if (PRECISE_I64_MATH && preciseI64MathUsed) {
+    if (PRECISE_I64_MATH && Types.preciseI64MathUsed) {
       print(read('long.js'));
     } else {
       print('// Warning: printing of i64 values may be slightly rounded! No deep i64 math used, so precise i64 code not included');
@@ -1264,6 +1273,7 @@ function JSify(data, functionsOnly, givenFunctions) {
 
     print(postParts[1]);
 
+    var shellParts = read(shellFile).split('{{BODY}}');
     print(shellParts[1]);
     // Print out some useful metadata (for additional optimizations later, like the eliminator)
     print('// EMSCRIPTEN_GENERATED_FUNCTIONS: ' + JSON.stringify(Functions.allIdents) + '\n');
diff --git a/src/library.js b/src/library.js
index 8e30ba31..73109334 100644
--- a/src/library.js
+++ b/src/library.js
@@ -5042,7 +5042,7 @@ LibraryManager.library = {
     };
   },
 
-  llvm_uadd_with_overflow_i64__deps: [function() { preciseI64MathUsed = 1 }],
+  llvm_uadd_with_overflow_i64__deps: [function() { Types.preciseI64MathUsed = 1 }],
   llvm_uadd_with_overflow_i64: function(xl, xh, yl, yh) {
     i64Math.add(xl, xh, yl, yh);
     return {
@@ -5051,7 +5051,7 @@ LibraryManager.library = {
     };
   },
 
-  llvm_umul_with_overflow_i64__deps: [function() { preciseI64MathUsed = 1 }],
+  llvm_umul_with_overflow_i64__deps: [function() { Types.preciseI64MathUsed = 1 }],
   llvm_umul_with_overflow_i64: function(xl, xh, yl, yh) {
     i64Math.mul(xl, xh, yl, yh);
     return {
diff --git a/src/modules.js b/src/modules.js
index ba9f9482..d100b8d4 100644
--- a/src/modules.js
+++ b/src/modules.js
@@ -205,7 +205,10 @@ var Types = {
     }, this);
   },
 
-  needAnalysis: {} // Types noticed during parsing, that need analysis
+  needAnalysis: {}, // Types noticed during parsing, that need analysis
+
+  preciseI64MathUsed: false // Set to true if we actually use precise i64 math: If PRECISE_I64_MATH is set, and also such math is actually
+                            // needed (+,-,*,/,% - we do not need it for bitops)
 };
 
 var Functions = {
@@ -213,7 +216,7 @@ var Functions = {
   currFunctions: [],
 
   // All functions that will be implemented in this file
-  implementedFunctions: null,
+  implementedFunctions: [],
 
   // All the function idents seen so far
   allIdents: [],
@@ -221,6 +224,8 @@ var Functions = {
   indexedFunctions: {},
   nextIndex: 2, // Start at a non-0 (even, see below) value
 
+  blockAddresses: {}, // maps functions to a map of block labels to label ids
+
   // Mark a function as needing indexing, and returns the index
   getIndex: function(ident) {
     var ret = this.indexedFunctions[ident];
@@ -296,3 +301,30 @@ function cDefine(key) {
   return key in C_DEFINES ? C_DEFINES[key] : ('0 /* XXX missing C define ' + key + ' */');
 }
 
+// Moves compiler state between the separate 'pre', 'funcs' and 'post' runs.
+var PassManager = {
+  // Prints Types, Variables and Functions as JSON after a '//FORWARDED_DATA:'
+  // marker on stdout; emscripten.py splits the compiler output on that marker.
+  serialize: function() {
+    print('\n//FORWARDED_DATA:' + JSON.stringify({
+      Types: Types,
+      Variables: Variables,
+      Functions: Functions
+    }));
+  },
+  // Copies each top-level property of the forwarded Types, Variables and
+  // Functions over the corresponding global object. |json| is the text that
+  // serialize() emitted after the marker.
+  load: function(json) {
+    var data = JSON.parse(json);
+    var targets = { Types: Types, Variables: Variables, Functions: Functions };
+    for (var name in targets) {
+      for (var key in data[name]) {
+        targets[name][key] = data[name][key];
+      }
+    }
+    // Nothing is printed here: stdout is the generated JS that emscripten.py
+    // concatenates, so a '//LOADED_DATA:' dump would end up in the output file.
+  }
+};
+
diff --git a/src/parseTools.js b/src/parseTools.js
index 2cdea7c0..e76d23be 100644
--- a/src/parseTools.js
+++ b/src/parseTools.js
@@ -1598,8 +1598,6 @@ function isSignedOp(op, variant) {
 }
 
 var legalizedI64s = USE_TYPED_ARRAYS == 2; // We do not legalize globals, but do legalize function lines. This will be true in the latter case
-var preciseI64MathUsed = false; // Set to true if we actually use precise i64 math: If PRECISE_I64_MATH is set, and also such math is actually
-                                // needed (+,-,*,/,% - we do not need it for bitops)
 
 function processMathop(item) {
   var op = item.op;
@@ -1657,7 +1655,7 @@ function processMathop(item) {
       }
     }
     function i64PreciseOp(type, lastArg) {
-      preciseI64MathUsed = true;
+      Types.preciseI64MathUsed = true;
       return finish(['(i64Math.' + type + '(' + low1 + ',' + high1 + ',' + low2 + ',' + high2 +
                      (lastArg ? ',' + lastArg : '') + '),i64Math.result[0])', 'i64Math.result[1]']);
     }
@@ -1818,7 +1816,7 @@ function processMathop(item) {
     case 'sdiv': case 'udiv': return makeRounding(getFastValue(idents[0], '/', idents[1], item.type), bits, op[0] === 's');
     case 'mul': {
       if (bits == 32 && PRECISE_I32_MUL) {
-        preciseI64MathUsed = true;
+        Types.preciseI64MathUsed = true;
         return '(i64Math.multiply(' + idents[0] + ',0,' + idents[1] + ',0),i64Math.result[0])';
       } else {
         return handleOverflow(getFastValue(idents[0], '*', idents[1], item.type), bits);
@@ -2002,7 +2000,9 @@ function parseBlockAddress(segment) {
 }
 
 function finalizeBlockAddress(param) {
-  return Functions.currFunctions[param.func].labelIds[param.label]; // XXX We rely on currFunctions here...?
+  assert(param.func in Functions.blockAddresses); // filled in by the analyzer for functions listed in NECESSARY_BLOCKADDRS
+  assert(param.label in Functions.blockAddresses[param.func]);
+  return Functions.blockAddresses[param.func][param.label]; // the label id recorded for this block
 }
 
 function stripCorrections(param) {
diff --git a/src/settings.js b/src/settings.js
index 03c84eae..24949e25 100644
--- a/src/settings.js
+++ b/src/settings.js
@@ -276,6 +276,8 @@ var SMALL_XHR_CHUNKS = 0; // Use small chunk size for binary synchronous XHR's i
                           // Used for testing.
                           // See test_chunked_synchronous_xhr in runner.py and library.js.
 
+var NECESSARY_BLOCKADDRS = []; // List of (function, block) for all block addresses that are taken.
+
 // Compiler debugging options
 var DEBUG_TAGS_SHOWING = [];
   // Some useful items:
diff --git a/tests/cases/ptrtoint_blockaddr.ll b/tests/cases/ptrtoint_blockaddr.ll
index 1090bc29..68b29300 100644
--- a/tests/cases/ptrtoint_blockaddr.ll
+++ b/tests/cases/ptrtoint_blockaddr.ll
@@ -12,7 +12,7 @@ label555:                                     ; preds = %353
 label569:                                     ; preds = %555
   %333 = call i32 @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0)) ; [#uses=0]
   ; this should compile ok
-  store i32 ptrtoint (i8* blockaddress(@main, %569) to i32), i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0), align 8
+  store i32 ptrtoint (i8* blockaddress(@main, %label569) to i32), i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0), align 8
   ret i32 0
 }