11 files changed, 131 insertions, 44 deletions
diff --git a/emcc b/emcc
index e08de1cb..6bb209a5 100755
--- a/emcc
+++ b/emcc
@@ -138,6 +138,20 @@ Options that are modified or new in %s include:
                               generated code!
   --closure <on>           0: No closure compiler (default in -O0, -O1)
                            1: Run closure compiler (default in -O2, -O3)
+  --js-transform <cmd>     <cmd> will be called on the generated code
+                           before it is optimized. This lets you modify
+                           the JavaScript, for example adding some code
+                           or removing some code, in a way that those
+                           modifications will be optimized together with
+                           the generated code properly. <cmd> will be
+                           called with the filename of the generated
+                           code as a parameter; to modify the code, you
+                           can read the original data and then append to
+                           it or overwrite it with the modified data.
+                           <cmd> is interpreted as a space-separated
+                           list of arguments, for example, <cmd> of
+                           "python processor.py" will cause a python
+                           script to be run.
 
 The target file, if specified (-o <target>), defines what will
 be generated:
@@ -254,6 +268,7 @@ try:
   opt_level = 0
   llvm_opt_level = None
   closure = None
+  js_transform = None
 
   def check_bad_eq(arg):
     assert '=' not in arg, 'Invalid parameter (do not use "=" with "--" options)'
@@ -277,6 +292,11 @@ try:
       closure = int(newargs[i+1])
       newargs[i] = ''
       newargs[i+1] = ''
+    elif newargs[i].startswith('--js-transform'):
+      check_bad_eq(newargs[i])
+      js_transform = newargs[i+1]
+      newargs[i] = ''
+      newargs[i+1] = ''
     elif newargs[i] == '-MF': # clang cannot handle this, so we fake it
       f = open(newargs[i+1], 'w')
       f.write('\n')
@@ -340,7 +360,10 @@ try:
     assert has_source_inputs, 'Must have source code inputs to use -c'
     target = target_basename + '.o'
 
-  final_suffix = target.split('.')[-1]
+  if '.' in target:
+    final_suffix = target.split('.')[-1]
+  else:
+    final_suffix = ''
 
   # Apply optimization level settings
   shared.Settings.apply_opt_level(opt_level, noisy=True)
@@ -461,15 +484,15 @@ try:
     print >> sys.stderr, 'emcc: saving intermediate processing steps to %s' % shared.EMSCRIPTEN_TEMP_DIR
 
     intermediate_counter = 0
-    def save_intermediate(name=None):
+    def save_intermediate(name=None, suffix='js'):
       global intermediate_counter
-      shutil.copyfile(final, os.path.join(shared.EMSCRIPTEN_TEMP_DIR, 'emcc-%d%s.js' % (intermediate_counter, '' if name is None else '-' + name)))
+      shutil.copyfile(final, os.path.join(shared.EMSCRIPTEN_TEMP_DIR, 'emcc-%d%s.%s' % (intermediate_counter, '' if name is None else '-' + name, suffix)))
       intermediate_counter += 1
 
   if not LEAVE_INPUTS_RAW:
     final = in_temp(target_basename + '.bc')
     final = shared.Building.llvm_dis(final, final + '.ll')
-    if DEBUG: save_intermediate('ll')
+    if DEBUG: save_intermediate('ll', 'll')
   else:
     assert len(input_files) == 1
     final = input_files[0]
@@ -478,12 +501,11 @@ try:
   if DEBUG: save_intermediate('original')
 
   # Apply a source code transformation, if requested
-  source_transform = os.environ.get('EMCC_JS_PROCESSOR')
-  if source_transform:
-    exec source_transform in locals()
+  if js_transform:
     shutil.copyfile(final, final + '.tr.js')
     final += '.tr.js'
-    process(final)
+    if DEBUG: print >> sys.stderr, 'emcc: applying transform: %s' % js_transform
+    Popen(js_transform.split(' ') + [os.path.abspath(final)]).communicate()
     if DEBUG: save_intermediate('transformed')
 
   if opt_level >= 1:
diff --git a/src/analyzer.js b/src/analyzer.js
index 1e03a1d6..974dd67d 100644
--- a/src/analyzer.js
+++ b/src/analyzer.js
@@ -22,7 +22,9 @@ var SIDE_EFFECT_CAUSERS = set('call', 'invoke');
 
 // Analyzer
 
-function analyzer(data) {
+function analyzer(data, sidePass) {
+  var mainPass = !sidePass;
+
   // Substrate
   var substrate = new Substrate('Analyzer');
 
@@ -46,6 +48,7 @@ function analyzer(data) {
       var temp = splitter(item.items, function(item) { return item.intertype == 'type' });
       item.items = temp.leftIn;
       temp.splitOut.forEach(function(type) {
+        //dprint('types', 'adding defined type: ' + type.name_);
         Types.types[type.name_] = type;
         if (QUANTUM_SIZE === 1) {
           Types.fatTypes[type.name_] = copy(type);
@@ -175,10 +178,14 @@ function analyzer(data) {
   // Typevestigator
   substrate.addActor('Typevestigator', {
     processItem: function(data) {
-      for (var type in Types.needAnalysis) {
-        if (type) addType(type, data);
+      if (sidePass) { // Do not investigate in the main pass - it is only valid to start to do so in the first side pass,
+                      // which handles type definitions, or in a later pass. Doing so before the first side pass would
+                      // result in bad guesses about types that are actually defined.
+        for (var type in Types.needAnalysis) {
+          if (type) addType(type, data);
+        }
+        Types.needAnalysis = {};
       }
-      Types.needAnalysis = {};
       this.forwardItem(data, 'Typeanalyzer');
     }
   });
diff --git a/src/jsifier.js b/src/jsifier.js
index 0cdafb5a..a750f805 100644
--- a/src/jsifier.js
+++ b/src/jsifier.js
@@ -55,7 +55,7 @@ function JSify(data, functionsOnly, givenFunctions) {
 
   if (mainPass) {
     // Handle unparsed types TODO: Batch them
-    analyzer(intertyper(data.unparsedTypess[0].lines, true));
+    analyzer(intertyper(data.unparsedTypess[0].lines, true), true);
     data.unparsedTypess = null;
 
     // Add additional necessary items for the main pass. We can now do this since types are parsed (types can be used through
@@ -120,7 +120,7 @@ function JSify(data, functionsOnly, givenFunctions) {
     dprint('unparsedFunctions','====================\n// Processing function batch of ' + currBaseLineNums.length +
                                ' functions, ' + currFuncLines.length + ' lines, functions left: ' + data.unparsedFunctions.length);
     if (DEBUG_MEMORY) MemoryDebugger.tick('pre-func');
-    JSify(analyzer(intertyper(currFuncLines, true, currBaseLineNums)), true, Functions);
+    JSify(analyzer(intertyper(currFuncLines, true, currBaseLineNums), true), true, Functions);
     if (DEBUG_MEMORY) MemoryDebugger.tick('post-func');
   }
   currFuncLines = currBaseLineNums = null; // Do not hold on to anything from inside that loop (JS function scoping..)
@@ -1150,7 +1150,7 @@ function JSify(data, functionsOnly, givenFunctions) {
     print(postParts[0]);
 
     // Print out global variables and postsets TODO: batching
-    JSify(analyzer(intertyper(data.unparsedGlobalss[0].lines, true)), true, Functions);
+    JSify(analyzer(intertyper(data.unparsedGlobalss[0].lines, true), true), true, Functions);
     data.unparsedGlobalss = null;
 
     print(Functions.generateIndexing()); // done last, as it may rely on aliases set in postsets
diff --git a/src/modules.js b/src/modules.js
index 0bc8894c..1e996fca 100644
--- a/src/modules.js
+++ b/src/modules.js
@@ -48,7 +48,7 @@ var Debugging = {
     var form3 = new RegExp(/^!(\d+) = metadata !{i32 (\d+), (?:i32 \d+|null), metadata !(\d+), .*}$/);
     var form3a = new RegExp(/^!(\d+) = metadata !{i32 \d+, (?:i32 \d+|metadata !\d+), (?:i32 \d+|null), (?:i32 \d+|null), metadata !(\d+), (?:i32 \d+|null)}.*/);
     var form3ab = new RegExp(/^!(\d+) = metadata !{i32 \d+, (?:metadata !\d+|i32 \d+|null), metadata !(\d+).*$/);
-    var form3ac = new RegExp(/^!(\d+) = metadata !{i32 \d+, (?:metadata !\d+|null), metadata !"[^"]+", metadata !(\d+)[^\[]*.*$/);
+    var form3ac = new RegExp(/^!(\d+) = metadata !{i32 \d+, (?:metadata !\d+|null), metadata !"[^"]*", metadata !(\d+)[^\[]*.*$/);
     var form3ad = new RegExp(/^!(\d+) = metadata !{i32 \d+, (?:i32 \d+|null), (?:i32 \d+|null), metadata !"[^"]*", metadata !"[^"]*", metadata !"[^"]*", metadata !(\d+),.*$/);
     var form3b = new RegExp(/^!(\d+) = metadata !{i32 \d+, metadata !"([^"]+)", metadata !"([^"]+)", (metadata !\d+|null)}.*$/);
     var form3c = new RegExp(/^!(\d+) = metadata !{\w+\d* !?(\d+)[^\d].*$/);
diff --git a/src/preamble.js b/src/preamble.js
index 51e22390..c8e3570a 100644
--- a/src/preamble.js
+++ b/src/preamble.js
@@ -578,6 +578,7 @@ function enlargeMemory() {
 }
 #endif
 
+var TOTAL_STACK = Module['TOTAL_STACK'] || {{{ TOTAL_STACK }}};
 var TOTAL_MEMORY = Module['TOTAL_MEMORY'] || {{{ TOTAL_MEMORY }}};
 var FAST_MEMORY = Module['FAST_MEMORY'] || {{{ FAST_MEMORY }}};
 
@@ -641,7 +642,6 @@ Module['HEAPF32'] = HEAPF32;
 #endif
 
 STACK_ROOT = STACKTOP = alignMemoryPage(10);
-var TOTAL_STACK = 1024*1024; // XXX: Changing this value can lead to bad perf on v8!
 STACK_MAX = STACK_ROOT + TOTAL_STACK;
 
 STATICTOP = alignMemoryPage(STACK_MAX);
diff --git a/src/runtime.js b/src/runtime.js
index 39f522a7..76b01089 100644
--- a/src/runtime.js
+++ b/src/runtime.js
@@ -34,7 +34,7 @@ var RuntimeGenerator = {
     if (USE_TYPED_ARRAYS === 2) initial = Runtime.forceAlign(initial);
     var ret = 'var __stackBase__  = STACKTOP; STACKTOP += ' + initial;
     if (ASSERTIONS) {
-      ret += '; assert(STACKTOP < STACK_MAX)';
+      ret += '; assert(STACKTOP < STACK_MAX, "Ran out of stack")';
     }
     if (INIT_STACK) {
       ret += '; _memset(__stackBase__, 0, ' + initial + ')';
diff --git a/src/settings.js b/src/settings.js
index c5b10454..ee9f10b9 100644
--- a/src/settings.js
+++ b/src/settings.js
@@ -31,11 +31,15 @@ var INVOKE_RUN = 1; // Whether we will call run(). Disable if you embed the gene
                     // code in your own, and will call run() yourself at the right time
 var INIT_STACK = 0; // Whether to initialize memory on the stack to 0.
 var INIT_HEAP = 0; // Whether to initialize memory anywhere other than the stack to 0.
-var FAST_MEMORY = 2*1024*1024; // The amount of memory to initialize to 0. This ensures it will be
-                               // in a flat array. This only matters in non-typed array builds.
+var TOTAL_STACK = 5*1024*1024; // The total stack size. There is no way to enlarge the stack, so this
+                               // value must be large enough for the program's requirements. If
+                               // assertions are on, we will assert that it is not exceeded; otherwise,
+                               // exceeding it will fail silently.
 var TOTAL_MEMORY = 10*1024*1024; // The total amount of memory to use. Using more memory than this will
                                  // cause us to expand the heap, which can be costly with typed arrays:
                                  // we need to copy the old heap into a new one in that case.
+var FAST_MEMORY = 2*1024*1024; // The amount of memory to initialize to 0. This ensures it will be
+                               // in a flat array. This only matters in non-typed array builds.
 
 // Code embetterments
 var MICRO_OPTS = 1; // Various micro-optimizations, like nativizing variables
diff --git a/tests/cases/gepoverflow.txt b/tests/cases/gepoverflow.txt
index 01514709..50eaf405 100644
--- a/tests/cases/gepoverflow.txt
+++ b/tests/cases/gepoverflow.txt
@@ -1,2 +1,2 @@
-*1052222,1052792*
+*5246526,5247096*
 *-514,56*
diff --git a/tests/cases/trace.ll b/tests/cases/trace.ll
new file mode 100644
index 00000000..1bada3e9
--- /dev/null
+++ b/tests/cases/trace.ll
@@ -0,0 +1,33 @@
+; ModuleID = 'tests/hello_world.bc'
+
+; A [block] type is used in a function def here. If we investigate types in the very first pass, we will
+; make a bad guess as to the size - we assume undef'd types are [int32]. For this code to compile, we must
+; only investigate in the first side pass, which is correct since there the type defs are handled.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+%struct.TraceKindPair = type { i8*, i32 }
+
+@_ZL14traceKindNames = internal constant [4 x %struct.TraceKindPair] [%struct.TraceKindPair { i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0), i32 -1 }, %struct.TraceKindPair { i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0), i32 0 }, %struct.TraceKindPair { i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0), i32 1 }, %struct.TraceKindPair { i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0), i32 3 }], align 4 ; [#uses=3 type=[4 x %struct.TraceKindPair]*]
+
+@.str = private unnamed_addr constant [15 x i8] c"hello, world!\0A\00", align 1 ; [#uses=1 type=[15 x i8]*]
+
+define linkonce_odr hidden i32 @_ZN7mozilla11ArrayLengthIK13TraceKindPairLj4EEEjRAT0__T_([4 x %struct.TraceKindPair]* %arr) nounwind {
+entry:
+  ret i32 1
+}
+
+; [#uses=0]
+define i32 @main() {
+entry:
+  %retval = alloca i32, align 4                   ; [#uses=1 type=i32*]
+  store i32 0, i32* %retval
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str, i32 0, i32 0)) ; [#uses=0 type=i32]
+  %0 = getelementptr inbounds [4 x %struct.TraceKindPair]* @_ZL14traceKindNames, i32 0, i32 1; [#uses=1 type=%struct.TraceKindPair*] [debug line = 1473:17]
+  %1 = getelementptr inbounds %struct.TraceKindPair* %0, i32 0, i32 0 ; [#uses=1 type=i8**] [debug line = 1473:17]
+  ret i32 1
+}
+
+; [#uses=1]
+declare i32 @printf(i8*, ...)
diff --git a/tests/runner.py b/tests/runner.py
index de35fbe3..0433b99f 100644
--- a/tests/runner.py
+++ b/tests/runner.py
@@ -105,14 +105,21 @@ class RunnerCore(unittest.TestCase):
         shutil.copyfile(filename + '.o.js', filename + '.o.js.prepost.js')
         process(filename + '.o.js')
     else:
-      if post_build is not None:
-        os.environ['EMCC_JS_PROCESSOR'] = post_build
-      else:
-        try:
-          del os.environ['EMCC_JS_PROCESSOR']
-        except:
-          pass
-      Building.emcc(filename + '.o.ll', Settings.serialize() + self.emcc_args, filename + '.o.js')
+      transform_args = []
+      if post_build:
+        transform_filename = os.path.join(self.get_dir(), 'transform.py')
+        transform = open(transform_filename, 'w')
+        transform.write('''
+import sys
+sys.path += ['%s']
+''' % path_from_root(''))
+        transform.write(post_build)
+        transform.write('''
+process(sys.argv[1])
+''')
+        transform.close()
+        transform_args = ['--js-transform', "python %s" % transform_filename]
+      Building.emcc(filename + '.o.ll', Settings.serialize() + self.emcc_args + transform_args, filename + '.o.js')
 
   # Build JavaScript code from source code
   def build(self, src, dirname, filename, output_processor=None, main_file=None, additional_files=[], libraries=[], includes=[], build_ll_hook=None, extra_emscripten_args=[], post_build=None):
@@ -5142,7 +5149,8 @@ Options that are modified or new in %s include:
         assert output[1].split('2 errors generated.')[1].replace('\n', '') == 'emcc: compiler frontend failed to generate LLVM bitcode, halting'
 
         # emcc src.cpp -c    and   emcc src.cpp -o src.[o|bc] ==> should give a .bc file
-        for args in [['-c'], ['-o', 'src.o'], ['-o', 'src.bc']]:
+        #      regression check: -o js should create "js", with bitcode content
+        for args in [['-c'], ['-o', 'src.o'], ['-o', 'src.bc'], ['-o', 'js']]:
           target = args[1] if len(args) == 2 else 'hello_world.o'
           clear()
           output = Popen([compiler, path_from_root('tests', 'hello_world' + suffix)] + args, stdout=PIPE, stderr=PIPE).communicate()
@@ -5210,7 +5218,7 @@ Options that are modified or new in %s include:
             assert 'Module._main = ' not in generated, 'closure compiler should not have been run'
             # XXX find a way to test this: assert ('& 255' in generated or '&255' in generated) == (opt_level <= 2), 'corrections should be in opt <= 2'
             assert ('(__label__)' in generated) == (opt_level <= 1), 'relooping should be in opt >= 2'
-            assert ('assert(STACKTOP < STACK_MAX)' in generated) == (opt_level == 0), 'assertions should be in opt == 0'
+            assert ('assert(STACKTOP < STACK_MAX' in generated) == (opt_level == 0), 'assertions should be in opt == 0'
             assert 'var $i;' in generated, 'micro opts should always be on'
             if opt_level >= 1: assert 'HEAP8[HEAP32[' in generated, 'eliminator should create compound expressions, and fewer one-time vars'
             assert ('_puts(' in generated) == (opt_level >= 1), 'with opt >= 1, llvm opts are run and they should optimize printf to puts'
@@ -5275,8 +5283,21 @@ Options that are modified or new in %s include:
           assert os.path.exists('combined.bc'), '\n'.join(output)
           self.assertContained('side got: hello from main, over', self.run_llvm_interpreter(['combined.bc']))
 
-      # TODO: Add an argument for EMCC_JS_PROCESSOR to make it simpler to use, other simplifications there (allow non-py, just run it if not .py)
-      #       Add in files test a clear example of using disablePermissions, and link to it from the wiki
+        # --js-transform <transform>
+        clear()
+        trans = os.path.join(self.get_dir(), 't.py')
+        trans_file = open(trans, 'w')
+        trans_file.write('''
+import sys
+f = open(sys.argv[1], 'w')
+f.write('transformed!')
+f.close()
+''')
+        trans_file.close()
+        output = Popen([compiler, path_from_root('tests', 'hello_world' + suffix), '--js-transform', 'python t.py'], stdout=PIPE, stderr=PIPE).communicate()
+        assert open('a.out.js').read() == 'transformed!', 'Transformed output must be as expected'
+
+      # TODO: Add in files test a clear example of using disablePermissions, and link to it from the wiki
       # TODO: test normal project linking, static and dynamic: get_library should not need to be told what to link!
       # TODO: deprecate llvm optimizations, dlmalloc, etc. in emscripten.py.
 
diff --git a/tools/shared.py b/tools/shared.py
index 54725a98..ec33f6ab 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -153,17 +153,17 @@ USE_EMSDK = not os.environ.get('EMMAKEN_NO_SDK')
 if USE_EMSDK:
   # Disable system C and C++ include directories, and add our own (using -idirafter so they are last, like system dirs, which
   # allows projects to override them)
-  EMSDK_OPTS = [ '-nostdinc', '-nostdinc++',
-  '-idirafter' + path_from_root('system', 'include'),
-  '-idirafter' + path_from_root('system', 'include', 'bsd'), # posix stuff
-  '-idirafter' + path_from_root('system', 'include', 'libc'),
-  '-idirafter' + path_from_root('system', 'include', 'libcxx'),
-  '-idirafter' + path_from_root('system', 'include', 'gfx'),
-  '-idirafter' + path_from_root('system', 'include', 'net'),
-  '-idirafter' + path_from_root('system', 'include', 'SDL'),
-] + [
-  '-U__APPLE__'
-]
+  EMSDK_OPTS = ['-nostdinc', '-nostdinc++', '-Xclang', '-nobuiltininc', '-Xclang', '-nostdinc++', '-Xclang', '-nostdsysteminc',
+    '-Xclang', '-isystem' + path_from_root('system', 'include'),
+    '-Xclang', '-isystem' + path_from_root('system', 'include', 'bsd'), # posix stuff
+    '-Xclang', '-isystem' + path_from_root('system', 'include', 'libc'),
+    '-Xclang', '-isystem' + path_from_root('system', 'include', 'libcxx'),
+    '-Xclang', '-isystem' + path_from_root('system', 'include', 'gfx'),
+    '-Xclang', '-isystem' + path_from_root('system', 'include', 'net'),
+    '-Xclang', '-isystem' + path_from_root('system', 'include', 'SDL'),
+  ] + [
+    '-U__APPLE__'
+  ]
   COMPILER_OPTS += EMSDK_OPTS
 else:
   EMSDK_OPTS = []