1 files changed, 202 insertions, 64 deletions
diff --git a/emcc b/emcc
index ffce7363..fa62acb1 100755
--- a/emcc
+++ b/emcc
@@ -1,4 +1,5 @@
 #!/usr/bin/env python2
+# -*- Mode: python -*-
 
 '''
 emcc - compiler helper script
@@ -74,7 +75,7 @@ emcc can be influenced by a few environment variables:
   EMMAKEN_COMPILER - The compiler to be used, if you don't want the default clang.
 '''
 
-import os, sys, shutil, tempfile, subprocess, shlex, time
+import os, sys, shutil, tempfile, subprocess, shlex, time, re
 from subprocess import PIPE, STDOUT
 from tools import shared
 from tools.shared import Compression, execute, suffix, unsuffixed, unsuffixed_basename
@@ -90,7 +91,10 @@ LLVM_OPT_LEVEL = {
   3: 3,
 }
 
-DEBUG = int(os.environ.get('EMCC_DEBUG') or 0)
+DEBUG = os.environ.get('EMCC_DEBUG')
+if DEBUG == "0":
+  DEBUG = None
+
 TEMP_DIR = os.environ.get('EMCC_TEMP_DIR')
 LEAVE_INPUTS_RAW = os.environ.get('EMCC_LEAVE_INPUTS_RAW') # Do not compile .ll files into .bc, just compile them with emscripten directly
                                                            # Not recommended, this is mainly for the test runner, or if you have some other
@@ -118,12 +122,43 @@ if len(sys.argv) == 1:
   print 'emcc: no input files'
   exit(1)
 
+# read response files very early on: every '@file' argument is replaced in place by the shell-split contents of file
+# only one response file is expanded per pass and then argv is rescanned, so arguments that came from a response file are checked too
+response_file = True
+while response_file:
+  response_file = None
+  for index in range(1, len(sys.argv)):
+    if sys.argv[index].startswith('@'): # startswith is safe on an empty-string argument, indexing [0] is not
+      # found one, loop again next time
+      response_file = sys.argv[index][1:] # assign before the message below so that it reports the actual file name
+      print >>sys.stderr, 'emcc: using response file: %s' % response_file
+      if not os.path.exists(response_file):
+        print >>sys.stderr, 'emcc: error: Response file not found: %s' % response_file
+        exit(1)
+
+      response_fd = open(response_file, 'r')
+      extra_args = shlex.split(response_fd.read())
+      response_fd.close()
+
+      # slice in extra_args in place of the response file arg
+      sys.argv[index:index+1] = extra_args
+      break
+
 if sys.argv[1] == '--version':
-  print '''emcc (Emscripten GCC-like replacement) 2.0
-Copyright (C) 2012 the Emscripten authors.
+  revision = '(unknown revision)'
+  here = os.getcwd()
+  os.chdir(shared.path_from_root())
+  try:
+    revision = execute(['git', 'show'], stdout=PIPE, stderr=PIPE)[0].split('\n')[0]
+  except:
+    pass
+  finally:
+    os.chdir(here)
+  print '''emcc (Emscripten GCC-like replacement) %s (%s)
+Copyright (C) 2013 the Emscripten authors (see AUTHORS.txt)
 This is free and open source software under the MIT license.
 There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-  '''
+  ''' % (shared.EMSCRIPTEN_VERSION, revision)
   exit(0)
 elif sys.argv[1] == '--help':
   this = os.path.basename('em++' if os.environ.get('EMMAKEN_CXX') else 'emcc')
@@ -146,16 +181,23 @@ Options that are modified or new in %s include:
                            tools/shared.py and also src/settings.js.)
                            Note: Optimizations are only done when
                            compiling to JavaScript, not to intermediate
-                           bitcode.
+                           bitcode, *unless* you build with
+                           EMCC_OPTIMIZE_NORMALLY=1 (not recommended
+                           unless you know what you are doing!)
   -O2                      As -O1, plus the relooper (loop recreation),
-                           plus closure compiler advanced opts, plus
-                           LLVM -O2 optimizations
-                           Warning: Compiling with this takes a long time!
+                           plus LLVM -O2 optimizations
   -O3                      As -O2, plus dangerous optimizations that may
-                           break the generated code! This is not
-                           recommended at all, see the wiki for more
-                           details (you can try -O2 and then add
-                           dangerous optimizations one by one).
+                           break the generated code! This adds
+
+                              -s DOUBLE_MODE=0
+                              -s PRECISE_I64_MATH=0
+                              --closure 1
+                              --llvm-lto 1
+
+                           This is not recommended at all. A better idea
+                           is to try each of these separately on top of
+                           -O2 to see what works. See the wiki for more
+                           information.
 
   -s OPTION=VALUE          JavaScript code generation option passed
                            into the emscripten compiler. For the
@@ -177,6 +219,12 @@ Options that are modified or new in %s include:
                            the last compilation phase from bitcode to
                            JavaScript, or else we will remove it by
                            default in -O1 and above.
+                           In -O0, line numbers will be shown in the
+                           generated code. In -O1 and above, the optimizer
+                           removes those comments. This flag does however
+                           have the effect of disabling anything that
+                           causes name mangling or minification (closure
+                           or the registerize pass).
 
   --typed-arrays <mode>    0: No typed arrays
                            1: Parallel typed arrays
@@ -187,14 +235,23 @@ Options that are modified or new in %s include:
                            2: -O2 LLVM optimizations
                            3: -O3 LLVM optimizations (default in -O2+)
 
-  --llvm-lto <level>       0: No LLVM LTO (default in -O0)
-                           1: LLVM LTO (default in -O1+)
+  --llvm-lto <level>       0: No LLVM LTO (default in -O2 and below)
+                           1: LLVM LTO (default in -O3)
                            Note: If LLVM optimizations are not run
                            (see --llvm-opts), setting this to 1 has no
                            effect.
 
-  --closure <on>           0: No closure compiler (default in -O0, -O1)
-                           1: Run closure compiler (default in -O2, -O3)
+  --closure <on>           0: No closure compiler (default in -O2 and below)
+                           1: Run closure compiler. This greatly reduces
+                           code size and may in some cases increase
+                           runtime speed (although the opposite can also
+                           occur). Note that it takes time to run, and
+                           may require some changes to the code. This
+                           is run by default in -O3.
+
+                           Note: If closure compiler hits an out-of-memory,
+                           try adjusting JAVA_HEAP_SIZE in the environment
+                           (for example, to 4096m for 4GB).
 
   --js-transform <cmd>     <cmd> will be called on the generated code
                            before it is optimized. This lets you modify
@@ -345,6 +402,41 @@ Options that are modified or new in %s include:
                            for a later incremental build (where you also
                            enable it) to be sped up.
 
+                           Caching works separately on 4 parts of compilation:
+                           'pre' which is types and global variables; that
+                           information is then fed into 'funcs' which are
+                           the functions (which we parallelize), and then
+                           'post' which adds final information based on
+                           the functions (e.g., do we need long64 support
+                           code). Finally, 'jsfuncs' are JavaScript-level
+                           optimizations. Each of the 4 parts can be cached
+                           separately, but note that they can affect each
+                           other: If you recompile a single C++ file that
+                           changes a global variable - e.g., adds, removes
+                           or modifies a global variable, say by adding
+                           a printf or by adding a compile-time timestamp,
+                           then 'pre' cannot be loaded from the cache. And
+                           since 'pre's output is sent to 'funcs' and 'post',
+                           they will get invalidated as well, and only
+                           'jsfuncs' will be cached. So avoid modifying
+                           globals to let caching work fully.
+
+                           To work around the problem mentioned in the
+                           previous paragraph, you can use
+
+                            emscripten_jcache_printf
+
+                           when adding debug printfs to your code. That
+                           function is specially preprocessed so that it
+                           does not create a constant string global for
+                           its first argument. See emscripten.h for more
+                           details. Note in particular that you need to
+                           already have a call to that function in your
+                           code *before* you add one and do an incremental
+                           build, so that adding an external reference
+                           (also a global property) does not invalidate
+                           everything.
+
   --clear-cache            Manually clears the cache of compiled
                            emscripten system libraries (libc++,
                            libc++abi, libc). This is normally
@@ -600,7 +692,8 @@ try:
   ignore_dynamic_linking = False
   shell_path = shared.path_from_root('src', 'shell.html')
   js_libraries = []
-  keep_debug = False
+  keep_llvm_debug = False
+  keep_js_debug = False
   bind = False
   jcache = False
   if use_cxx:
@@ -613,18 +706,21 @@ try:
 
   absolute_warning_shown = False
 
+  settings_changes = []
+
   for i in range(len(newargs)):
     newargs[i] = newargs[i].strip() # On Windows Vista (and possibly others), excessive spaces in the command line leak into the items in this array, so trim e.g. 'foo.cpp ' -> 'foo.cpp'
     if newargs[i].startswith('-O'):
-      requested_level = newargs[i][2]
+      # Let -O default to -O2, which is what gcc does.
+      requested_level = newargs[i][2:] or '2'
       if requested_level == 's':
-        print >> sys.stderr, 'emcc: warning: -Os is ignored (use -O0, -O1, -O2)'
-      else:
-        try:
-          opt_level = int(requested_level)
-          assert 0 <= opt_level <= 3
-        except:
-          raise Exception('Invalid optimization level: ' + newargs[i])
+        requested_level = 2
+        settings_changes.append('INLINING_LIMIT=50')
+      try:
+        opt_level = int(requested_level)
+        assert 0 <= opt_level <= 3
+      except:
+        raise Exception('Invalid optimization level: ' + newargs[i])
       newargs[i] = ''
     elif newargs[i].startswith('--llvm-opts'):
       check_bad_eq(newargs[i])
@@ -667,7 +763,8 @@ try:
       newargs[i] = ''
       newargs[i+1] = ''
     elif newargs[i] == '-g':
-      keep_debug = True
+      keep_llvm_debug = True
+      keep_js_debug = True
     elif newargs[i] == '--bind':
       bind = True
       newargs[i] = ''
@@ -737,16 +834,16 @@ try:
     newargs = newargs + [default_cxx_std]
 
   if llvm_opts is None: llvm_opts = LLVM_OPT_LEVEL[opt_level]
-  if llvm_lto is None: llvm_lto = llvm_opts > 0
-  if closure is None: closure = 1 if opt_level >= 2 else 0
-  if opt_level <= 0: keep_debug = True # always keep debug in -O0
+  if llvm_lto is None: llvm_lto = opt_level >= 3
+  if opt_level <= 0: keep_llvm_debug = keep_js_debug = True # always keep debug in -O0
+  if opt_level > 0: keep_llvm_debug = False # JS optimizer wipes out llvm debug info from being visible
+  if closure is None and opt_level == 3: closure = True
 
   if DEBUG: start_time = time.time() # done after parsing arguments, which might affect debug state
 
   if closure:
     assert os.path.exists(shared.CLOSURE_COMPILER), 'emcc: fatal: Closure compiler (%s) does not exist' % shared.CLOSURE_COMPILER
 
-  settings_changes = []
   for i in range(len(newargs)):
     if newargs[i] == '-s':
       if is_minus_s_for_emcc(newargs, i):
@@ -879,7 +976,11 @@ try:
       shared.Settings.CORRECT_OVERFLOWS = 1
 
   if shared.Settings.CORRECT_SIGNS >= 2 or shared.Settings.CORRECT_OVERFLOWS >= 2 or shared.Settings.CORRECT_ROUNDINGS >= 2:
-    keep_debug = True # must keep debug info to do line-by-line operations 
+    keep_llvm_debug = True # must keep debug info to do line-by-line operations 
+
+  if (keep_llvm_debug or keep_js_debug) and closure:
+    print >> sys.stderr, 'emcc: warning: disabling closure because debug info was requested'
+    closure = False
 
   if minify_whitespace is None:
     minify_whitespace = closure # if closure is run, minify whitespace
@@ -894,6 +995,7 @@ try:
   for input_file in input_files:
     if input_file.endswith(SOURCE_SUFFIXES):
       if DEBUG: print >> sys.stderr, 'emcc: compiling source file: ', input_file
+      input_file = shared.Building.preprocess(input_file, in_temp(uniquename(input_file)))
       output_file = in_temp(unsuffixed(uniquename(input_file)) + '.o')
       temp_files.append(output_file)
       args = newargs + ['-emit-llvm', '-c', input_file, '-o', output_file]
@@ -927,7 +1029,15 @@ try:
   # If we were just asked to generate bitcode, stop there
   if final_suffix not in JS_CONTAINING_SUFFIXES:
     if llvm_opts > 0:
-      print >> sys.stderr, 'emcc: warning: -Ox flags ignored, since not generating JavaScript'
+      if not os.environ.get('EMCC_OPTIMIZE_NORMALLY'):
+        print >> sys.stderr, 'emcc: warning: -Ox flags ignored, since not generating JavaScript'
+      else:
+        for input_file in input_files:
+          if input_file.endswith(SOURCE_SUFFIXES):
+            if DEBUG: print >> sys.stderr, 'emcc: optimizing %s with -O%d since EMCC_OPTIMIZE_NORMALLY defined' % (input_file, llvm_opts)
+            shared.Building.llvm_opt(in_temp(unsuffixed(uniquename(input_file)) + '.o'), llvm_opts)
+          else:
+            if DEBUG: print >> sys.stderr, 'emcc: not optimizing %s despite EMCC_OPTIMIZE_NORMALLY since not source code' % (input_file)
     if not specified_target:
       for input_file in input_files:
         shutil.move(in_temp(unsuffixed(uniquename(input_file)) + '.o'), unsuffixed_basename(input_file) + '.' + final_suffix)
@@ -959,7 +1069,22 @@ try:
     def create_libc():
       if DEBUG: print >> sys.stderr, 'emcc: building libc for cache'
       o_s = []
-      for src in ['dlmalloc.c', os.path.join('libcxx', 'new.cpp')]:
+      libc_files = [
+        'dlmalloc.c',
+        os.path.join('libcxx', 'new.cpp'),
+        os.path.join('libc', 'stdlib', 'getopt_long.c'),
+        os.path.join('libc', 'gen', 'err.c'),
+        os.path.join('libc', 'gen', 'errx.c'),
+        os.path.join('libc', 'gen', 'warn.c'),
+        os.path.join('libc', 'gen', 'warnx.c'),
+        os.path.join('libc', 'gen', 'verr.c'),
+        os.path.join('libc', 'gen', 'verrx.c'),
+        os.path.join('libc', 'gen', 'vwarn.c'),
+        os.path.join('libc', 'gen', 'vwarnx.c'),
+        os.path.join('libc', 'stdlib', 'strtod.c'),
+      ];
+
+      for src in libc_files:
         o = in_temp(os.path.basename(src) + '.o')
         execute([shared.PYTHON, shared.EMCC, shared.path_from_root('system', 'lib', src), '-o', o], stdout=stdout, stderr=stderr)
         o_s.append(o)
@@ -984,7 +1109,7 @@ try:
     def create_libcxx():
       if DEBUG: print >> sys.stderr, 'emcc: building libcxx for cache'
       os = []
-      for src in ['algorithm.cpp', 'condition_variable.cpp', 'future.cpp', 'iostream.cpp', 'memory.cpp', 'random.cpp', 'stdexcept.cpp', 'system_error.cpp', 'utility.cpp', 'bind.cpp', 'debug.cpp', 'hash.cpp', 'mutex.cpp', 'string.cpp', 'thread.cpp', 'valarray.cpp', 'chrono.cpp', 'exception.cpp', 'ios.cpp', 'locale.cpp', 'regex.cpp', 'strstream.cpp', 'typeinfo.cpp']:
+      for src in ['algorithm.cpp', 'condition_variable.cpp', 'future.cpp', 'iostream.cpp', 'memory.cpp', 'random.cpp', 'stdexcept.cpp', 'system_error.cpp', 'utility.cpp', 'bind.cpp', 'debug.cpp', 'hash.cpp', 'mutex.cpp', 'string.cpp', 'thread.cpp', 'valarray.cpp', 'chrono.cpp', 'exception.cpp', 'ios.cpp', 'locale.cpp', 'regex.cpp', 'strstream.cpp']:
         o = in_temp(src + '.o')
         execute([shared.PYTHON, shared.EMXX, shared.path_from_root('system', 'lib', 'libcxx', src), '-o', o], stdout=stdout, stderr=stderr)
         os.append(o)
@@ -1003,7 +1128,7 @@ try:
     def create_libcxxabi():
       if DEBUG: print >> sys.stderr, 'emcc: building libcxxabi for cache'
       os = []
-      for src in ['private_typeinfo.cpp']:
+      for src in ['private_typeinfo.cpp', 'typeinfo.cpp']:
         o = in_temp(src + '.o')
         execute([shared.PYTHON, shared.EMXX, shared.path_from_root('system', 'lib', 'libcxxabi', 'src', src), '-o', o], stdout=stdout, stderr=stderr)
         os.append(o)
@@ -1017,29 +1142,32 @@ try:
     libcxxabi_symbols = filter(lambda symbol: symbol not in libc_symbols, libcxxabi_symbols)
     libcxxabi_symbols = set(libcxxabi_symbols)
 
-    force = False # If we have libcxx, we must force inclusion of libc, since libcxx uses new internally. Note: this is kind of hacky
-
+    # If we have libcxx, we must force inclusion of libc, since libcxx uses new internally. Note: this is kind of hacky
+    # Setting this in the environment will avoid checking dependencies and make building big projects a little faster
+    force = os.environ.get('EMCC_FORCE_STDLIBS')
+    has = need = None
     for name, create, fix, library_symbols in [('libcxx',    create_libcxx,    fix_libcxx,    libcxx_symbols),
                                                ('libcxxabi', create_libcxxabi, fix_libcxxabi, libcxxabi_symbols),
                                                ('libc',      create_libc,      fix_libc,      libc_symbols)]:
-      need = set()
-      has = set()
-      for temp_file in temp_files:
-        symbols = shared.Building.llvm_nm(temp_file)
-        for library_symbol in library_symbols:
-          if library_symbol in symbols.undefs:
-            need.add(library_symbol)
-          if library_symbol in symbols.defs:
-            has.add(library_symbol)
-      for haz in has: # remove symbols that are supplied by another of the inputs
-        if haz in need:
-          need.remove(haz)
-      if DEBUG: print >> sys.stderr, 'emcc: considering including %s: we need %s and have %s' % (name, str(need), str(has))
+      if not force:
+        need = set()
+        has = set()
+        for temp_file in temp_files:
+          symbols = shared.Building.llvm_nm(temp_file)
+          for library_symbol in library_symbols:
+            if library_symbol in symbols.undefs:
+              need.add(library_symbol)
+            if library_symbol in symbols.defs:
+              has.add(library_symbol)
+        for haz in has: # remove symbols that are supplied by another of the inputs
+          if haz in need:
+            need.remove(haz)
+        if DEBUG: print >> sys.stderr, 'emcc: considering including %s: we need %s and have %s' % (name, str(need), str(has))
       if force or len(need) > 0:
         # We need to build and link the library in
         if DEBUG: print >> sys.stderr, 'emcc: including %s' % name
         libfile = shared.Cache.get(name, create)
-        if len(has) > 0:
+        if has and len(has) > 0:
           # remove the symbols we do not need
           fixed = in_temp(uniquename(libfile)) + '.bc'
           shutil.copyfile(libfile, fixed)
@@ -1049,7 +1177,7 @@ try:
           libfile = fixed
         extra_files_to_link.append(libfile)
         force = True
-        if fix:
+        if fix and need:
           fix(need)
 
   # First, combine the bitcode files if there are several. We must also link if we have a singleton .a
@@ -1057,7 +1185,10 @@ try:
      (not LEAVE_INPUTS_RAW and not (suffix(temp_files[0]) in BITCODE_SUFFIXES or suffix(temp_files[0]) in DYNAMICLIB_SUFFIXES) and shared.Building.is_ar(temp_files[0])):
     linker_inputs = temp_files + extra_files_to_link
     if DEBUG: print >> sys.stderr, 'emcc: linking: ', linker_inputs
+    t0 = time.time()
     shared.Building.link(linker_inputs, in_temp(target_basename + '.bc'))
+    t1 = time.time()
+    if DEBUG: print >> sys.stderr, 'emcc:    linking took %.2f seconds' % (t1 - t0)
     final = in_temp(target_basename + '.bc')
   else:
     if not LEAVE_INPUTS_RAW:
@@ -1084,11 +1215,14 @@ try:
 
   # Optimize, if asked to
   if not LEAVE_INPUTS_RAW:
-    link_opts = [] if keep_debug else ['-strip-debug']
+    link_opts = [] if keep_llvm_debug else ['-strip-debug'] # remove LLVM debug info in -O1+, since the optimizer removes it anyhow
     if llvm_opts > 0:
-      shared.Building.llvm_opt(in_temp(target_basename + '.bc'), llvm_opts)
-      if DEBUG: save_intermediate('opt', 'bc')
-      # Do LTO in a separate pass to work around LLVM bug XXX (see failure e.g. in cubescript)
+      if not os.environ.get('EMCC_OPTIMIZE_NORMALLY'):
+        shared.Building.llvm_opt(in_temp(target_basename + '.bc'), llvm_opts)
+        if DEBUG: save_intermediate('opt', 'bc')
+        # Do LTO in a separate pass to work around LLVM bug XXX (see failure e.g. in cubescript)
+      else:
+        if DEBUG: print >> sys.stderr, 'emcc: not running opt because EMCC_OPTIMIZE_NORMALLY was specified, opt should have been run before'
     if shared.Building.can_build_standalone():
       # If we can LTO, do it before dce, since it opens up dce opportunities
       if llvm_lto and shared.Building.can_use_unsafe_opts():
@@ -1167,15 +1301,12 @@ try:
     execute(shlex.split(js_transform, posix=posix) + [os.path.abspath(final)])
     if DEBUG: save_intermediate('transformed')
 
-  if shared.Settings.ASM_JS: # XXX temporary wrapping for testing purposes
-    print >> sys.stderr, 'emcc: ASM_JS mode is highly experimental, and will not work on most codebases yet. It is NOT recommended that you try this yet.'
-
   # It is useful to run several js optimizer passes together, to save on unneeded unparsing/reparsing
   js_optimizer_queue = []
   def flush_js_optimizer_queue():
     global final, js_optimizer_queue
     if len(js_optimizer_queue) > 0 and not(len(js_optimizer_queue) == 1 and js_optimizer_queue[0] == 'last'):
-      if DEBUG < 2:
+      if DEBUG != '2':
         if shared.Settings.ASM_JS:
           js_optimizer_queue = ['asm'] + js_optimizer_queue
         if DEBUG: print >> sys.stderr, 'emcc: applying js optimization passes:', js_optimizer_queue
@@ -1194,7 +1325,7 @@ try:
   if opt_level >= 1:
     if DEBUG: print >> sys.stderr, 'emcc: running pre-closure post-opts'
 
-    if DEBUG >= 2:
+    if DEBUG == '2':
       # Clean up the syntax a bit
       final = shared.Building.js_optimizer(final, [], jcache)
       if DEBUG: save_intermediate('pretty')
@@ -1216,8 +1347,9 @@ try:
     if DEBUG: print >> sys.stderr, 'emcc: running closure'
     final = shared.Building.closure_compiler(final)
     if DEBUG: save_intermediate('closure')
-  elif shared.Settings.ASM_JS and shared.Settings.RELOOP:
-    js_optimizer_queue += ['registerize'] # we can't use closure in asm, but this does much of the same
+  elif shared.Settings.RELOOP and not closure and not keep_js_debug:
+    # do this if closure is not enabled (it gives similar speedups), and we do not need to keep debug info around
+    js_optimizer_queue += ['registerize']
 
   if opt_level >= 1:
     if DEBUG: print >> sys.stderr, 'emcc: running post-closure post-opts'
@@ -1230,6 +1362,12 @@ try:
 
   flush_js_optimizer_queue()
 
+  if not minify_whitespace:
+    # Remove some trivial whitespace
+    src = open(final).read()
+    src = re.sub(r'\n+[ \n]*\n+', '\n', src)
+    open(final, 'w').write(src)
+
   # If we were asked to also generate HTML, do that
   if final_suffix == 'html':
     if DEBUG: print >> sys.stderr, 'emcc: generating HTML'