1 files changed, 92 insertions, 52 deletions
diff --git a/emcc b/emcc
index 3bc35aa4..705538fc 100755
--- a/emcc
+++ b/emcc
@@ -49,7 +49,7 @@ emcc can be influenced by a few environment variables:
 
 import os, sys, shutil, tempfile, subprocess, shlex, time, re, logging
 from subprocess import PIPE, STDOUT
-from tools import shared
+from tools import shared, jsrun
 from tools.shared import Compression, execute, suffix, unsuffixed, unsuffixed_basename
 from tools.response_file import read_response_file
 
@@ -127,11 +127,6 @@ Options that are modified or new in %s include:
                            (For details on the affects of different
                            opt levels, see apply_opt_level() in
                            tools/shared.py and also src/settings.js.)
-                           Note: Optimizations are only done when
-                           compiling to JavaScript, not to intermediate
-                           bitcode, *unless* you build with
-                           EMCC_OPTIMIZE_NORMALLY=1 (not recommended
-                           unless you know what you are doing!)
   -O2                      As -O1, plus the relooper (loop recreation),
                            LLVM -O2 optimizations, and
 
@@ -144,7 +139,7 @@ Options that are modified or new in %s include:
                               -s DOUBLE_MODE=0
                               -s PRECISE_I64_MATH=0
                               --closure 1
-                              --llvm-lto 1
+                              --llvm-lto 3
 
                            This is not recommended at all. A better idea
                            is to try each of these separately on top of
@@ -203,10 +198,13 @@ Options that are modified or new in %s include:
                             -g2  Preserve function names
                             -g3  Preserve variable names
                             -g4  Preserve LLVM debug info (if -g was
-                                 used when compiling the C/C++ sources)
-                                 and show line number debug comments.
-                                 This is the highest level of debuggability.
-                                 (default in -O0)
+                                 used when compiling the C/C++ sources),
+                                 show line number debug comments, and
+                                 generate source maps. This is the highest
+                                 level of debuggability. Note that this
+                                 may make -O1 and above significantly
+                                 slower because JS optimization will be
+                                 limited to 1 core.  (default in -O0)
 
   --typed-arrays <mode>    0: No typed arrays
                            1: Parallel typed arrays
@@ -217,10 +215,15 @@ Options that are modified or new in %s include:
                            2: -O2 LLVM optimizations
                            3: -O3 LLVM optimizations (default in -O2+)
 
-  --llvm-lto <level>       0: No LLVM LTO (default in -O2 and below)
-                           1: LLVM LTO (default in -O3)
+  --llvm-lto <level>       0: No LLVM LTO (default)
+                           1: LLVM LTO is performed
+                           2: We combine all the bitcode and run LLVM opt -O3
+                              on that (which optimizes across modules, but is
+                              not the same as normal LTO), but do not do normal
+                              LTO
+                           3: We do both 2 and then 1
                            Note: If LLVM optimizations are not run
-                           (see --llvm-opts), setting this to 1 has no
+                           (see --llvm-opts), setting this has no
                            effect.
 
   --closure <on>           0: No closure compiler (default in -O2 and below)
@@ -731,6 +734,14 @@ try:
 
   settings_changes = []
 
+  def validate_arg_level(level_string, max_level, err_msg): # returns int(level_string) if it is in [0, max_level], else raises Exception(err_msg)
+    try:
+      level = int(level_string)
+      if not 0 <= level <= max_level: raise ValueError(level_string) # explicit check: an assert is stripped under python -O
+    except (TypeError, ValueError): # not a bare except, so KeyboardInterrupt/SystemExit still propagate
+      raise Exception(err_msg)
+    return level
+
   for i in range(len(newargs)):
     newargs[i] = newargs[i].strip() # On Windows Vista (and possibly others), excessive spaces in the command line leak into the items in this array, so trim e.g. 'foo.cpp ' -> 'foo.cpp'
     if newargs[i].startswith('-O'):
@@ -739,11 +750,7 @@ try:
       if requested_level == 's':
         requested_level = 2
         settings_changes.append('INLINING_LIMIT=50')
-      try:
-        opt_level = int(requested_level)
-        assert 0 <= opt_level <= 3
-      except:
-        raise Exception('Invalid optimization level: ' + newargs[i])
+      opt_level = validate_arg_level(requested_level, 3, 'Invalid optimization level: ' + newargs[i])
       newargs[i] = ''
     elif newargs[i].startswith('--llvm-opts'):
       check_bad_eq(newargs[i])
@@ -787,12 +794,8 @@ try:
       newargs[i+1] = ''
     elif newargs[i].startswith('-g'):
       requested_level = newargs[i][2:] or '3'
-      try:
-        debug_level = int(requested_level)
-        assert 0 <= debug_level <= 4
-      except:
-        raise Exception('Invalid debug level: ' + newargs[i])
-      newargs[i] = '-g' # discard level for clang args
+      debug_level = validate_arg_level(requested_level, 4, 'Invalid debug level: ' + newargs[i])
+      newargs[i] = '-g' # we'll need this to get LLVM debug info
     elif newargs[i] == '--bind':
       bind = True
       newargs[i] = ''
@@ -881,10 +884,19 @@ try:
     newargs = newargs + [default_cxx_std]
 
   if llvm_opts is None: llvm_opts = LLVM_OPT_LEVEL[opt_level]
-  if llvm_lto is None: llvm_lto = opt_level >= 3
+  if llvm_lto is None and opt_level >= 3: llvm_lto = 3
   if opt_level == 0: debug_level = 4
   if closure is None and opt_level == 3: closure = True
 
+  if llvm_lto is None and bind:
+    logging.debug('running lto for embind') # XXX this is a workaround for a pointer issue
+    llvm_lto = 1
+
+  # TODO: support source maps with js_transform
+  if js_transform and debug_level >= 4:
+    logging.warning('disabling source maps because a js transform is being done')
+    debug_level = 3
+
   if DEBUG: start_time = time.time() # done after parsing arguments, which might affect debug state
 
   if closure:
@@ -1050,6 +1062,10 @@ try:
   else:
     raise Exception('unknown llvm target: ' + str(shared.LLVM_TARGET))
 
+  if shared.Settings.USE_TYPED_ARRAYS != 2 and llvm_opts > 0:
+    logging.warning('disabling LLVM optimizations, need typed arrays mode 2 for them')
+    llvm_opts = 0
+
   ## Compile source code to bitcode
 
   logging.debug('compiling to bitcode')
@@ -1089,20 +1105,20 @@ try:
           shared.Building.llvm_as(input_file, temp_file)
           temp_files.append(temp_file)
 
-  if not LEAVE_INPUTS_RAW: assert len(temp_files) == len(input_files)
+  if not LEAVE_INPUTS_RAW:
+    assert len(temp_files) == len(input_files)
+
+    # Optimize source files
+    if llvm_opts > 0:
+      # temp_files is index-aligned with input_files (their lengths are asserted equal just above), so walk them in lockstep
+      for input_file, temp_file in zip(input_files, temp_files):
+        # only inputs whose name ends in one of SOURCE_SUFFIXES get an opt pass here
+        if input_file.endswith(SOURCE_SUFFIXES):
+          logging.debug('optimizing %s with -O%d' % (input_file, llvm_opts))
+          shared.Building.llvm_opt(temp_file, llvm_opts)
 
   # If we were just asked to generate bitcode, stop there
   if final_suffix not in JS_CONTAINING_SUFFIXES:
-    if llvm_opts > 0:
-      if not os.environ.get('EMCC_OPTIMIZE_NORMALLY'):
-        logging.warning('-Ox flags ignored, since not generating JavaScript')
-      else:
-        for input_file in input_files:
-          if input_file.endswith(SOURCE_SUFFIXES):
-            logging.debug('optimizing %s with -O%d since EMCC_OPTIMIZE_NORMALLY defined' % (input_file, llvm_opts))
-            shared.Building.llvm_opt(in_temp(unsuffixed(uniquename(input_file)) + '.o'), llvm_opts)
-          else:
-            logging.debug('not optimizing %s despite EMCC_OPTIMIZE_NORMALLY since not source code' % (input_file))
     if not specified_target:
       for input_file in input_files:
         shutil.move(in_temp(unsuffixed(uniquename(input_file)) + '.o'), unsuffixed_basename(input_file) + '.' + final_suffix)
@@ -1134,6 +1150,8 @@ try:
         symbols = filter(lambda symbol: symbol not in exclude, symbols)
       return set(symbols)
 
+    lib_opts = ['-O2']
+
     # XXX We also need to add libc symbols that use malloc, for example strdup. It's very rare to use just them and not
     #     a normal malloc symbol (like free, after calling strdup), so we haven't hit this yet, but it is possible.
     libc_symbols = read_symbols(shared.path_from_root('system', 'lib', 'libc.symbols'))
@@ -1142,7 +1160,7 @@ try:
     libcxx_symbols = read_symbols(shared.path_from_root('system', 'lib', 'libcxx', 'symbols'), exclude=libc_symbols)
     libcxxabi_symbols = read_symbols(shared.path_from_root('system', 'lib', 'libcxxabi', 'symbols'), exclude=libc_symbols)
 
-    # XXX we should disable EMCC_DEBUG (and EMCC_OPTIMIZE_NORMALLY?) when building libs, just like in the relooper
+    # XXX we should disable EMCC_DEBUG when building libs, just like in the relooper
 
     def build_libc(lib_filename, files):
       o_s = []
@@ -1151,7 +1169,7 @@ try:
       musl_internal_includes = shared.path_from_root('system', 'lib', 'libc', 'musl', 'src', 'internal')
       for src in files:
         o = in_temp(os.path.basename(src) + '.o')
-        execute([shared.PYTHON, shared.EMCC, shared.path_from_root('system', 'lib', src), '-o', o, '-I', musl_internal_includes], stdout=stdout, stderr=stderr)
+        execute([shared.PYTHON, shared.EMCC, shared.path_from_root('system', 'lib', src), '-o', o, '-I', musl_internal_includes] + lib_opts, stdout=stdout, stderr=stderr)
         o_s.append(o)
       if prev_cxx: os.environ['EMMAKEN_CXX'] = prev_cxx
       shared.Building.link(o_s, in_temp(lib_filename))
@@ -1162,7 +1180,7 @@ try:
       for src in files:
         o = in_temp(src + '.o')
         srcfile = shared.path_from_root(src_dirname, src)
-        execute([shared.PYTHON, shared.EMXX, srcfile, '-o', o, '-std=c++11'], stdout=stdout, stderr=stderr)
+        execute([shared.PYTHON, shared.EMXX, srcfile, '-o', o, '-std=c++11'] + lib_opts, stdout=stdout, stderr=stderr)
         o_s.append(o)
       shared.Building.link(o_s, in_temp(lib_filename))
       return in_temp(lib_filename)
@@ -1409,16 +1427,15 @@ try:
   # Optimize, if asked to
   if not LEAVE_INPUTS_RAW:
     link_opts = [] if debug_level >= 4 else ['-strip-debug'] # remove LLVM debug if we are not asked for it
-    if llvm_opts > 0:
-      if not os.environ.get('EMCC_OPTIMIZE_NORMALLY'):
-        shared.Building.llvm_opt(in_temp(target_basename + '.bc'), llvm_opts)
-        if DEBUG: save_intermediate('opt', 'bc')
-        # Do LTO in a separate pass to work around LLVM bug XXX (see failure e.g. in cubescript)
-      else:
-        logging.debug('not running opt because EMCC_OPTIMIZE_NORMALLY was specified, opt should have been run before')
+
+    if llvm_lto and llvm_lto >= 2: # llvm_lto can still be None here (LTO never requested); test truthiness first instead of order-comparing None with an int
+      logging.debug('running LLVM opt -O3 as pre-LTO')
+      shared.Building.llvm_opt(in_temp(target_basename + '.bc'), ['-O3'])
+      if DEBUG: save_intermediate('opt', 'bc')
+
     if shared.Building.can_build_standalone():
       # If we can LTO, do it before dce, since it opens up dce opportunities
-      if llvm_lto and shared.Building.can_use_unsafe_opts():
+      if llvm_lto and llvm_lto != 2 and shared.Building.can_use_unsafe_opts():
         if not shared.Building.can_inline(): link_opts.append('-disable-inlining')
         # do not internalize in std-link-opts - it ignores internalize-public-api-list - and add a manual internalize
         link_opts += ['-disable-internalize'] + shared.Building.get_safe_internalize() + ['-std-link-opts']
@@ -1496,9 +1513,11 @@ try:
     final += '.tr.js'
     posix = True if not shared.WINDOWS else False
     logging.debug('applying transform: %s' % js_transform)
-    execute(shlex.split(js_transform, posix=posix) + [os.path.abspath(final)])
+    subprocess.check_call(shlex.split(js_transform, posix=posix) + [os.path.abspath(final)])
     if DEBUG: save_intermediate('transformed')
 
+  js_transform_tempfiles = [final]
+
   # It is useful to run several js optimizer passes together, to save on unneeded unparsing/reparsing
   js_optimizer_queue = []
   def flush_js_optimizer_queue():
@@ -1508,7 +1527,8 @@ try:
         if shared.Settings.ASM_JS:
           js_optimizer_queue = ['asm'] + js_optimizer_queue
         logging.debug('applying js optimization passes: %s', js_optimizer_queue)
-        final = shared.Building.js_optimizer(final, js_optimizer_queue, jcache)
+        final = shared.Building.js_optimizer(final, js_optimizer_queue, jcache, debug_level >= 4)
+        js_transform_tempfiles.append(final)
         if DEBUG: save_intermediate('js_opts')
       else:
         for name in js_optimizer_queue:
@@ -1516,7 +1536,8 @@ try:
           if shared.Settings.ASM_JS:
             passes = ['asm'] + passes
           logging.debug('applying js optimization pass: %s', passes)
-          final = shared.Building.js_optimizer(final, passes, jcache)
+          final = shared.Building.js_optimizer(final, passes, jcache, debug_level >= 4)
+          js_transform_tempfiles.append(final)
           save_intermediate(name)
       js_optimizer_queue = []
 
@@ -1525,7 +1546,8 @@ try:
 
     if DEBUG == '2':
       # Clean up the syntax a bit
-      final = shared.Building.js_optimizer(final, [], jcache)
+      final = shared.Building.js_optimizer(final, [], jcache, debug_level >= 4)
+      js_transform_tempfiles.append(final)
       if DEBUG: save_intermediate('pretty')
 
     def get_eliminate():
@@ -1543,6 +1565,8 @@ try:
     flush_js_optimizer_queue()
 
     logging.debug('running closure')
+    # no need to add this to js_transform_tempfiles, because closure and
+    # debug_level > 0 are never simultaneously true
     final = shared.Building.closure_compiler(final)
     if DEBUG: save_intermediate('closure')
 
@@ -1590,6 +1614,7 @@ try:
       src = re.sub('/\* memory initializer \*/ allocate\(([\d,\.concat\(\)\[\]\\n ]+)"i8", ALLOC_NONE, Runtime\.GLOBAL_BASE\)', repl, src, count=1)
       open(final + '.mem.js', 'w').write(src)
       final += '.mem.js'
+      js_transform_tempfiles[-1] = final # simple text substitution preserves comment line number mappings
       if DEBUG:
         if os.path.exists(memfile):
           save_intermediate('meminit')
@@ -1597,12 +1622,25 @@ try:
         else:
           logging.debug('did not see memory initialization')
 
+  def generate_source_map(map_file_base_name, offset=0):
+    # Runs tools/source-maps/sourcemapper.js via jsrun with the shared.NODE_JS engine, passing the files
+    # collected in js_transform_tempfiles followed by the flags below; offset is forwarded as --offset.
+    mapper = shared.path_from_root('tools', 'source-maps', 'sourcemapper.js')
+    flags = ['--sourceRoot', os.getcwd(), '--mapFileBaseName', map_file_base_name, '--offset', str(offset)]
+    jsrun.run_js(mapper, shared.NODE_JS, js_transform_tempfiles + flags)
+
   # If we were asked to also generate HTML, do that
   if final_suffix == 'html':
     logging.debug('generating HTML')
     shell = open(shell_path).read()
     html = open(target, 'w')
     if not Compression.on:
+      if debug_level >= 4:
+        match = re.match('.*?<script[^>]*>{{{ SCRIPT_CODE }}}</script>', shell,
+            re.DOTALL)
+        if match is None:
+          raise RuntimeError('Could not find script insertion point')
+        generate_source_map(target, match.group().count('\n'))
       html.write(shell.replace('{{{ SCRIPT_CODE }}}', open(final).read()))
     else:
       # Compress the main code
@@ -1673,6 +1711,8 @@ try:
         from tools.split import split_javascript_file
         split_javascript_file(final, unsuffixed(target), split_js_file)
     else:
+        if debug_level >= 4: generate_source_map(target)
+
         # copy final JS to output
         shutil.move(final, target)