77 files changed, 18863 insertions, 642 deletions
diff --git a/.gitignore b/.gitignore
index b40594b6..7a0f2f78 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 *.diff
 *.pyc
 *~
+*.bc
 
diff --git a/AUTHORS b/AUTHORS
index 8a46cef7..248c345d 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -13,5 +13,6 @@ under the licensing terms detailed in LICENSE.
 * David Yip <yipdw@member.fsf.org>
 * Julien Hamaide <julien.hamaide@gmail.com>
 * Ehsan Akhgari <ehsan.akhgari@gmail.com> (copyright owned by Mozilla Foundation)
-
-
+* Adrian Taylor <adrian@macrobug.com>
+* Richard Assar <richard.assar@gmail.com>
+* Nathan Hammond <emscripten@nathanhammond.com>
diff --git a/emcc b/emcc
index c57bec4f..f23777b9 100755
--- a/emcc
+++ b/emcc
@@ -74,10 +74,19 @@ emcc can be influenced by a few environment variables:
   EMMAKEN_COMPILER - The compiler to be used, if you don't want the default clang.
 '''
 
-import os, sys, shutil, tempfile
-from subprocess import Popen, PIPE, STDOUT
+import os, sys, shutil, tempfile, subprocess
+from subprocess import PIPE, STDOUT
 from tools import shared
 
+def execute(cmd, *args, **kw): # run cmd through subprocess.Popen, forwarding every other argument to Popen unchanged
+  try:
+    return subprocess.Popen(cmd, *args, **kw).communicate() # waits for the child and returns communicate()'s (stdout, stderr) pair; unless the caller passes PIPE the child prints directly, so colors are saved (PIPE kills that)
+  except: # catches everything only to report which command was being run; the exception is re-raised below
+    if not isinstance(cmd, str): # cmd may be an argument list, flatten it for the message
+      cmd = ' '.join(cmd)
+    print >> sys.stderr, 'Invoking Process failed: <<< ' + cmd + ' >>>'
+    raise
+
 # Mapping of emcc opt levels to llvm opt levels. We use llvm opt level 3 in emcc opt
 # levels 2 and 3 (emcc 3 is unsafe opts, so unsuitable for the only level to get
 # llvm opt level 3, and speed-wise emcc level 2 is already the slowest/most optimizing
@@ -99,8 +108,11 @@ LEAVE_INPUTS_RAW = os.environ.get('EMCC_LEAVE_INPUTS_RAW') # Do not compile .ll
 AUTODEBUG = os.environ.get('EMCC_AUTODEBUG') # If set to 1, we will run the autodebugger (the automatic debugging tool, see tools/autodebugger).
                                              # Note that this will disable inclusion of libraries. This is useful because including
                                              # dlmalloc makes it hard to compare native and js builds
+EMCC_CFLAGS = os.environ.get('EMCC_CFLAGS') # Additional compiler flags (whitespace-separated) that we treat as if they were passed to us on the commandline
+
+if DEBUG: print >> sys.stderr, '\nemcc invocation: ', ' '.join(sys.argv), (' + ' + EMCC_CFLAGS if EMCC_CFLAGS else '')
+if EMCC_CFLAGS: sys.argv += EMCC_CFLAGS.split() # one argv entry per flag; appending the raw string would turn several flags into a single bogus argument
 
-if DEBUG: print >> sys.stderr, 'emcc: ', ' '.join(sys.argv)
 if DEBUG and LEAVE_INPUTS_RAW: print >> sys.stderr, 'emcc: leaving inputs raw'
 
 stdout = PIPE if not DEBUG else None # suppress output of child processes
@@ -149,7 +161,8 @@ Options that are modified or new in %s include:
                            -O2 and then adding dangerous optimizations one
                            by one.
   -s OPTION=VALUE          JavaScript code generation option passed
-                           into the emscripten compiler
+                           into the emscripten compiler. For the
+                           available options, see src/settings.js
   --typed-arrays <mode>    0: No typed arrays
                            1: Parallel typed arrays
                            2: Shared (C-like) typed arrays (default)
@@ -173,6 +186,10 @@ Options that are modified or new in %s include:
                            list of arguments, for example, <cmd> of
                            "python processor.py" will cause a python
                            script to be run.
+  --pre-js <file>          A file whose contents are added before the
+                           generated code
+  --post-js <file>         A file whose contents are added after the
+                           generated code
   --compress <on>          0: Do not compress the generated JavaScript's
                               whitespace (default if closure compiler
                               will not be run)
@@ -181,6 +198,19 @@ Options that are modified or new in %s include:
                               will be run). Note that this by itself
                               will not minify the code (closure does
                               that)
+  --embed-file <filename>  A file to embed inside the generated
+                           JavaScript. The compiled code will be able
+                           to access the file in the current directory
+                           with the same basename as given here (that is,
+                           just the filename, without a path to it).
+  --ignore-dynamic-linking Normally emcc will treat dynamic linking like
+                           static linking, by linking in the code from
+                           the dynamic library. This fails if the same
+                           dynamic library is linked more than once.
+                           With this option, dynamic linking is ignored,
+                           which allows the build system to proceed without
+                           errors. However, you will need to manually
+                           link to the shared libraries later on yourself.
   --shell-file <path>      The path name to a skeleton HTML file used
                            when generating HTML output. The shell file
                            used needs to have this token inside it:
@@ -195,7 +225,7 @@ be generated:
   <name>.js                JavaScript (default)
   <name>.html              HTML with embedded JavaScript
   <name>.bc                LLVM bitcode
-  <name>.o                 LLVM bitcode
+  <name>.o                 LLVM bitcode (same as .bc)
 
 The -c option (which tells gcc not to run the linker) will
 cause LLVM bitcode to be generated, as %s only generates
@@ -205,6 +235,9 @@ The input file(s) can be either source code files that
 Clang can handle (C or C++), LLVM bitcode in binary form,
 or LLVM assembly files in human-readable form.
 
+emcc is affected by several environment variables. For details, view
+the source of emcc (search for 'os.environ').
+
 ''' % (this, this, this)
   exit(0)
 
@@ -240,8 +273,13 @@ if EMMAKEN_CFLAGS: CC_ADDITIONAL_ARGS += EMMAKEN_CFLAGS.split(' ')
 
 SOURCE_SUFFIXES = ('.c', '.cpp', '.cxx', '.cc')
 BITCODE_SUFFIXES = ('.bc', '.o')
-SHAREDLIB_SUFFIXES = ('.dylib', '.so', '.dll')
+DYNAMICLIB_SUFFIXES = ('.dylib', '.so', '.dll')
+STATICLIB_SUFFIXES = ('.a',)
 ASSEMBLY_SUFFIXES = ('.ll',)
+LIB_PREFIXES = ('', 'lib')
+
+def suffix(name): # returns the text after the last '.' in name, without the dot (the whole name if it contains no '.')
+  return name.split('.')[-1] # [-1] picks the suffix itself; the [:-1] slice is what unsuffixed() uses for everything before it
 
 def unsuffixed(name):
   return '.'.join(name.split('.')[:-1])
@@ -305,7 +343,11 @@ try:
   llvm_opts = None
   closure = None
   js_transform = None
+  pre_js = None
+  post_js = None
   compress_whitespace = None
+  embed_files = []
+  ignore_dynamic_linking = False
   shell_path = shared.path_from_root('src', 'shell.html')
 
   def check_bad_eq(arg):
@@ -334,17 +376,35 @@ try:
       js_transform = newargs[i+1]
       newargs[i] = ''
       newargs[i+1] = ''
+    elif newargs[i].startswith('--pre-js'):
+      check_bad_eq(newargs[i])
+      pre_js = open(newargs[i+1]).read()
+      newargs[i] = ''
+      newargs[i+1] = ''
+    elif newargs[i].startswith('--post-js'):
+      check_bad_eq(newargs[i])
+      post_js = open(newargs[i+1]).read()
+      newargs[i] = ''
+      newargs[i+1] = ''
     elif newargs[i].startswith('--compress'):
       check_bad_eq(newargs[i])
       compress_whitespace = int(newargs[i+1])
       newargs[i] = ''
       newargs[i+1] = ''
+    elif newargs[i].startswith('--embed-file'):
+      check_bad_eq(newargs[i])
+      embed_files.append(newargs[i+1])
+      newargs[i] = ''
+      newargs[i+1] = ''
     elif newargs[i] == '-MF': # clang cannot handle this, so we fake it
       f = open(newargs[i+1], 'w')
       f.write('\n')
       f.close()
       newargs[i] = ''
       newargs[i+1] = ''
+    elif newargs[i] == '--ignore-dynamic-linking':
+      ignore_dynamic_linking = True
+      newargs[i] = ''
     elif newargs[i].startswith('--shell-file'):
       check_bad_eq(newargs[i])
       shell_path = newargs[i+1]
@@ -373,12 +433,21 @@ try:
       newargs[i+1] = ''
   newargs = [ arg for arg in newargs if arg is not '' ]
 
+  # Find input files
+
   input_files = []
   has_source_inputs = False
+  lib_dirs = []
+  libs = []
   for i in range(len(newargs)): # find input files XXX this a simple heuristic. we should really analyze based on a full understanding of gcc params,
                                 # right now we just assume that what is left contains no more |-x OPT| things
     arg = newargs[i]
-    if arg.endswith(SOURCE_SUFFIXES + BITCODE_SUFFIXES + SHAREDLIB_SUFFIXES + ASSEMBLY_SUFFIXES) or shared.Building.is_ar(arg): # we already removed -o <target>, so all these should be inputs
+
+    if i > 0:
+      prev = newargs[i-1]
+      if prev == '-MT': continue # ignore this gcc-style argument
+
+    if arg.endswith(SOURCE_SUFFIXES + BITCODE_SUFFIXES + DYNAMICLIB_SUFFIXES + ASSEMBLY_SUFFIXES) or shared.Building.is_ar(arg): # we already removed -o <target>, so all these should be inputs
       newargs[i] = ''
       if os.path.exists(arg):
         if arg.endswith(SOURCE_SUFFIXES):
@@ -392,9 +461,38 @@ try:
             print >> sys.stderr, 'emcc: %s: warning: Not valid LLVM bitcode' % arg
       else:
         print >> sys.stderr, 'emcc: %s: warning: No such file or directory' % arg
+    elif arg.startswith('-L'):
+      lib_dirs.append(arg[2:])
+      newargs[i] = ''
+    elif arg.startswith('-l'):
+      libs.append(arg[2:])
+      newargs[i] = ''
   newargs = [ arg for arg in newargs if arg is not '' ]
 
-  assert len(input_files) > 0, 'emcc: no input files'
+  # Find library files: for every -l name, try each prefix in LIB_PREFIXES with the static and then the dynamic suffixes, and look for that candidate name in every -L directory. The first existing path is added to input_files; a library that is never found is skipped without a message.
+  for lib in libs:
+    if DEBUG: print >> sys.stderr, 'emcc: looking for library "%s"' % lib
+    found = False
+    for prefix in LIB_PREFIXES:
+      for suff in STATICLIB_SUFFIXES + DYNAMICLIB_SUFFIXES:
+        name = prefix + lib + suff # candidate file name, e.g. prefix 'lib' + 'foo' + '.a'
+        for lib_dir in lib_dirs:
+          path = os.path.join(lib_dir, name)
+          if os.path.exists(path):
+            if DEBUG: print >> sys.stderr, 'emcc: found library "%s" at %s' % (lib, path)
+            input_files.append(path)
+            found = True
+            break
+        if found: break # propagate the break out of the suffix loop
+      if found: break # ...and out of the prefix loop, so only the first match is used
+
+  if ignore_dynamic_linking:
+    input_files = filter(lambda input_file: not input_file.endswith(DYNAMICLIB_SUFFIXES), input_files)
+
+  if len(input_files) == 0: # nothing left to compile or link after argument parsing, library lookup and filtering
+    print >> sys.stderr, 'emcc: no input files'
+    print >> sys.stderr, 'note that input files without a known suffix are ignored, make sure your input files end with one of: ' + str(SOURCE_SUFFIXES + BITCODE_SUFFIXES + DYNAMICLIB_SUFFIXES + STATICLIB_SUFFIXES + ASSEMBLY_SUFFIXES)
+    exit(0) # NOTE(review): exits with status 0 although the message above reads like an error -- confirm that callers are meant to see success here
 
   newargs += CC_ADDITIONAL_ARGS
 
@@ -436,7 +534,7 @@ try:
       temp_files.append(output_file)
       args = newargs + ['-emit-llvm', '-c', input_file, '-o', output_file]
       if DEBUG: print >> sys.stderr, "emcc running:", call, ' '.join(args)
-      Popen([call] + args).communicate() # let compiler frontend print directly, so colors are saved (PIPE kills that)
+      execute([call] + args) # let compiler frontend print directly, so colors are saved (PIPE kills that)
       if not os.path.exists(output_file):
         print >> sys.stderr, 'emcc: compiler frontend failed to generate LLVM bitcode, halting'
         sys.exit(1)
@@ -446,7 +544,7 @@ try:
         temp_file = in_temp(unsuffixed_basename(input_file) + '.o')
         shutil.copyfile(input_file, temp_file)
         temp_files.append(temp_file)
-      elif input_file.endswith(SHAREDLIB_SUFFIXES) or shared.Building.is_ar(input_file):
+      elif input_file.endswith(DYNAMICLIB_SUFFIXES) or shared.Building.is_ar(input_file):
         if DEBUG: print >> sys.stderr, 'emcc: copying library file: ', input_file
         temp_file = in_temp(os.path.basename(input_file))
         shutil.copyfile(input_file, temp_file)
@@ -460,6 +558,8 @@ try:
           shared.Building.llvm_as(input_file, temp_file)
           temp_files.append(temp_file)
 
+  if not LEAVE_INPUTS_RAW: assert len(temp_files) == len(input_files)
+
   # If we were just asked to generate bitcode, stop there
   if final_suffix not in ['js', 'html']:
     if llvm_opts > 0:
@@ -477,7 +577,7 @@ try:
         ld_args = temp_files + ['-b', specified_target]
                   #[arg.split('-Wl,')[1] for arg in filter(lambda arg: arg.startswith('-Wl,'), sys.argv)]
         if DEBUG: print >> sys.stderr, 'emcc: link: ' + str(ld_args)
-        Popen([shared.LLVM_LD, '-disable-opt'] + ld_args).communicate()
+        execute([shared.LLVM_LD, '-disable-opt'] + ld_args)
     exit(0)
 
   ## Continue on to create JavaScript
@@ -495,9 +595,9 @@ try:
     # dlmalloc
     def create_dlmalloc():
       if DEBUG: print >> sys.stderr, 'emcc: building dlmalloc for cache'
-      Popen([shared.EMCC, shared.path_from_root('system', 'lib', 'dlmalloc.c'), '-g', '-o', in_temp('dlmalloc.o')], stdout=stdout, stderr=stderr).communicate()
+      execute([shared.EMCC, shared.path_from_root('system', 'lib', 'dlmalloc.c'), '-g', '-o', in_temp('dlmalloc.o')], stdout=stdout, stderr=stderr)
       # we include the libc++ new stuff here, so that the common case of using just new/delete is quick to link
-      Popen([shared.EMXX, shared.path_from_root('system', 'lib', 'libcxx', 'new.cpp'), '-g', '-o', in_temp('new.o')], stdout=stdout, stderr=stderr).communicate()
+      execute([shared.EMXX, shared.path_from_root('system', 'lib', 'libcxx', 'new.cpp'), '-g', '-o', in_temp('new.o')], stdout=stdout, stderr=stderr)
       shared.Building.link([in_temp('dlmalloc.o'), in_temp('new.o')], in_temp('dlmalloc_full.o'))
       return in_temp('dlmalloc_full.o')
     def fix_dlmalloc():
@@ -528,14 +628,28 @@ try:
     libcxx_symbols = filter(lambda symbol: symbol not in dlmalloc_symbols, libcxx_symbols)
     libcxx_symbols = set(libcxx_symbols)
 
+    # libcxxabi - just for dynamic_cast for now
+    def create_libcxxabi(): # asks shared.Building.build_library to build libcxxabi from system/lib/libcxxabi and returns the path where libcxxabi.bc is expected
+      if DEBUG: print >> sys.stderr, 'emcc: building libcxxabi for cache'
+      shared.Building.build_library('libcxxabi', shared.EMSCRIPTEN_TEMP_DIR, shared.EMSCRIPTEN_TEMP_DIR, ['libcxxabi.bc'], configure=None, copy_project=True, source_dir=shared.path_from_root('system', 'lib', 'libcxxabi'))
+      return os.path.join(shared.EMSCRIPTEN_TEMP_DIR, 'libcxxabi', 'libcxxabi.bc') # path under the temp dir, not a value returned by build_library
+    def fix_libcxxabi(): # checks QUANTUM_SIZE and prints a warning; it changes no settings itself (presumably the hook run when libcxxabi gets included -- TODO confirm against the loop that consumes it)
+      assert shared.Settings.QUANTUM_SIZE == 4, 'We do not support libc++abi with QUANTUM_SIZE == 1'
+      print >> sys.stderr, 'emcc: warning: using libcxxabi, this may need CORRECT_* options'
+      #shared.Settings.CORRECT_SIGNS = shared.Settings.CORRECT_OVERFLOWS = shared.Settings.CORRECT_ROUNDINGS = 1
+    libcxxabi_symbols = map(lambda line: line.strip().split(' ')[1], open(shared.path_from_root('system', 'lib', 'libcxxabi', 'symbols')).readlines())
+    libcxxabi_symbols = filter(lambda symbol: symbol not in dlmalloc_symbols, libcxxabi_symbols)
+    libcxxabi_symbols = set(libcxxabi_symbols)
+
     force = False # If we have libcxx, we must force inclusion of dlmalloc, since libcxx uses new internally. Note: this is kind of hacky
 
-    for name, create, fix, library_symbols in [('libcxx',   create_libcxx,   fix_libcxx,   libcxx_symbols),
-                                               ('dlmalloc', cre